Merge branch 'master' into develop

2024-07-22 10:47:33 +02:00 · 2016-10-28 22:49:06 +02:00 · 2016-10-28 22:49:06 +02:00 · 76b6bfd6a7
commit 76b6bfd6a7
parent 69de9d30ca 3946c710fe
22 changed files with 694 additions and 571 deletions
--- a/5
+++ b/5
@ -487,7 +487,6 @@ def create_ninja_and_rc(l_installed):

    l_rc = [
        'export QP_ROOT={0}'.format(QP_ROOT),
-        '#export QP_NIC=ib0   # Choose the correct network inuterface',
        'export QP_EZFIO={0}'.format(path_ezfio.replace(QP_ROOT,"${QP_ROOT}")),
        'export QP_PYTHON={0}'.format(":".join(l_python)), "",
        'export IRPF90={0}'.format(path_irpf90.replace(QP_ROOT,"${QP_ROOT}")),
@ -498,6 +497,10 @@ def create_ninja_and_rc(l_installed):
        'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"', "",
        'source ${QP_ROOT}/install/EZFIO/Bash/ezfio.sh', "",
        'source ${HOME}/.opam/opam-init/init.sh > /dev/null 2> /dev/null || true',
+        '',
+        '# Choose the correct network interface',
+        '# export QP_NIC=ib0',
+        '# export QP_NIC=eth0',
        ""
    ]

--- a/ocaml/Address.ml
+++ b/ocaml/Address.ml
@ -42,7 +42,7 @@ end = struct
    assert (String.is_prefix ~prefix:"inproc://" x);
    x
  let create name =
-    Printf.sprintf "ipc://%s" name
+    Printf.sprintf "inproc://%s" name
  let to_string x = x
 end

--- a/ocaml/Progress_bar.ml
+++ b/ocaml/Progress_bar.ml
@ -14,13 +14,13 @@ type t =

 let init ?(bar_length=20) ?(start_value=0.) ?(end_value=1.) ~title =
  { title ; start_value ; end_value ; bar_length ; cur_value=start_value ;
-    init_time= Time.now () ; dirty = true ; next = Time.now () } 
+    init_time= Time.now () ; dirty = false ; next = Time.now () } 

 let update ~cur_value bar =
  { bar with cur_value ; dirty=true }

 let increment_end bar =
-  { bar with end_value=(bar.end_value +. 1.) ; dirty=true }
+  { bar with end_value=(bar.end_value +. 1.) ; dirty=false }

 let increment_cur bar =
  { bar with cur_value=(bar.cur_value +. 1.) ; dirty=true }
--- a/ocaml/TaskServer.ml
+++ b/ocaml/TaskServer.ml
@ -678,9 +678,9 @@ let run ~port =

              (** Debug input *)
              Printf.sprintf "q:%d  r:%d  n:%d  : %s\n%!"
-              (Queuing_system.number_of_queued  program_state.queue)
+              (Queuing_system.number_of_queued program_state.queue)
              (Queuing_system.number_of_running program_state.queue)
-              (Queuing_system.number_of_tasks   program_state.queue)
+              (Queuing_system.number_of_tasks program_state.queue)
              (Message.to_string message)
              |> debug;

--- a/plugins/MRCC_Utils/mrcc_dress.irp.f
+++ b/plugins/MRCC_Utils/mrcc_dress.irp.f
@ -271,7 +271,7 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Nstates, Ndet_non_ref, Ndet_ref,i_ge
          !delta_ii_(i_state,i_I)  = 0.d0
          do l_sd=1,idx_alpha(0)
            k_sd = idx_alpha(l_sd)
-            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
+            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0 * dIa_hla(i_state,k_sd)
          enddo
        endif
      enddo
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@ -616,219 +616,429 @@ END_PROVIDER

 BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ]
 &BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ]
-   implicit none
-   logical                        :: ok
-   integer                        :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row
-   integer, external              :: searchDet, unsortedSearchDet
-   integer(bit_kind)              :: myDet(N_int, 2), myMask(N_int, 2)
-   integer                        :: N, INFO, AtA_size, r1, r2
-   double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:)
-   double precision               :: t, norm, cx, res
-   integer, allocatable           :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:)
-   double precision               :: phase
-   
-   
-   
-   nex = hh_shortcut(hh_shortcut(0)+1)-1
-   print *, "TI", nex, N_det_non_ref
-   allocate(A_ind(0:N_det_ref+1, nex), A_val(N_det_ref+1, nex))
-   allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!!
-   allocate(x(nex), AtB(nex))
-   allocate(N_col(nex), col_shortcut(nex))
-   allocate(x_new(nex))
-   
-   do s = 1, N_states
-     
-     A_val = 0d0
-     A_ind = 0
-     AtA_ind = 0
-     AtA_val = 0d0
-     x = 0d0
-     A_val_mwen = 0d0
-     N_col = 0
-     col_shortcut = 0
-     
-     !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)&
-         !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)&
-         !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk)
-      allocate(lref(N_det_non_ref))
-     !$OMP DO schedule(static,10) 
-     do hh = 1, hh_shortcut(0)
-       do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
-         lref = 0
-         do II = 1, N_det_ref
-           call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
-           if(.not. ok) cycle
-           call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
-           if(.not. ok) cycle
-           ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
-           if(ind /= -1) then
-             call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
-             if (phase  > 0.d0) then
-               lref(psi_non_ref_sorted_idx(ind)) =  II
-             else
-               lref(psi_non_ref_sorted_idx(ind)) = -II
-             endif
-           end if
-         end do
-         wk = 0
-         do i=1, N_det_non_ref
-           if(lref(i) > 0) then
-             wk += 1
-             A_val(wk, pp) = psi_ref_coef(lref(i), s)
-             A_ind(wk, pp) = i
-           else if(lref(i) < 0) then
-             wk += 1
-             A_val(wk, pp) = -psi_ref_coef(-lref(i), s)
-             A_ind(wk, pp) = i
-           end if
-         end do
-         A_ind(0,pp) = wk
-       end do
-     end do
-     !$OMP END DO
-     deallocate(lref)
-     !$OMP END PARALLEL   
- print *,  'Done building A_val, A_ind'
-     
-     AtB = 0d0
-     AtA_size = 0
-     col_shortcut = 0
-     N_col = 0
-     !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)&
-         !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)&
-         !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s)
-     allocate(A_val_mwen(nex), A_ind_mwen(nex))
-     A_ind_mwen = 0
-     !$OMP DO schedule(dynamic, 100)
-     do at_row = 1, nex
-       wk = 0
-       if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex
-       do i=1,A_ind(0,at_row)
-         AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row)
-       end do
+  implicit none
+  logical                        :: ok
+  integer                        :: i, j, k, s, II, pp, ppp, hh, ind, wk, nex, a_col, at_row
+  integer, external              :: searchDet, unsortedSearchDet
+  integer(bit_kind)              :: myDet(N_int, 2), myMask(N_int, 2)
+  integer                        :: N, INFO, AtA_size, r1, r2
+  double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:)
+  double precision               :: t, norm, cx, res
+  integer, allocatable           :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:)
+  double precision               :: phase
+  
+  
+  integer, allocatable :: pathTo(:), active_hh_idx(:), active_pp_idx(:)
+  logical, allocatable :: active(:)
+  double precision, allocatable :: rho_mrcc_init(:,:)
+  integer :: nactive
+  
+  nex = hh_shortcut(hh_shortcut(0)+1)-1
+  print *, "TI", nex, N_det_non_ref
+  
+  allocate(pathTo(N_det_non_ref), active(nex))
+  allocate(active_pp_idx(nex), active_hh_idx(nex))
+  allocate(rho_mrcc_init(N_det_non_ref, N_states))
+  
+  pathTo = 0
+  active = .false.
+  nactive = 0
+  
+  
+  do hh = 1, hh_shortcut(0)
+    do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
+      do II = 1, N_det_ref
+        call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
+        if(.not. ok) cycle
+        call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
+        if(.not. ok) cycle
+        ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
+        if(ind == -1) cycle
+        ind = psi_non_ref_sorted_idx(ind)
+        if(pathTo(ind) == 0) then
+          pathTo(ind) = pp
+        else
+          active(pp) = .true.
+          active(pathTo(ind)) = .true.
+        end if
+      end do
+    end do
+  end do
+  
+   do hh = 1, hh_shortcut(0)
+    do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
+      if(active(pp)) then
+        nactive = nactive + 1
+        active_hh_idx(nactive) = hh
+        active_pp_idx(nactive) = pp
+      end if
+    end do
+   end do
+  
+  print *, nactive, "inact/", size(active)
+  
+  allocate(A_ind(0:N_det_ref+1, nactive), A_val(N_det_ref+1, nactive))
+  allocate(AtA_ind(N_det_ref * nactive), AtA_val(N_det_ref * nactive)) 
+  allocate(x(nex), AtB(nex))
+  allocate(N_col(nactive), col_shortcut(nactive))
+  allocate(x_new(nex))
+        

-       do a_col = 1, nex
-         t = 0d0
-         r1 = 1
-         r2 = 1
-         do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0))
-           if(A_ind(r1, at_row) > A_ind(r2, a_col)) then
-             r2 = r2+1
-           else if(A_ind(r1, at_row) < A_ind(r2, a_col)) then
-             r1 = r1+1
-           else
-             t = t - A_val(r1, at_row) * A_val(r2, a_col)
-             r1 = r1+1
-             r2 = r2+1
-           end if
-         end do
-         
-         if(a_col == at_row) then
-           t = t + 1.d0
-         end if
-         if(t /= 0.d0) then
-           wk += 1
-           A_ind_mwen(wk) = a_col
-           A_val_mwen(wk) = t
-         end if
-       end do
-       
-       if(wk /= 0) then
-         !$OMP CRITICAL
-         col_shortcut(at_row) = AtA_size+1
-         N_col(at_row) = wk
-         if (AtA_size+wk > size(AtA_ind,1)) then
-            print *, AtA_size+wk , size(AtA_ind,1)
-            stop 'too small'
-         endif
-         do i=1,wk
+  
+  do s=1, N_states
+    
+    A_val = 0d0
+    A_ind = 0
+    AtA_ind = 0
+    AtB = 0d0
+    AtA_val = 0d0
+    x = 0d0
+    N_col = 0
+    col_shortcut = 0
+    
+    !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)&
+        !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)&
+        !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)  &
+        !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh)
+    allocate(lref(N_det_non_ref))
+    !$OMP DO schedule(static,10)
+    do ppp=1,nactive
+      pp = active_pp_idx(ppp)
+      hh = active_hh_idx(ppp)
+      lref = 0
+      do II = 1, N_det_ref
+        call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
+        if(.not. ok) cycle
+        call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
+        if(.not. ok) cycle
+        ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
+        if(ind /= -1) then
+          call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
+          if (phase  > 0.d0) then
+            lref(psi_non_ref_sorted_idx(ind)) =  II
+          else
+            lref(psi_non_ref_sorted_idx(ind)) = -II
+          endif
+        end if
+      end do
+      wk = 0
+      do i=1, N_det_non_ref
+        if(lref(i) > 0) then
+          wk += 1
+          A_val(wk, ppp) = psi_ref_coef(lref(i), s)
+          A_ind(wk, ppp) = i
+        else if(lref(i) < 0) then
+          wk += 1
+          A_val(wk, ppp) = -psi_ref_coef(-lref(i), s)
+          A_ind(wk, ppp) = i
+        end if
+      end do
+      A_ind(0,ppp) = wk
+    end do
+    !$OMP END DO
+    deallocate(lref)
+    !$OMP END PARALLEL
+    
+    
+    print *,  'Done building A_val, A_ind'
+    
+    AtA_size = 0
+    col_shortcut = 0
+    N_col = 0
+    integer                        :: a_coll, at_roww
+    
+    
+    !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)&
+        !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
+        !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx)
+    allocate(A_val_mwen(nex), A_ind_mwen(nex))
+    
+    !$OMP DO schedule(dynamic, 100)
+    do at_roww = 1, nactive ! nex
+      at_row = active_pp_idx(at_roww)
+      wk = 0
+      if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", nex
+      do i=1,A_ind(0,at_roww)
+        j = active_pp_idx(i)
+        AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww)
+      end do
+      
+      do a_coll = 1, nactive
+        a_col = active_pp_idx(a_coll)
+        t = 0d0
+        r1 = 1
+        r2 = 1
+        do while ((A_ind(r1, at_roww) /= 0).and.(A_ind(r2, a_coll) /= 0))
+          if(A_ind(r1, at_roww) > A_ind(r2, a_coll)) then
+            r2 = r2+1
+          else if(A_ind(r1, at_roww) < A_ind(r2, a_coll)) then
+            r1 = r1+1
+          else
+            t = t - A_val(r1, at_roww) * A_val(r2, a_coll)
+            r1 = r1+1
+            r2 = r2+1
+          end if
+        end do
+        
+        if(a_col == at_row) then
+          t = t + 1.d0
+        end if
+        if(t /= 0.d0) then
+          wk += 1
+          A_ind_mwen(wk) = a_col
+          A_val_mwen(wk) = t
+        end if
+      end do
+      
+      if(wk /= 0) then
+        !$OMP CRITICAL
+        col_shortcut(at_roww) = AtA_size+1
+        N_col(at_roww) = wk
+        if (AtA_size+wk > size(AtA_ind,1)) then
+          print *, AtA_size+wk , size(AtA_ind,1)
+          stop 'too small'
+        endif
+        do i=1,wk
          AtA_ind(AtA_size+i) = A_ind_mwen(i)
          AtA_val(AtA_size+i) = A_val_mwen(i)
-         enddo
-         AtA_size += wk
-         !$OMP END CRITICAL
-       end if
-     end do
-     !$OMP END DO NOWAIT
-     deallocate (A_ind_mwen, A_val_mwen)
-     !$OMP END PARALLEL
-     
-     if(AtA_size > size(AtA_val)) stop "SIZA"
-     print *, "ATA SIZE", ata_size
-     do i=1,nex
-       x(i) = AtB(i)
-     enddo
-     
-     double precision :: factor, resold
-     factor = 1.d0
-     resold = huge(1.d0)
-     do k=0,100000
-       !$OMP PARALLEL default(shared) private(cx, i, j, a_col)
-       
-       !$OMP DO
-       do i=1,N_det_non_ref
-         rho_mrcc(i,s) = 0.d0
-       enddo
-       !$OMP END DO
-       
-       !$OMP DO
-       do a_col = 1, nex
-         cx = 0d0
-         do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1
-           cx = cx + x(AtA_ind(i)) * AtA_val(i)
-         end do
-         x_new(a_col) = AtB(a_col) + cx * factor
-       end do
-       !$OMP END DO
-       
-       !$OMP END PARALLEL
-       
-       res = 0.d0
-       do a_col=1,nex
-         res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
-       end do
-       
-       if (res < resold) then
-          do a_col=1,nex
-            do j=1,N_det_non_ref
-              i = A_ind(j,a_col)
-              if (i==0) exit
-              rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col)
-            enddo
-            X(a_col) = X_new(a_col)
-          end do
-!          factor = 1.d0
-       else
-          factor = -factor * 0.5d0
-       endif
-       resold = res
-
-       if(mod(k, 100) == 0) then
-         print *, "res", k, res
-       end if
-       
-       if(res < 1d-8) exit
-     end do
-     ! rho_mrcc now contains A.X
-
-     norm = 0.d0
-     do i=1,N_det_non_ref
-       norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
-     enddo
-     ! Norm now contains the norm of A.X
-
-     do i=1,N_det_ref
-       norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
-     enddo
-     ! Norm now contains the norm of Psi + A.X
-     
-     print *, k, "res : ", res, "norm : ", sqrt(norm)
-     
-     dIj_unique(:size(X), s) = X(:)
+        enddo
+        AtA_size += wk
+        !$OMP END CRITICAL
+      end if
+    end do
+    !$OMP END DO NOWAIT
+    deallocate (A_ind_mwen, A_val_mwen)
+    !$OMP END PARALLEL
+    
+    print *, "ATA SIZE", ata_size
+    x = 0d0
+    
+    
+    do a_coll = 1, nactive
+      a_col = active_pp_idx(a_coll)
+      X(a_col) = AtB(a_col)
+    end do
+    
+    rho_mrcc_init = 0d0
+    
+    allocate(lref(N_det_ref))
+    !$OMP PARALLEL DO default(shared) schedule(static, 1)            &
+        !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase)
+    do hh = 1, hh_shortcut(0)
+      do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
+        if(active(pp)) cycle
+        lref = 0
+        do II=1,N_det_ref
+          call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
+          if(.not. ok) cycle
+          call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
+          if(.not. ok) cycle
+          ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
+          if(ind == -1) cycle
+          ind = psi_non_ref_sorted_idx(ind)
+          call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
+          X(pp) += psi_ref_coef(II,s)**2
+          AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase
+          lref(II) = ind
+          if(phase < 0d0) lref(II) = -ind
+        end do
+        X(pp) =  AtB(pp) / X(pp)
+        do II=1,N_det_ref
+          if(lref(II) > 0) then
+            rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp)
+          else if(lref(II) < 0) then
+            rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp)
+          end if
+        end do
+      end do
+    end do
+    !$OMP END PARALLEL DO
+    
+    x_new = x
+    
+    double precision               :: factor, resold
+    factor = 1.d0
+    resold = huge(1.d0)
+    do k=0,100000
+      !$OMP PARALLEL default(shared) private(cx, i, j, a_col, a_coll)
+      
+      !$OMP DO
+      do i=1,N_det_non_ref
+        rho_mrcc(i,s) = rho_mrcc_init(i,s) ! 0d0
+      enddo
+      !$OMP END DO
+      
+      !$OMP DO
+      do a_coll = 1, nactive !: nex
+        a_col = active_pp_idx(a_coll)
+        cx = 0d0
+        do i=col_shortcut(a_coll), col_shortcut(a_coll) + N_col(a_coll) - 1
+          cx = cx + x(AtA_ind(i)) * AtA_val(i)
+        end do
+        x_new(a_col) = AtB(a_col) + cx * factor
+      end do
+      !$OMP END DO
+      
+      !$OMP END PARALLEL
+      
+      res = 0.d0
+      
+      
+      if (res < resold) then
+        do a_coll=1,nactive ! nex
+          a_col = active_pp_idx(a_coll)
+          do j=1,N_det_non_ref
+            i = A_ind(j,a_coll)
+            if (i==0) exit
+            rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_coll) * X_new(a_col)
+          enddo
+          res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
+          X(a_col) = X_new(a_col)
+        end do
+        factor = 1.d0
+      else
+        factor = -factor * 0.5d0
+      endif
+      resold = res
+      
+      if(mod(k, 100) == 0) then
+        print *, "res ", k, res
+      end if
+      
+      if(res < 1d-9) exit
+    end do
+    
+    
+    
+    norm = 0.d0
+    do i=1,N_det_non_ref
+      norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
+    enddo
+    ! Norm now contains the norm of A.X
+    
+    do i=1,N_det_ref
+      norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
+    enddo
+    ! Norm now contains the norm of Psi + A.X
+    
+    print *, k, "res : ", res, "norm : ", sqrt(norm)
+        
+!---------------
+! double precision               :: e_0, overlap
+! double precision, allocatable  :: u_0(:)
+! integer(bit_kind), allocatable :: keys_tmp(:,:,:)
+! allocate (u_0(N_det), keys_tmp(N_int,2,N_det) )
+! k=0
+! overlap = 0.d0
+! do i=1,N_det_ref
+!   k = k+1
+!   u_0(k) = psi_ref_coef(i,1)
+!   keys_tmp(:,:,k) = psi_ref(:,:,i)
+!   overlap += u_0(k)*psi_ref_coef(i,1)
+! enddo
+! norm = 0.d0
+! do i=1,N_det_non_ref
+!   k = k+1
+!   u_0(k) = psi_non_ref_coef(i,1)
+!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
+!   overlap += u_0(k)*psi_non_ref_coef(i,1)
+! enddo
+! 
+! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
+! print *,  'Energy of |Psi_CASSD> : ', e_0 + nuclear_repulsion, overlap
+!
+! k=0
+! overlap = 0.d0
+! do i=1,N_det_ref
+!   k = k+1
+!   u_0(k) = psi_ref_coef(i,1)
+!   keys_tmp(:,:,k) = psi_ref(:,:,i)
+!   overlap += u_0(k)*psi_ref_coef(i,1)
+! enddo
+! norm = 0.d0
+! do i=1,N_det_non_ref
+!   k = k+1
+!   ! f is such that f.\tilde{c_i} = c_i
+!   f = psi_non_ref_coef(i,1) / rho_mrcc(i,1)
+!   
+!   ! Avoid numerical instabilities
+!   f = min(f,2.d0)
+!   f = max(f,-2.d0)
+!
+!   f = 1.d0
+!
+!   u_0(k) = rho_mrcc(i,1)*f
+!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
+!   norm += u_0(k)**2
+!   overlap += u_0(k)*psi_non_ref_coef(i,1)
+! enddo
+! 
+! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
+! print *,  'Energy of |(1+T)Psi_0> : ', e_0 + nuclear_repulsion, overlap
+!
+! f = 1.d0/norm
+! norm = 1.d0
+! do i=1,N_det_ref
+!  norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
+! enddo
+! f = dsqrt(f*norm)
+! overlap = norm
+! do i=1,N_det_non_ref
+!   u_0(k) = rho_mrcc(i,1)*f
+!   overlap += u_0(k)*psi_non_ref_coef(i,1)
+! enddo
+!
+! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
+! print *,  'Energy of |(1+T)Psi_0> (normalized) : ', e_0 + nuclear_repulsion,  overlap
+!
+! k=0
+! overlap = 0.d0
+! do i=1,N_det_ref
+!   k = k+1
+!   u_0(k) = psi_ref_coef(i,1)
+!   keys_tmp(:,:,k) = psi_ref(:,:,i)
+!   overlap += u_0(k)*psi_ref_coef(i,1)
+! enddo
+! norm = 0.d0
+! do i=1,N_det_non_ref
+!   k = k+1
+!   ! f is such that f.\tilde{c_i} = c_i
+!   f = psi_non_ref_coef(i,1) / rho_mrcc(i,1)
+!   
+!   ! Avoid numerical instabilities
+!   f = min(f,2.d0)
+!   f = max(f,-2.d0)
+!
+!   u_0(k) = rho_mrcc(i,1)*f
+!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
+!   norm += u_0(k)**2
+!   overlap += u_0(k)*psi_non_ref_coef(i,1)
+! enddo
+! 
+! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
+! print *,  'Energy of |(1+T)Psi_0> (mu_i): ', e_0 + nuclear_repulsion, overlap
+!
+! f = 1.d0/norm
+! norm = 1.d0
+! do i=1,N_det_ref
+!  norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
+! enddo
+! overlap = norm
+! f = dsqrt(f*norm)
+! do i=1,N_det_non_ref
+!   u_0(k) = rho_mrcc(i,1)*f
+!   overlap += u_0(k)*psi_non_ref_coef(i,1)
+! enddo
+!
+! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
+! print *,  'Energy of |(1+T)Psi_0> (normalized mu_i) : ', e_0 + nuclear_repulsion, overlap
+!
+! deallocate(u_0, keys_tmp)
+!
+!---------------
     
     norm = 0.d0
-     double precision :: f
+     double precision               :: f
     do i=1,N_det_non_ref
       if (rho_mrcc(i,s) == 0.d0) then
         rho_mrcc(i,s) = 1.d-32
@ -870,12 +1080,15 @@ END_PROVIDER
     enddo
     ! rho_mrcc now contains the product of the scaling factors and the
     ! normalization constant
+    
+    dIj_unique(:size(X), s) = X(:)
+  end do

-   end do
-   
 END_PROVIDER


+
+
 BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
  integer :: s,i,j
  double precision, external :: get_dij_index
@ -1141,3 +1354,6 @@ subroutine apply_particle_local(det, exc, res, ok, Nint)
  ok = .true.
 end subroutine

+
+
+
--- a/plugins/loc_cele/loc_cele.irp.f
+++ b/plugins/loc_cele/loc_cele.irp.f
@ -451,7 +451,7 @@

      enddo !big loop over symmetry

-      10 format (4E18.12)
+      10 format (4E19.12)


 !  Now we copyt the newcmo into the mo_coef
--- a/plugins/mrcepa0/EZFIO.cfg
+++ b/plugins/mrcepa0/EZFIO.cfg
@ -23,7 +23,7 @@ interface: ezfio
 type: Threshold
 doc: Threshold on the convergence of the dressed CI energy
 interface: ezfio,provider,ocaml
-default: 1.e-4
+default: 5.e-5

 [n_it_max_dressed_ci]
 type: Strictly_positive_int
--- a/plugins/mrcepa0/dressing.irp.f
+++ b/plugins/mrcepa0/dressing.irp.f
@ -299,7 +299,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
          delta_ii_(i_state,i_I)  = 0.d0
          do l_sd=1,idx_alpha(0)
            k_sd = idx_alpha(l_sd)
-            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
+            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd)
          enddo
        endif
      enddo
@ -554,7 +554,6 @@ END_PROVIDER
        do k=1,N_det_non_ref

          call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk)
-          call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki)
          
          delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k)
          !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int)
@ -647,7 +646,7 @@ end function
  integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref)
  logical                         :: ok
  double precision                :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1)
-  double precision                :: contrib, HIIi, HJk, wall
+  double precision                :: contrib, contrib2,  HIIi, HJk, wall
  integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
  integer(bit_kind)               :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2)
  integer(bit_kind),allocatable   :: sortRef(:,:,:)
@ -677,7 +676,7 @@ end function
    delta_mrcepa0_ij(:,:,:) = 0d0

    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii)       &
-    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib)       &
+    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2)       &
    !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active)     &
    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) &
    !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij)
@ -720,16 +719,18 @@ end function
          !$OMP ATOMIC
          notf = notf+1

-          call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk)
-          !contrib = delta_cas(II, J, i_state) * HJk * lambda_mrcc(i_state, det_cepa0_idx(k))
+!          call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk)
          contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
-          !$OMP ATOMIC
-          delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib
          
          if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
+            contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
            !$OMP ATOMIC
-            delta_mrcepa0_ii(J,i_state) -= contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
+            delta_mrcepa0_ii(J,i_state) -= contrib2 
+          else
+            contrib = contrib * 0.5d0
          end if
+          !$OMP ATOMIC
+          delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib

        end do kloop
      end do
@ -753,7 +754,7 @@ END_PROVIDER
  integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_
  logical                         :: ok
  double precision                :: phase_Ji, phase_Ik, phase_Ii
-  double precision                :: contrib, delta_IJk, HJk, HIk, HIl
+  double precision                :: contrib, contrib2, delta_IJk, HJk, HIk, HIl
  integer, dimension(0:2,2,2)     :: exc_Ik, exc_Ji, exc_Ii
  integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2)
  integer, allocatable            :: idx_sorted_bit(:)
@ -778,7 +779,7 @@ END_PROVIDER
    !$OMP PARALLEL DO default(none) schedule(dynamic,10) shared(delta_sub_ij, delta_sub_ii)       &
    !$OMP private(i, J, k, degree, degree2, l, deg, ni)       &
    !$OMP private(p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_)     &
-    !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) &
+    !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib2, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) &
    !$OMP private(det_tmp, det_tmp2, II, blok)    &
    !$OMP shared(idx_sorted_bit, N_det_non_ref, N_det_ref, N_int, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef)   &
    !$OMP shared(i_state,lambda_mrcc, hf_bitmask, active_sorb)
@ -827,13 +828,16 @@ END_PROVIDER
            delta_IJk = HJk * HIk * lambda_mrcc(i_state, k)
            call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int)
            if(ok) cycle
-              contrib = delta_IJk * HIl * lambda_mrcc(i_state,l)   
+            contrib = delta_IJk * HIl * lambda_mrcc(i_state,l)   
+            if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then
+              contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state)
+              !$OMP ATOMIC
+              delta_sub_ii(II,i_state) -= contrib2
+            else
+              contrib = contrib * 0.5d0
+            endif
            !$OMP ATOMIC
            delta_sub_ij(II, i, i_state) += contrib
-            if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then
-              !$OMP ATOMIC
-              delta_sub_ii(II,i_state) -= contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state)
-            endif
          end do
        end do
      end do
--- a/plugins/mrcepa0/mrcepa0_general.irp.f
+++ b/plugins/mrcepa0/mrcepa0_general.irp.f
@ -28,7 +28,7 @@ subroutine run(N_st,energy)
    enddo
    SOFT_TOUCH psi_coef ci_energy_dressed
    call write_double(6,ci_energy_dressed(1),"Final MRCC energy")
-    call ezfio_set_mrcc_cassd_energy(ci_energy_dressed(1))
+    call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
    call save_wavefunction
    energy(:) = ci_energy_dressed(:)
  else
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@ -501,7 +501,7 @@ subroutine davidson_miniserver_end()
  integer                 rc
  character*(64)          buf
  
-  address = trim(qp_run_address_tcp)//':11223'
+  address = trim(qp_run_address)//':11223'
  requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
  rc        = f77_zmq_connect(requester,address)

@ -520,7 +520,7 @@ subroutine davidson_miniserver_get()
  character*(20)          buffer
  integer                 rc
  
-  address = trim(qp_run_address_tcp)//':11223'
+  address = trim(qp_run_address)//':11223'
  
  requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
  rc        = f77_zmq_connect(requester,address)
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@ -22,7 +22,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
  integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint, iunit
  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
-  double precision, intent(out)  :: energies(N_st), s2_out(N_st)
+  double precision, intent(out)  :: energies(N_st), s2_out(N_st_diag)
  double precision, allocatable  :: H_jj(:), S2_jj(:)
  
  double precision               :: diag_h_mat_elem
@ -95,11 +95,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
  
  double precision               :: u_dot_v, u_dot_u
  
-  integer, allocatable           :: kl_pairs(:,:)
  integer                        :: k_pairs, kl
  
  integer                        :: iter2
-  double precision, allocatable  :: W(:,:),  U(:,:), R(:,:), S(:,:)
+  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:)
  double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
  double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
  double precision               :: diag_h_mat_elem
@ -107,12 +106,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
  character*(16384)              :: write_buffer
  double precision               :: to_print(3,N_st)
  double precision               :: cpu, wall
-  integer                        :: shift, shift2
+  integer                        :: shift, shift2, itermax
  include 'constants.include.F'
  
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda
-  if (N_st_diag > sze) then
-     stop 'error in Davidson : N_st_diag > sze'
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
+  if (N_st_diag*3 > sze) then
+     print *,  'error in Davidson :'
+     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+     stop -1
  endif

  PROVIDE nuclear_repulsion
@ -147,28 +148,26 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
  integer, external :: align_double
  sze_8 = align_double(sze)

+  itermax = min(davidson_sze_max, sze/N_st_diag)
  allocate(                                                          &
-      kl_pairs(2,N_st_diag*(N_st_diag+1)/2),                         &
-      W(sze_8,N_st_diag*davidson_sze_max),                           &
-      U(sze_8,N_st_diag*davidson_sze_max),                           &
-      R(sze_8,N_st_diag),                                            &
-      S(sze_8,N_st_diag*davidson_sze_max),                           &
-      h(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max),      &
-      y(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max),      &
-      s_(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max),     &
-      s_tmp(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max),  &
+      W(sze_8,N_st_diag*itermax),                           &
+      U(sze_8,N_st_diag*itermax),                           &
+      S(sze_8,N_st_diag*itermax),                           &
+      h(N_st_diag*itermax,N_st_diag*itermax),      &
+      y(N_st_diag*itermax,N_st_diag*itermax),      &
+      s_(N_st_diag*itermax,N_st_diag*itermax),     &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),  &
      residual_norm(N_st_diag),                                      &
-      c(N_st_diag*davidson_sze_max),                                 &
-      s2(N_st_diag*davidson_sze_max),                                &
-      lambda(N_st_diag*davidson_sze_max))
+      c(N_st_diag*itermax),                                 &
+      s2(N_st_diag*itermax),                                &
+      lambda(N_st_diag*itermax))
  
  h  = 0.d0
  s_ = 0.d0
  s_tmp = 0.d0
-  c = 0.d0
  U = 0.d0
+  W = 0.d0
  S = 0.d0
-  R = 0.d0
  y = 0.d0


@ -183,25 +182,25 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
  
  converged = .False.
  
-  do k=1,N_st
-    call normalize(u_in(1,k),sze)
-  enddo
-
-  do k=N_st+1,N_st_diag
+  double precision               :: r1, r2
+  do k=N_st+1,N_st_diag-2,2
      do i=1,sze
-        double precision               :: r1, r2
        call random_number(r1)
        call random_number(r2)
-        u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        u_in(i,k) = r1*dcos(r2)
+        u_in(i,k+1) = r1*dsin(r2)
+      enddo
+  enddo
+  do k=N_st_diag-1,N_st_diag
+      do i=1,sze
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        u_in(i,k) = r1*dcos(r2)
      enddo
-    
-    ! Gram-Schmidt
-    ! ------------
-    call dgemv('T',sze,k-1,1.d0,u_in,size(u_in,1),                   &
-        u_in(1,k),1,0.d0,c,1)
-    call dgemv('N',sze,k-1,-1.d0,u_in,size(u_in,1),                  &
-        c,1,1.d0,u_in(1,k),1)
-    call normalize(u_in(1,k),sze)
  enddo


@ -213,11 +212,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
      enddo
    enddo

-    do iter=1,davidson_sze_max-1
+    do iter=1,itermax-1
      
      shift  = N_st_diag*(iter-1)
      shift2 = N_st_diag*iter

+      call ortho_qr(U,size(U,1),sze,shift2)

      ! Compute |W_k> = \sum_i |i><i|H|u_k>
      ! -----------------------------------------
@ -229,20 +229,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
      ! -------------------------------------------

-
-!      do l=1,N_st_diag
-!        do k=1,N_st_diag
-!          do iter2=1,iter-1
-!            h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze)
-!            h(k,iter,l,iter2) = h(k,iter2,l,iter)
-!          enddo
-!        enddo
-!        do k=1,l
-!          h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze)
-!          h(l,iter,k,iter) = h(k,iter,l,iter)
-!        enddo
-!      enddo
-
      call dgemm('T','N', shift2, N_st_diag, sze,                    &
          1.d0, U, size(U,1), W(1,shift+1), size(W,1),               &
          0.d0, h(1,shift+1), size(h,1))
@ -295,22 +281,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
      ! Express eigenvectors of h in the determinant basis
      ! --------------------------------------------------
      
-!      do k=1,N_st_diag
-!        do i=1,sze
-!          U(i,shift2+k) = 0.d0
-!          W(i,shift2+k) = 0.d0
-!          S(i,shift2+k) = 0.d0
-!        enddo
-!        do l=1,N_st_diag*iter
-!          do i=1,sze
-!            U(i,shift2+k) = U(i,shift2+k) + U(i,l)*y(l,k)
-!            W(i,shift2+k) = W(i,shift2+k) + W(i,l)*y(l,k)
-!            S(i,shift2+k) = S(i,shift2+k) + S(i,l)*y(l,k)
-!          enddo
-!        enddo
-!      enddo
-!
-!
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
@ -318,83 +288,39 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
          1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1))

-      ! Compute residual vector
-      ! -----------------------
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
      
-!      do k=1,N_st_diag
-!        print *,  s2(k)
-!        s2(k) = u_dot_v(U(1,shift2+k),  S(1,shift2+k), sze) + S_z2_Sz
-!        print *,  s2(k)
-!        print *,  ''
-!        pause
-!      enddo
      do k=1,N_st_diag
        do i=1,sze
-          R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz)
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+             )/max(H_jj(i) - lambda (k),1.d-2)
        enddo
        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(R(1,k),sze)
+          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
          to_print(1,k) = lambda(k) + nuclear_repulsion
          to_print(2,k) = s2(k)
          to_print(3,k) = residual_norm(k)
-          if (residual_norm(k) > 1.e9) then
-            stop 'Davidson failed'
-          endif
        endif
      enddo
      
-      write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))')  iter, to_print(:,1:N_st)
+      write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3,A30))')  iter, to_print(:,1:N_st), ''
      call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
+      do k=1,N_st
+        if (residual_norm(k) > 1.e4) then
+        print *,  ''
+          stop 'Davidson failed'
+        endif
+      enddo
      if (converged) then
        exit
      endif
      
-      ! Davidson step
-      ! -------------
-      
-      do k=1,N_st_diag
-        do i=1,sze
-          U(i,shift2+k) =  - R(i,k)/max(H_jj(i) - lambda(k),1.d-2) 
-        enddo
-      enddo
-      
-      ! Gram-Schmidt
-      ! ------------
-      
-      do k=1,N_st_diag
-
-!        do l=1,N_st_diag*iter
-!            c(1) = u_dot_v(U(1,shift2+k),U(1,l),sze)
-!            do i=1,sze
-!              U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,l)
-!            enddo
-!        enddo
-!
-        call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1),          &
-            U(1,shift2+k),1,0.d0,c,1)
-        call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1),         &
-            c,1,1.d0,U(1,shift2+k),1)
-!
-!        do l=1,k-1
-!          c(1) = u_dot_v(U(1,shift2+k),U(1,shift2+l),sze)
-!          do i=1,sze
-!            U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,shift2+l)
-!          enddo
-!        enddo
-!
-        call dgemv('T',sze,k-1,1.d0,U(1,shift2+1),size(U,1),         &
-            U(1,shift2+k),1,0.d0,c,1)
-        call dgemv('N',sze,k-1,-1.d0,U(1,shift2+1),size(U,1),        &
-            c,1,1.d0,U(1,shift2+k),1)
-
-        call normalize( U(1,shift2+k), sze )
-      enddo
-
    enddo

    if (.not.converged) then
-      iter = davidson_sze_max-1
+      iter = itermax-1
    endif
    
    ! Re-contract to u_in
@ -404,20 +330,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
      energies(k) = lambda(k)
    enddo

-!    do k=1,N_st_diag
-!      do i=1,sze
-!        do l=1,iter*N_st_diag
-!            u_in(i,k) += U(i,l)*y(l,k)
-!          enddo
-!        enddo
-!      enddo
-!    enddo
-
    call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0,      &
        U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))

  enddo

+  do k=1,N_st_diag
+    S2_jj(k) = s2(k)
+  enddo
  write_buffer = '===== '
  do i=1,N_st
    write_buffer = trim(write_buffer)//' ================ =========== ==========='
@ -427,10 +347,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
  call write_time(iunit)

  deallocate (                                                       &
-      kl_pairs,                                                      &
      W, residual_norm,                                              &
      U,                                                             &
-      R, c, S,                                                       &
+      c, S,                                                          &
      h,                                                             &
      y, s_, s_tmp,                                                  &
      lambda                                                         &
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@ -252,7 +252,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
  ave_workload = ave_workload/dble(shortcut(0,1))


-  do sh=shortcut(0,1),1,-1
+  do sh=1,shortcut(0,1),1
    workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
    do i=sh, shortcut(0,2), shortcut(0,1)
      do j=i, min(i, shortcut(0,2))
--- a/src/Determinants/H_apply_zmq.template.f
+++ b/src/Determinants/H_apply_zmq.template.f
@ -35,7 +35,7 @@ subroutine $subroutine($params_main)

  call zmq_put_psi(zmq_to_qp_run_socket,1,energy,size(energy))

-  do i_generator=N_det_generators,1,-1
+  do i_generator=1,N_det_generators
    $skip
    write(task,*) i_generator
    call add_task_to_taskserver(zmq_to_qp_run_socket,task)
--- a/src/Determinants/connected_to_ref.irp.f
+++ b/src/Determinants/connected_to_ref.irp.f
@ -109,8 +109,6 @@ integer function get_index_in_psi_det_sorted_bit(key,Nint)
      continue
    else
      in_wavefunction = .True.
-      !DIR$ IVDEP
-      !DIR$ LOOP COUNT MIN(3)
      do l=2,Nint
        if ( (key(l,1) /= psi_det_sorted_bit(l,1,i)).or.                           &
              (key(l,2) /= psi_det_sorted_bit(l,2,i)) ) then
@ -175,7 +173,6 @@ logical function is_connected_to(key,keys,Nint,Ndet)
  do i=1,Ndet
    degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) +              &
        popcnt(xor( key(1,2), keys(1,2,i)))
-    !DEC$ LOOP COUNT MIN(3)
    do l=2,Nint
      degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +&
          popcnt(xor( key(l,2), keys(l,2,i)))
@ -231,7 +228,6 @@ logical function is_connected_to_by_mono(key,keys,Nint,Ndet)
  do i=1,Ndet
    degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) +              &
        popcnt(xor( key(1,2), keys(1,2,i)))
-    !DEC$ LOOP COUNT MIN(3)
    do l=2,Nint
      degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +&
          popcnt(xor( key(l,2), keys(l,2,i)))
@ -325,10 +321,12 @@ integer function connected_to_ref(key,keys,Nint,N_past_in,Ndet)
    do i=N_past-1,1,-1
      degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) +              &
          popcnt(xor( key(1,2), keys(1,2,i)))
-      !DEC$ LOOP COUNT MIN(3)
      do l=2,Nint
        degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +&
            popcnt(xor( key(l,2), keys(l,2,i)))
+        if (degree_x2 > 4) then
+          exit
+        endif
      enddo
      if (degree_x2 > 4) then
        cycle
@ -429,7 +427,6 @@ integer function connected_to_ref_by_mono(key,keys,Nint,N_past_in,Ndet)
    do i=N_past-1,1,-1
      degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) +              &
          popcnt(xor( key(1,2), keys(1,2,i)))
-      !DEC$ LOOP COUNT MIN(3)
      do l=2,Nint
        degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +&
            popcnt(xor( key(l,2), keys(l,2,i)))
--- a/src/Determinants/filter_connected.irp.f
+++ b/src/Determinants/filter_connected.irp.f
@ -300,7 +300,6 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
  else
    

-    integer, save :: icount(4) = (/0,0,0,0/)
    !DIR$ LOOP COUNT (1000)
    outer: do i=1,sze
      degree_x2 = 0
@ -318,7 +317,6 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
      enddo
      idx(l) = i
      l = l+1
-      icount(3) = icount(3) + 1_8
    enddo outer
    
  endif
--- a/src/Determinants/slater_rules.irp.f
+++ b/src/Determinants/slater_rules.irp.f
@ -515,8 +515,6 @@ subroutine i_H_j(key_i,key_j,Nint,hij)
  integer                        :: occ(Nint*bit_kind_size,2)
  double precision               :: diag_H_mat_elem, phase,phase_2
  integer                        :: n_occ_ab(2)
-  logical                        :: has_mipi(Nint*bit_kind_size)
-  double precision               :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size)
  PROVIDE mo_bielec_integrals_in_map mo_integrals_map
  
  ASSERT (Nint > 0)
@ -568,59 +566,27 @@ subroutine i_H_j(key_i,key_j,Nint,hij)
      call get_mono_excitation(key_i,key_j,exc,phase,Nint)
      !DIR$ FORCEINLINE
      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
-      has_mipi = .False.
      if (exc(0,1,1) == 1) then
        ! Mono alpha
        m = exc(1,1,1)
        p = exc(1,2,1)
        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
+          hij = hij + mo_bielec_integral_mipi_anti(occ(k,1),m,p)
        enddo
        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
-        
-        do k = 1, elec_alpha_num
-          hij = hij + mipi(occ(k,1)) - miip(occ(k,1))
-        enddo
-        do k = 1, elec_beta_num
-          hij = hij + mipi(occ(k,2))
+          hij = hij + mo_bielec_integral_mipi(occ(k,2),m,p)
        enddo
        
      else
        ! Mono beta
        m = exc(1,1,2)
        p = exc(1,2,2)
-        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
+
        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
-        
-        do k = 1, elec_alpha_num
-          hij = hij + mipi(occ(k,1))
+          hij = hij + mo_bielec_integral_mipi(occ(k,1),m,p) 
        enddo
        do k = 1, elec_beta_num
-          hij = hij + mipi(occ(k,2)) - miip(occ(k,2))
+          hij = hij + mo_bielec_integral_mipi_anti(occ(k,2),m,p)
        enddo
        
      endif
@ -651,8 +617,6 @@ subroutine i_H_j_phase_out(key_i,key_j,Nint,hij,phase,exc,degree)
  integer                        :: occ(Nint*bit_kind_size,2)
  double precision               :: diag_H_mat_elem
  integer                        :: n_occ_ab(2)
-  logical                        :: has_mipi(Nint*bit_kind_size)
-  double precision               :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size)
  PROVIDE mo_bielec_integrals_in_map mo_integrals_map

  ASSERT (Nint > 0)
@ -704,59 +668,27 @@ subroutine i_H_j_phase_out(key_i,key_j,Nint,hij,phase,exc,degree)
      call get_mono_excitation(key_i,key_j,exc,phase,Nint)
      !DIR$ FORCEINLINE
      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
-      has_mipi = .False.
      if (exc(0,1,1) == 1) then
        ! Mono alpha
        m = exc(1,1,1)
        p = exc(1,2,1)
        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
+          hij = hij + mo_bielec_integral_mipi_anti(occ(k,1),m,p)
        enddo
        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
+          hij = hij + mo_bielec_integral_mipi(occ(k,2),m,p)
        enddo
-
-        do k = 1, elec_alpha_num
-          hij = hij + mipi(occ(k,1)) - miip(occ(k,1))
-        enddo
-        do k = 1, elec_beta_num
-          hij = hij + mipi(occ(k,2))
-        enddo
-
+        
      else
        ! Mono beta
        m = exc(1,1,2)
        p = exc(1,2,2)
-        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
-        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo

        do k = 1, elec_alpha_num
-          hij = hij + mipi(occ(k,1))
+          hij = hij + mo_bielec_integral_mipi(occ(k,1),m,p) 
        enddo
        do k = 1, elec_beta_num
-          hij = hij + mipi(occ(k,2)) - miip(occ(k,2))
+          hij = hij + mo_bielec_integral_mipi_anti(occ(k,2),m,p)
        enddo

      endif
@ -787,8 +719,6 @@ subroutine i_H_j_verbose(key_i,key_j,Nint,hij,hmono,hdouble)
  integer                        :: occ(Nint*bit_kind_size,2)
  double precision               :: diag_H_mat_elem, phase,phase_2
  integer                        :: n_occ_ab(2)
-  logical                        :: has_mipi(Nint*bit_kind_size)
-  double precision               :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size)
  PROVIDE mo_bielec_integrals_in_map mo_integrals_map
  
  ASSERT (Nint > 0)
@ -842,59 +772,26 @@ subroutine i_H_j_verbose(key_i,key_j,Nint,hij,hmono,hdouble)
      call get_mono_excitation(key_i,key_j,exc,phase,Nint)
      !DIR$ FORCEINLINE
      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
-      has_mipi = .False.
      if (exc(0,1,1) == 1) then
        ! Mono alpha
        m = exc(1,1,1)
        p = exc(1,2,1)
        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
+          hdouble = hdouble +  mo_bielec_integral_mipi_anti(occ(k,1),m,p)
        enddo
        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
-        
-        do k = 1, elec_alpha_num
-          hdouble = hdouble + mipi(occ(k,1)) - miip(occ(k,1))
-        enddo
-        do k = 1, elec_beta_num
-          hdouble = hdouble + mipi(occ(k,2))
+          hdouble = hdouble + mo_bielec_integral_mipi(occ(k,2),m,p)
        enddo
        
      else
        ! Mono beta
        m = exc(1,1,2)
        p = exc(1,2,2)
-        do k = 1, elec_beta_num
-          i = occ(k,2)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
        do k = 1, elec_alpha_num
-          i = occ(k,1)
-          if (.not.has_mipi(i)) then
-            mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
-            has_mipi(i) = .True.
-          endif
-        enddo
-        
-        do k = 1, elec_alpha_num
-          hdouble = hdouble + mipi(occ(k,1))
+          hdouble = hdouble + mo_bielec_integral_mipi(occ(k,1),m,p) 
        enddo
        do k = 1, elec_beta_num
-          hdouble = hdouble + mipi(occ(k,2)) - miip(occ(k,2))
+          hdouble = hdouble + mo_bielec_integral_mipi_anti(occ(k,2),m,p)
        enddo
        
      endif
@ -1320,7 +1217,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx)
  l=1
  if (Nint==1) then
    
-    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      d = popcnt(xor( key1(1,1,i), key2(1,1))) +       &
          popcnt(xor( key1(1,2,i), key2(1,2)))
@ -1335,7 +1231,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx)
    
  else if (Nint==2) then
    
-    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      d = popcnt(xor( key1(1,1,i), key2(1,1))) +                     &
          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
@ -1352,7 +1247,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx)
    
  else if (Nint==3) then
    
-    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      d = popcnt(xor( key1(1,1,i), key2(1,1))) +                     &
          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
@ -1371,10 +1265,8 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx)
    
  else
    
-    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      d = 0
-      !DIR$ LOOP COUNT MIN(4)
      do m=1,Nint
        d = d + popcnt(xor( key1(m,1,i), key2(m,1)))                 &
              + popcnt(xor( key1(m,2,i), key2(m,2)))
--- a/src/Integrals_Bielec/ao_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f
@ -368,7 +368,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ]

  call new_parallel_job(zmq_to_qp_run_socket,'ao_integrals')

-  do l=1,ao_num
+  do l=ao_num,1,-1
    write(task,*) "triangle ", l
    call add_task_to_taskserver(zmq_to_qp_run_socket,task)
  enddo
--- a/src/Integrals_Bielec/map_integrals.irp.f
+++ b/src/Integrals_Bielec/map_integrals.irp.f
@ -120,15 +120,16 @@ end

 END_PROVIDER

-BEGIN_PROVIDER [ double precision, ao_integrals_cache, (ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max) ]
+BEGIN_PROVIDER [ double precision, ao_integrals_cache, (0:64*64*64*64) ]
 implicit none
 BEGIN_DOC
 ! Cache of AO integrals for fast access
 END_DOC
 PROVIDE ao_bielec_integrals_in_map
- integer                        :: i,j,k,l
+ integer                        :: i,j,k,l,ii
 integer(key_kind)              :: idx
- !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx)
+ real(integral_kind)            :: integral
+ !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral)
 do l=ao_integrals_cache_min,ao_integrals_cache_max
   do k=ao_integrals_cache_min,ao_integrals_cache_max
     do j=ao_integrals_cache_min,ao_integrals_cache_max
@ -136,7 +137,12 @@ BEGIN_PROVIDER [ double precision, ao_integrals_cache, (ao_integrals_cache_min:a
         !DIR$ FORCEINLINE
         call bielec_integrals_index(i,j,k,l,idx)
         !DIR$ FORCEINLINE
-         call map_get(ao_integrals_map,idx,ao_integrals_cache(i,j,k,l))
+         call map_get(ao_integrals_map,idx,integral)
+         ii = l-ao_integrals_cache_min
+         ii = ior( ishft(ii,6), k-ao_integrals_cache_min)
+         ii = ior( ishft(ii,6), j-ao_integrals_cache_min)
+         ii = ior( ishft(ii,6), i-ao_integrals_cache_min)
+         ao_integrals_cache(ii) = integral
       enddo
     enddo
   enddo
@ -155,30 +161,33 @@ double precision function get_ao_bielec_integral(i,j,k,l,map)
  integer, intent(in)            :: i,j,k,l
  integer(key_kind)              :: idx
  type(map_type), intent(inout)  :: map
+  integer                        :: ii
  real(integral_kind)            :: tmp
-  PROVIDE ao_bielec_integrals_in_map
+  PROVIDE ao_bielec_integrals_in_map ao_integrals_cache ao_integrals_cache_min
  !DIR$ FORCEINLINE
  if (ao_overlap_abs(i,k)*ao_overlap_abs(j,l) < ao_integrals_threshold ) then
    tmp = 0.d0
  else if (ao_bielec_integral_schwartz(i,k)*ao_bielec_integral_schwartz(j,l) < ao_integrals_threshold) then
    tmp = 0.d0
  else
-    if ( (i >= ao_integrals_cache_min) .and. &
-        (j >= ao_integrals_cache_min) .and. &
-        (k >= ao_integrals_cache_min) .and. &
-        (l >= ao_integrals_cache_min) .and. &
-        (i <= ao_integrals_cache_max) .and. &
-        (j <= ao_integrals_cache_max) .and. &
-        (k <= ao_integrals_cache_max) .and. &
-        (l <= ao_integrals_cache_max) ) then
-        tmp = ao_integrals_cache(i,j,k,l)
-    else
+    ii = l-ao_integrals_cache_min
+    ii = ior(ii, k-ao_integrals_cache_min)
+    ii = ior(ii, j-ao_integrals_cache_min)
+    ii = ior(ii, i-ao_integrals_cache_min)
+    if (iand(ii, -64) /= 0) then
      !DIR$ FORCEINLINE
      call bielec_integrals_index(i,j,k,l,idx)
+      !DIR$ FORCEINLINE
      call map_get(map,idx,tmp)
+      get_ao_bielec_integral = dble(tmp)
+    else
+      ii = l-ao_integrals_cache_min
+      ii = ior( ishft(ii,6), k-ao_integrals_cache_min)
+      ii = ior( ishft(ii,6), j-ao_integrals_cache_min)
+      ii = ior( ishft(ii,6), i-ao_integrals_cache_min)
+      get_ao_bielec_integral = ao_integrals_cache(ii)
    endif
  endif
-  get_ao_bielec_integral = tmp
 end


@ -324,20 +333,22 @@ end
 ! Min and max values of the MOs for which the integrals are in the cache
 END_DOC
 mo_integrals_cache_min = max(1,elec_alpha_num - 31)
- mo_integrals_cache_max = min(mo_tot_num,elec_alpha_num + 32)
+ mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+63)

 END_PROVIDER

-BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max) ]
+BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:64*64*64*64) ]
 implicit none
 BEGIN_DOC
 ! Cache of MO integrals for fast access
 END_DOC
 PROVIDE mo_bielec_integrals_in_map
 integer                        :: i,j,k,l
+ integer                        :: ii
 integer(key_kind)              :: idx
+ real(integral_kind)            :: integral
 FREE ao_integrals_cache
- !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx)
+ !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral)
 do l=mo_integrals_cache_min,mo_integrals_cache_max
   do k=mo_integrals_cache_min,mo_integrals_cache_max
     do j=mo_integrals_cache_min,mo_integrals_cache_max
@ -345,7 +356,12 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:m
         !DIR$ FORCEINLINE
         call bielec_integrals_index(i,j,k,l,idx)
         !DIR$ FORCEINLINE
-         call map_get(mo_integrals_map,idx,mo_integrals_cache(i,j,k,l))
+         call map_get(mo_integrals_map,idx,integral)
+         ii = l-mo_integrals_cache_min
+         ii = ior( ishft(ii,6), k-mo_integrals_cache_min)
+         ii = ior( ishft(ii,6), j-mo_integrals_cache_min)
+         ii = ior( ishft(ii,6), i-mo_integrals_cache_min)
+         mo_integrals_cache(ii) = integral
       enddo
     enddo
   enddo
@ -354,6 +370,7 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:m

 END_PROVIDER

+
 double precision function get_mo_bielec_integral(i,j,k,l,map)
  use map_module
  implicit none
@ -362,24 +379,26 @@ double precision function get_mo_bielec_integral(i,j,k,l,map)
  END_DOC
  integer, intent(in)            :: i,j,k,l
  integer(key_kind)              :: idx
+  integer                        :: ii
  type(map_type), intent(inout)  :: map
  real(integral_kind)            :: tmp
  PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
-  if ( (i >= mo_integrals_cache_min) .and. &
-       (j >= mo_integrals_cache_min) .and. &
-       (k >= mo_integrals_cache_min) .and. &
-       (l >= mo_integrals_cache_min) .and. &
-       (i <= mo_integrals_cache_max) .and. &
-       (j <= mo_integrals_cache_max) .and. &
-       (k <= mo_integrals_cache_max) .and. &
-       (l <= mo_integrals_cache_max) ) then
-       get_mo_bielec_integral = mo_integrals_cache(i,j,k,l)
-  else
+  ii = l-mo_integrals_cache_min
+  ii = ior(ii, k-mo_integrals_cache_min)
+  ii = ior(ii, j-mo_integrals_cache_min)
+  ii = ior(ii, i-mo_integrals_cache_min)
+  if (iand(ii, -64) /= 0) then
    !DIR$ FORCEINLINE
    call bielec_integrals_index(i,j,k,l,idx)
    !DIR$ FORCEINLINE
    call map_get(map,idx,tmp)
    get_mo_bielec_integral = dble(tmp)
+  else
+    ii = l-mo_integrals_cache_min
+    ii = ior( ishft(ii,6), k-mo_integrals_cache_min)
+    ii = ior( ishft(ii,6), j-mo_integrals_cache_min)
+    ii = ior( ishft(ii,6), i-mo_integrals_cache_min)
+    get_mo_bielec_integral = mo_integrals_cache(ii)
  endif
 end

@ -390,16 +409,15 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map)
  ! Returns one integral <ij|kl> in the MO basis
  END_DOC
  integer, intent(in)            :: i,j,k,l
-  integer(key_kind)              :: idx
  type(map_type), intent(inout)  :: map
-  real(integral_kind)            :: tmp
+  double precision, external     :: get_mo_bielec_integral
+
  PROVIDE mo_bielec_integrals_in_map  mo_integrals_cache
  if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then
-    double precision, external :: get_mo_bielec_integral
    !DIR$ FORCEINLINE
    get_mo_bielec_integral_schwartz = get_mo_bielec_integral(i,j,k,l,map)
  else
-    tmp = 0.d0
+    get_mo_bielec_integral_schwartz = 0.d0
  endif
 end

--- a/src/Integrals_Bielec/mo_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f
@ -467,6 +467,31 @@ END_PROVIDER
   enddo
 enddo
  
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_mipi, (mo_tot_num_align,mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_mipi_anti, (mo_tot_num_align,mo_tot_num,mo_tot_num) ]
+  implicit none
+  BEGIN_DOC
+  ! <mi|pi> and <mi|pi> - <mi|ip>. Indices are (i,m,p)
+  END_DOC
+  
+  integer                        :: m,i,p
+  double precision               :: get_mo_bielec_integral
+  
+  PROVIDE mo_bielec_integrals_in_map
+
+  mo_bielec_integral_mipi = 0.d0
+  mo_bielec_integral_mipi_anti = 0.d0
+  do p=1,mo_tot_num
+   do m=1,mo_tot_num
+    do i=1,mo_tot_num
+      mo_bielec_integral_mipi(i,m,p) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map)
+      mo_bielec_integral_mipi_anti(i,m,p) = mo_bielec_integral_mipi(i,m,p) - get_mo_bielec_integral(m,i,i,p,mo_integrals_map)
+    enddo
+   enddo
+ enddo
+  
 END_PROVIDER

 BEGIN_PROVIDER [ double precision, mo_bielec_integral_schwartz,(mo_tot_num,mo_tot_num)  ]
--- a/src/Utils/LinearAlgebra.irp.f
+++ b/src/Utils/LinearAlgebra.irp.f
@ -11,9 +11,9 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n)
  
  integer, intent(in)             :: LDA, LDU, LDVt, m, n
  double precision, intent(in)    :: A(LDA,n)
-  double precision, intent(out)   :: U(LDU,n)
+  double precision, intent(out)   :: U(LDU,m)
  double precision,intent(out)    :: Vt(LDVt,n)
-  double precision,intent(out)    :: D(n)
+  double precision,intent(out)    :: D(min(m,n))
  double precision,allocatable    :: work(:)
  integer                         :: info, lwork, i, j, k
  
@ -24,13 +24,13 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n)
  ! Find optimal size for temp arrays
  allocate(work(1))
  lwork = -1
-  call dgesvd('A','A', n, n, A_tmp, LDA,                             &
+  call dgesvd('A','A', m, n, A_tmp, LDA,                             &
      D, U, LDU, Vt, LDVt, work, lwork, info)
  lwork = work(1)
  deallocate(work)

  allocate(work(lwork))
-  call dgesvd('A','A', n, n, A_tmp, LDA,                             &
+  call dgesvd('A','A', m, n, A_tmp, LDA,                             &
      D, U, LDU, Vt, LDVt, work, lwork, info)
  deallocate(work,A_tmp)

@ -125,6 +125,40 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m)
 end


+subroutine ortho_qr(A,LDA,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !
+  ! A : matrix to orthogonalize
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! n : Number of rows of A
+  !
+  ! m : Number of columns of A
+  !
+  END_DOC
+  integer, intent(in)            :: m,n, LDA
+  double precision, intent(inout) :: A(LDA,n)
+
+  integer                        :: lwork, info
+  integer, allocatable           :: jpvt(:)
+  double precision, allocatable  :: tau(:), work(:)
+
+  allocate (jpvt(n), tau(n), work(1))
+  LWORK=-1
+!  call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO)
+  call  dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  LWORK=WORK(1)
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+!  call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO)
+  call  dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
+  deallocate(WORK,jpvt,tau)
+end
+
 subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m)
  implicit none
  BEGIN_DOC
@ -161,7 +195,7 @@ subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m)

  allocate(U(ldc,n),Vt(lda,n),S_half(lda,n),D(n))

-  call svd(overlap,lda,U,ldc,D,Vt,lda,m,n)
+  call svd(overlap,lda,U,ldc,D,Vt,lda,n,n)

  !$OMP PARALLEL DEFAULT(NONE) &
  !$OMP SHARED(S_half,U,D,Vt,n,C,m) &
--- a/src/ZMQ/utils.irp.f
+++ b/src/ZMQ/utils.irp.f
@ -17,8 +17,6 @@ END_PROVIDER


 BEGIN_PROVIDER [ character*(128), qp_run_address ]
-&BEGIN_PROVIDER [ character*(128), qp_run_address_ipc ]
-&BEGIN_PROVIDER [ character*(128), qp_run_address_tcp ]
 &BEGIN_PROVIDER [ integer, zmq_port_start ]
  use f77_zmq
  implicit none
@ -36,22 +34,19 @@ END_PROVIDER
  integer                        :: i
  do i=len(buffer),1,-1
    if ( buffer(i:i) == ':') then
-      qp_run_address_tcp = trim(buffer(1:i-1))
+      qp_run_address = trim(buffer(1:i-1))
      read(buffer(i+1:), *) zmq_port_start
      exit
    endif
  enddo
-  qp_run_address_ipc = 'ipc:///tmp/qp_run'
-  qp_run_address = qp_run_address_ipc
 END_PROVIDER

-
 BEGIN_PROVIDER [ character*(128), zmq_socket_pull_tcp_address    ]
-&BEGIN_PROVIDER [ character*(128), zmq_socket_pull_inproc_address ]
 &BEGIN_PROVIDER [ character*(128), zmq_socket_pair_inproc_address ]
 &BEGIN_PROVIDER [ character*(128), zmq_socket_push_tcp_address    ]
+&BEGIN_PROVIDER [ character*(128), zmq_socket_pull_inproc_address ]
 &BEGIN_PROVIDER [ character*(128), zmq_socket_push_inproc_address ]
-&BEGIN_PROVIDER [ character*(128), zmq_socket_sub_address     ]
+&BEGIN_PROVIDER [ character*(128), zmq_socket_sub_tcp_address ]
  use f77_zmq
  implicit none
  BEGIN_DOC
@ -59,12 +54,12 @@ END_PROVIDER
  END_DOC
  character*(8), external        :: zmq_port

+  zmq_socket_sub_tcp_address     = trim(qp_run_address)//':'//zmq_port(1)//' '
  zmq_socket_pull_tcp_address    = 'tcp://*:'//zmq_port(2)//' '
+  zmq_socket_push_tcp_address    = trim(qp_run_address)//':'//zmq_port(2)//' '
  zmq_socket_pull_inproc_address = 'inproc://'//zmq_port(2)//' '
-  zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' '
-  zmq_socket_push_tcp_address    = trim(qp_run_address_tcp)//':'//zmq_port(2)//' '
  zmq_socket_push_inproc_address = zmq_socket_pull_inproc_address
-  zmq_socket_sub_address         = trim(qp_run_address)//':'//zmq_port(1)//' '
+  zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' '

  ! /!\ Don't forget to change subroutine reset_zmq_addresses
 END_PROVIDER
@ -77,13 +72,12 @@ subroutine reset_zmq_addresses
  END_DOC
  character*(8), external        :: zmq_port

+  zmq_socket_sub_tcp_address     = trim(qp_run_address)//':'//zmq_port(1)//' '
  zmq_socket_pull_tcp_address    = 'tcp://*:'//zmq_port(2)//' '
+  zmq_socket_push_tcp_address    = trim(qp_run_address)//':'//zmq_port(2)//' '
  zmq_socket_pull_inproc_address = 'inproc://'//zmq_port(2)//' '
-  zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' '
-  zmq_socket_push_tcp_address    = trim(qp_run_address_tcp)//':'//zmq_port(2)//' '
  zmq_socket_push_inproc_address = zmq_socket_pull_inproc_address
-  zmq_socket_sub_address         = trim(qp_run_address)//':'//zmq_port(1)//' '
-
+  zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' '
 end


@ -111,7 +105,6 @@ subroutine switch_qp_run_to_master
      exit
    endif
  enddo
-  qp_run_address_tcp = qp_run_address
  call reset_zmq_addresses

 end
@ -264,15 +257,39 @@ function new_zmq_pull_socket()
    stop 'Unable to set ZMQ_RCVHWM on pull socket'
  endif
  
-  rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_tcp_address)
-  if (rc /= 0) then
-    print *,  'Unable to bind new_zmq_pull_socket (tcp)', zmq_socket_pull_tcp_address
-    stop 
+  integer :: icount
+
+  icount = 10
+  do while (icount > 0)
+    rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_inproc_address)
+    if (rc /= 0) then
+      icount = icount-1
+      call sleep(3)
+    else
+      exit
+    endif
+  enddo
+
+  if (icount == 0) then
+    print *,  'Unable to bind new_zmq_pull_socket (inproc)', zmq_socket_pull_inproc_address
+    stop -1
  endif
-  
-  rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_inproc_address)
-  if (rc /= 0) then
-    stop 'Unable to bind new_zmq_pull_socket (inproc)'
+
+
+  icount = 10
+  do while (icount > 0)
+    rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_tcp_address)
+    if (rc /= 0) then
+      icount = icount-1
+      call sleep(3)
+    else
+      exit
+    endif
+  enddo
+
+  if (icount == 0) then
+    print *,  'Unable to bind new_zmq_pull_socket (tcp)', zmq_socket_pull_tcp_address
+    stop -1
  endif
  
 end
@ -374,7 +391,7 @@ function new_zmq_sub_socket()
    stop 'Unable to subscribe new_zmq_sub_socket'
  endif

-  rc = f77_zmq_connect(new_zmq_sub_socket, zmq_socket_sub_address)
+  rc = f77_zmq_connect(new_zmq_sub_socket, zmq_socket_sub_tcp_address)
  if (rc /= 0) then
    stop 'Unable to connect new_zmq_sub_socket'
  endif