diff --git a/ocaml/Address.ml b/ocaml/Address.ml
index 47eb3fd6..c819a463 100644
--- a/ocaml/Address.ml
+++ b/ocaml/Address.ml
@@ -8,7 +8,9 @@ module Tcp : sig
 end = struct
   type t = string
   let of_string x =
-    assert (String.is_prefix ~prefix:"tcp://" x);
+    if not (String.is_prefix ~prefix:"tcp://" x) then
+      invalid_arg "Address Invalid"
+    ;
     x
   let create ~host ~port =
     assert (port > 0);
diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml
index 1b2acdee..91fbd231 100644
--- a/ocaml/TaskServer.ml
+++ b/ocaml/TaskServer.ml
@@ -49,7 +49,7 @@ let zmq_context =
   ZMQ.Context.create ()
 
 let () =
-  ZMQ.Context.set_io_threads zmq_context 2
+  ZMQ.Context.set_io_threads zmq_context 8
 
 
 let bind_socket ~socket_type ~socket ~port =
diff --git a/plugins/Full_CI_ZMQ/pt2_slave.irp.f b/plugins/Full_CI_ZMQ/pt2_slave.irp.f
index f41ddb30..0aacbce7 100644
--- a/plugins/Full_CI_ZMQ/pt2_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/pt2_slave.irp.f
@@ -5,7 +5,8 @@ program pt2_slave
   END_DOC
 
   read_wf = .False.
-  SOFT_TOUCH read_wf
+  distributed_davidson = .False.
+  SOFT_TOUCH read_wf distributed_davidson
   call provide_everything
   call switch_qp_run_to_master
   call run_wf
diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
index 2293ecab..ceb7bd95 100644
--- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
@@ -30,7 +30,6 @@ subroutine run_selection_slave(thread,iproc,energy)
   zmq_socket_push      = new_zmq_push_socket(thread)
   call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
   if(worker_id == -1) then
-    print *, "WORKER -1"
     call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
     call end_zmq_push_socket(zmq_socket_push,thread)
     return
@@ -52,13 +51,13 @@ subroutine run_selection_slave(thread,iproc,energy)
         call create_selection_buffer(N, N*2, buf)
         call create_selection_buffer(N, N*2, buf2)
       else
-        if(N /= buf%N) stop "N changed... wtf man??"
+        ASSERT (N == buf%N)
       end if
       call select_connected(i_generator,energy,pt2,buf,0)
     endif
 
     if(done .or. ctask == size(task_id)) then
-      if(buf%N == 0 .and. ctask > 0) stop "uninitialized selection_buffer"
+      ASSERT (.not.(buf%N == 0 .and. ctask > 0))
       do i=1, ctask
          call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i))
       end do
diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f
index 451b70e3..8e1e43ae 100644
--- a/plugins/Full_CI_ZMQ/selection.irp.f
+++ b/plugins/Full_CI_ZMQ/selection.irp.f
@@ -86,7 +86,10 @@ subroutine select_connected(i_generator,E0,pt2,b,subset)
   double precision, intent(in)   :: E0(N_states)
 
   integer(bit_kind)              :: hole_mask(N_int,2), particle_mask(N_int,2)
-  double precision               :: fock_diag_tmp(2,mo_tot_num+1)
+
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  allocate(fock_diag_tmp(2,mo_tot_num+1))
   
   call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
 
@@ -100,6 +103,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset)
     enddo
     call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset)
   enddo
+  deallocate(fock_diag_tmp)
 end subroutine
 
 
@@ -188,18 +192,21 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
   use bitmasks
   implicit none
   
-  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
-  integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size)
-  logical, intent(in) :: bannedOrb(mo_tot_num)
-  double precision, intent(in) :: coefs(N_states)
+  integer(bit_kind), intent(in)  :: gen(N_int, 2), mask(N_int, 2)
+  integer(1), intent(in)         :: phasemask(2,N_int*bit_kind_size)
+  logical, intent(in)            :: bannedOrb(mo_tot_num)
+  double precision, intent(in)   :: coefs(N_states)
   double precision, intent(inout) :: vect(N_states, mo_tot_num)
-  integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2)
-  integer :: i, hole, p1, p2, sh
-  logical :: ok, lbanned(mo_tot_num)
-  integer(bit_kind) :: det(N_int, 2)
-  double precision :: hij
-  double precision, external :: get_phase_bi, integral8
+  integer, intent(in)            :: sp, h(0:2, 2), p(0:3, 2)
+  integer                        :: i, hole, p1, p2, sh
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision               :: hij
+  double precision, external     :: get_phase_bi, integral8
   
+  allocate (lbanned(mo_tot_num))
   lbanned = bannedOrb
   sh = 1
   if(h(0,2) == 1) sh = 2
@@ -239,6 +246,7 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
       vect(:,i) += hij * coefs
     end do
   end if
+  deallocate(lbanned)
 
   call apply_particle(mask, sp, p1, det, ok,  N_int)
   call i_h_j(gen, det, N_int, hij)
@@ -250,17 +258,20 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
   use bitmasks
   implicit none
   
-  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
-  integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size)
-  logical, intent(in) :: bannedOrb(mo_tot_num)
-  double precision, intent(in) :: coefs(N_states)
+  integer(bit_kind), intent(in)  :: gen(N_int, 2), mask(N_int, 2)
+  integer(1), intent(in)         :: phasemask(2,N_int*bit_kind_size)
+  logical, intent(in)            :: bannedOrb(mo_tot_num)
+  double precision, intent(in)   :: coefs(N_states)
   double precision, intent(inout) :: vect(N_states, mo_tot_num)
-  integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2)
-  integer :: i
-  logical :: ok, lbanned(mo_tot_num)
-  integer(bit_kind) :: det(N_int, 2)
-  double precision :: hij
+  integer, intent(in)            :: sp, h(0:2, 2), p(0:3, 2)
+  integer                        :: i
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision               :: hij
   
+  allocate(lbanned(mo_tot_num))
   lbanned = bannedOrb
   lbanned(p(1,sp)) = .true.
   do i=1,mo_tot_num
@@ -269,6 +280,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
     call i_h_j(gen, det, N_int, hij)
     vect(:, i) += hij * coefs
   end do
+  deallocate(lbanned)
 end 
 
 subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset)
@@ -286,7 +298,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   double precision, intent(inout) :: pt2(N_states)
   type(selection_buffer), intent(inout) :: buf
   
-  double precision                :: mat(N_states, mo_tot_num, mo_tot_num)
   integer                         :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii
   integer(bit_kind)               :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2)
   logical                         :: fullMatch, ok
@@ -294,8 +305,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2)
   integer,allocatable               :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:)
   integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :)
+  logical, allocatable           :: banned(:,:,:), bannedOrb(:,:)
+
+  double precision, allocatable   :: mat(:,:,:)
   
-  logical :: monoAdo, monoBdo;
+  logical :: monoAdo, monoBdo
   integer :: maskInd
 
   PROVIDE fragment_count
@@ -303,8 +317,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   monoAdo = .true.
   monoBdo = .true.
   
-  allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det))
-  allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det))
   
   do k=1,N_int
     hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
@@ -316,8 +328,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer                        :: N_holes(2), N_particles(2)
   integer                        :: hole_list(N_int*bit_kind_size,2)
   integer                        :: particle_list(N_int*bit_kind_size,2)
-  integer(bit_kind), allocatable:: preinteresting_det(:,:,:)
-  allocate (preinteresting_det(N_int,2,N_det))
 
   call bitstring_to_list_ab(hole    , hole_list    , N_holes    , N_int)
   call bitstring_to_list_ab(particle, particle_list, N_particles, N_int)
@@ -370,13 +380,18 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       endif
     enddo
   enddo
+  deallocate(exc_degree)
   nmax=k-1
+
   allocate(iorder(nmax))
   do i=1,nmax
     iorder(i) = i
   enddo
   call isort(indices,iorder,nmax)
+  deallocate(iorder)
 
+  allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), &
+            interesting(0:N_det_selectors), fullinteresting(0:N_det))
   preinteresting(0) = 0
   prefullinteresting(0) = 0
   
@@ -387,7 +402,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   
   do k=1,nmax
     i = indices(k)
-!   do i=1,N_det
     mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
     mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
     nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) 
@@ -401,18 +415,18 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       if(i <= N_det_selectors) then
         preinteresting(0) += 1
         preinteresting(preinteresting(0)) = i
-        do j=1,N_int
-          preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i)
-          preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i)
-        enddo
       else if(nt <= 2) then
         prefullinteresting(0) += 1
         prefullinteresting(prefullinteresting(0)) = i
       end if
     end if
   end do
+  deallocate(indices)
   
 
+  allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det))
+  allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2))
+  allocate (mat(N_states, mo_tot_num, mo_tot_num))
   maskInd = -1
   integer :: nb_count
   do s1=1,2
@@ -427,32 +441,32 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       
       do ii=1,preinteresting(0)
         i = preinteresting(ii)
-        mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii))
-        mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii))
+        mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,preinteresting(ii)))
+        mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,preinteresting(ii)))
         nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
         do j=2,N_int
-          mobMask(j,1) = iand(negMask(j,1), preinteresting_det(j,1,ii))
-          mobMask(j,2) = iand(negMask(j,2), preinteresting_det(j,2,ii))
+          mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(1,1,preinteresting(ii)))
+          mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(1,2,preinteresting(ii)))
           nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
         end do
         
         if(nt <= 4) then
           interesting(0) += 1
           interesting(interesting(0)) = i
-          minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii)
-          minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii)
+          minilist(1,1,interesting(0)) = psi_det_sorted(1,1,preinteresting(ii))
+          minilist(1,2,interesting(0)) = psi_det_sorted(1,2,preinteresting(ii))
           do j=2,N_int
-            minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii)
-            minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii)
+            minilist(j,1,interesting(0)) = psi_det_sorted(1,1,preinteresting(ii))
+            minilist(j,2,interesting(0)) = psi_det_sorted(1,2,preinteresting(ii))
           enddo
           if(nt <= 2) then
             fullinteresting(0) += 1
             fullinteresting(fullinteresting(0)) = i
-            fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii)
-            fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii)
+            fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,preinteresting(ii))
+            fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,preinteresting(ii))
             do j=2,N_int
-              fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii)
-              fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii)
+              fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(1,1,preinteresting(ii))
+              fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(1,2,preinteresting(ii))
             enddo
           end if
         end if
@@ -493,8 +507,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
         if(s1 == s2) ib = i1+1
         monoAdo = .true.
         do i2=N_holes(s2),ib,-1   ! Generate low excitations first
-          logical                        :: banned(mo_tot_num, mo_tot_num,2)
-          logical                        :: bannedOrb(mo_tot_num, 2)
           
           h2 = hole_list(i2,s2)
           call apply_hole(pmask, s2,h2, mask, ok, N_int)
@@ -534,6 +546,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       enddo
     enddo
   enddo
+  deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
+  deallocate(minilist, fullminilist, banned, bannedOrb,mat)
 end subroutine
 
 
@@ -814,26 +828,28 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   use bitmasks
   implicit none
 
-  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
-  integer(1),intent(in) :: phasemask(2,N_int*bit_kind_size)
-  logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2)
-  integer(bit_kind) :: det(N_int, 2)
-  double precision, intent(in) :: coefs(N_states)
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(1),intent(in)          :: phasemask(2,N_int*bit_kind_size)
+  logical, intent(in)            :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states)
   double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num)
-  double precision :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num)
-  double precision, external :: get_phase_bi, integral8
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision               :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num)
+  double precision, external     :: get_phase_bi, integral8
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib
   
-  logical :: lbanned(mo_tot_num, 2), ok
-  integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, hfix, pfix, h1, h2, p1, p2, ib
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
   
-  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
-  
-  integer, parameter :: turn2(2) = (/2,1/)
-  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
-  
-  integer :: bant
+  integer                        :: bant
   
   
+  allocate (lbanned(mo_tot_num, 2))
   lbanned = bannedOrb
     
   do i=1, p(0,1)
@@ -952,6 +968,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
       mat(:,p1,p1:) += tmp_row2(:,p1:)
     end if
   end if
+  deallocate(lbanned)
 
  !! MONO
     if(sp == 3) then
diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
index c1b6bcd9..b41662f4 100644
--- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
@@ -5,7 +5,8 @@ program selection_slave
   END_DOC
 
   read_wf = .False.
-  SOFT_TOUCH read_wf
+  distributed_davidson = .False.
+  SOFT_TOUCH read_wf distributed_davidson
   call provide_everything
   call switch_qp_run_to_master
   call run_wf
@@ -13,7 +14,7 @@ end
 
 subroutine provide_everything
   PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
-   PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count
+  PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count 
 end
 
 subroutine run_wf
@@ -63,6 +64,7 @@ subroutine run_wf
       ! --------
 
       print *,  'Davidson'
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
       call omp_set_nested(.True.)
       call davidson_slave_tcp(0)
       call omp_set_nested(.False.)
diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f
index 4b4276fd..76386c7b 100644
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@@ -64,8 +64,8 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   integer                        :: imin, imax, ishift, istep
   
   integer, allocatable           :: psi_det_read(:,:,:)
-  double precision, allocatable  :: v_0(:,:), s_0(:,:), u_t(:,:)
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0
+  double precision, allocatable  :: v_t(:,:), s_t(:,:), u_t(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t, s_t
 
   ! Get wave function (u_t)
   ! -----------------------
@@ -108,7 +108,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
     TOUCH N_det
   endif
 
-  allocate(v_0(sze,N_st), s_0(sze,N_st),u_t(N_st,N_det_read))
+  allocate(u_t(N_st,N_det_read))
 
   rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det_read*bit_kind,0)
   if (rc /= N_int*2*N_det_read*bit_kind) then
@@ -119,7 +119,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   rc = f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)*8,0)
   if (rc /= size(u_t)*8) then
     print *,  rc, size(u_t)*8
-    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)×8,0)'
+    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)*8,0)'
     stop 'error'
   endif
 
@@ -133,41 +133,50 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   ! ---------
 
 
+  allocate(v_t(N_st,N_det), s_t(N_st,N_det))
   do
-    v_0 = 0.d0
-    s_0 = 0.d0
     call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
     if(task_id == 0) exit
     read (msg,*) imin, imax, ishift, istep
-    call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,N_det,imin,imax,ishift,istep)
+    v_t = 0.d0
+    s_t = 0.d0
+    call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,N_det,imin,imax,ishift,istep)
     call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
-    call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
+    call davidson_push_results(zmq_socket_push, v_t, s_t, imin, imax, task_id)
   end do
-  deallocate(v_0, s_0, u_t)
+  deallocate(u_t,v_t, s_t)
 
 end subroutine
 
 
 
-subroutine davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
+subroutine davidson_push_results(zmq_socket_push, v_t, s_t, imin, imax, task_id)
   use f77_zmq
   implicit none
   
   integer(ZMQ_PTR)    ,intent(in)    :: zmq_socket_push
-  integer             ,intent(in)    :: task_id
-  double precision    ,intent(in)    :: v_0(N_det,N_states_diag)
-  double precision    ,intent(in)    :: s_0(N_det,N_states_diag)
-  integer                            :: rc
+  integer             ,intent(in)    :: task_id, imin, imax
+  double precision    ,intent(in)    :: v_t(N_states_diag,N_det)
+  double precision    ,intent(in)    :: s_t(N_states_diag,N_det)
+  integer                            :: rc, sz
 
-  rc = f77_zmq_send( zmq_socket_push, v_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
-  if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push vt"
+  sz = (imax-imin+1)*N_states_diag
 
-  rc = f77_zmq_send( zmq_socket_push, s_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
-  if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push st"
-
-  rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
+  rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
   if(rc /= 4) stop "davidson_push_results failed to push task_id"
 
+  rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop "davidson_push_results failed to push imin"
+
+  rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop "davidson_push_results failed to push imax"
+
+  rc = f77_zmq_send( zmq_socket_push, v_t(1,imin), 8*sz, ZMQ_SNDMORE)
+  if(rc /= 8*sz) stop "davidson_push_results failed to push vt"
+
+  rc = f77_zmq_send( zmq_socket_push, s_t(1,imin), 8*sz, 0)
+  if(rc /= 8*sz) stop "davidson_push_results failed to push st"
+
 ! Activate is zmq_socket_push is a REQ
 IRP_IF ZMQ_PUSH
 IRP_ELSE
@@ -183,26 +192,34 @@ end subroutine
 
 
 
-subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
+subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
   use f77_zmq
   implicit none
   
   integer(ZMQ_PTR)    ,intent(in)     :: zmq_socket_pull
-  integer             ,intent(out)    :: task_id
-  double precision    ,intent(out)    :: v_0(N_det,N_states_diag)
-  double precision    ,intent(out)    :: s_0(N_det,N_states_diag)
+  integer             ,intent(out)    :: task_id, imin, imax
+  double precision    ,intent(out)    :: v_t(N_states_diag,N_det)
+  double precision    ,intent(out)    :: s_t(N_states_diag,N_det)
 
-  integer                            :: rc
-
-  rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0)
-  if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0"
-
-  rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0)
-  if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0"
+  integer                            :: rc, sz
 
   rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
   if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
 
+  rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0)
+  if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
+
+  rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0)
+  if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
+
+  sz = (imax-imin+1)*N_states_diag
+
+  rc = f77_zmq_recv( zmq_socket_pull, v_t(1,imin), 8*sz, 0)
+  if(rc /= 8*sz) stop "davidson_pull_results failed to pull v_t"
+
+  rc = f77_zmq_recv( zmq_socket_pull, s_t(1,imin), 8*sz, 0)
+  if(rc /= 8*sz) stop "davidson_pull_results failed to pull s_t"
+
 ! Activate if zmq_socket_pull is a REP
 IRP_IF ZMQ_PUSH
 IRP_ELSE
@@ -227,29 +244,29 @@ subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze, N_st)
   double precision    ,intent(inout) :: v0(sze, N_st)
   double precision    ,intent(inout) :: s0(sze, N_st)
   
-  integer                          :: more, task_id
+  integer                          :: more, task_id, imin, imax
   
-  double precision, allocatable :: v_0(:,:), s_0(:,:)
+  double precision, allocatable :: v_t(:,:), s_t(:,:)
   integer :: i,j
   integer(ZMQ_PTR), external     :: new_zmq_pull_socket
   integer(ZMQ_PTR)               :: zmq_socket_pull
 
-  allocate(v_0(N_det,N_st), s_0(N_det,N_st))
+  allocate(v_t(N_st,N_det), s_t(N_st,N_det))
   v0 = 0.d0 
   s0 = 0.d0 
   more = 1
   zmq_socket_pull = new_zmq_pull_socket()
   do while (more == 1)
-    call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
+    call davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
     do j=1,N_st
-      do i=1,N_det
-        v0(i,j) = v0(i,j) + v_0(i,j)
-        s0(i,j) = s0(i,j) + s_0(i,j)
+      do i=imin,imax
+        v0(i,j) = v0(i,j) + v_t(j,i)
+        s0(i,j) = s0(i,j) + s_t(j,i)
       enddo
     enddo
     call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
   end do
-  deallocate(v_0,s_0)
+  deallocate(v_t,s_t)
   call end_zmq_pull_socket(zmq_socket_pull)
 
 end subroutine
@@ -298,7 +315,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
 
   integer(ZMQ_PTR) :: zmq_to_qp_run_socket
   
-  if(N_st /= N_states_diag .or. sze < N_det) stop "assert fail in H_S2_u_0_nstates"
+  ASSERT (N_st == N_states_diag)
+  ASSERT (sze >= N_det) 
 
   call new_parallel_job(zmq_to_qp_run_socket,'davidson')
   
@@ -348,16 +366,15 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
 
   integer :: istep, imin, imax, ishift
   double precision :: w, max_workload, N_det_inv, di
-  max_workload = 1000000.d0
   w = 0.d0
-  istep=8
+  istep=1
   ishift=0
   imin=1
   N_det_inv = 1.d0/dble(N_det)
   di = dble(N_det)
+  max_workload = 50000.d0
   do imax=1,N_det
-    di = di-1.d0
-    w = w + di*N_det_inv
+    w = w + 1.d0
     if (w > max_workload) then
       do ishift=0,istep-1
         write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index 54672609..ffd8b971 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -220,7 +220,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
       ! -----------------------------------------
       
        
-      if (distributed_davidson) then
+      if ((sze > 100000).and.distributed_davidson) then
           call H_S2_u_0_nstates_zmq   (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
       else
           call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
@@ -444,3 +444,37 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
       )
 end
 
+subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes e_0 = <u_0|H|u_0>/<u_0|u_0>
+  !
+  ! n : number of determinants
+  !
+  END_DOC
+  integer, intent(in)            :: n,Nint, N_st, sze
+  double precision, intent(out)  :: e_0(N_st)
+  double precision, intent(inout) :: u_0(sze,N_st)
+  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
+  
+  double precision, allocatable  :: v_0(:,:), s_0(:,:), u_1(:,:)
+  double precision               :: u_dot_u,u_dot_v,diag_H_mat_elem
+  integer                        :: i,j
+
+  if ((sze > 100000).and.distributed_davidson) then
+    allocate (v_0(sze,N_states_diag),s_0(sze,N_states_diag), u_1(sze,N_states_diag))
+    u_1(1:sze,1:N_states) = u_0(1:sze,1:N_states) 
+    u_1(1:sze,N_states+1:N_states_diag) = 0.d0
+    call H_S2_u_0_nstates_zmq(v_0,s_0,u_1,N_states_diag,sze)
+    deallocate(u_1)
+  else
+    allocate (v_0(sze,N_st),s_0(sze,N_st))
+    call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
+  endif
+  do i=1,N_st
+    e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
+  enddo
+  deallocate (s_0, v_0)
+end
+
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index d29b39f1..1fbf00e0 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -1,29 +1,3 @@
-subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  BEGIN_DOC
-  ! Computes e_0 = <u_0|H|u_0>/<u_0|u_0>
-  !
-  ! n : number of determinants
-  !
-  END_DOC
-  integer, intent(in)            :: n,Nint, N_st, sze
-  double precision, intent(out)  :: e_0(N_st)
-  double precision, intent(inout) :: u_0(sze,N_st)
-  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
-  
-  double precision, allocatable  :: v_0(:,:), s_0(:,:)
-  double precision               :: u_dot_u,u_dot_v,diag_H_mat_elem
-  integer                        :: i,j
-
-  allocate (v_0(sze,N_st),s_0(sze,N_st))
-  call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
-  do i=1,N_st
-    e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
-  enddo
-  deallocate (s_0, v_0)
-end
-
 BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
   implicit none
   BEGIN_DOC
@@ -47,14 +21,14 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
   integer, intent(in)            :: N_st,sze
   double precision, intent(inout)  :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
   integer :: k
-  double precision, allocatable  :: u_t(:,:)
+  double precision, allocatable  :: u_t(:,:), v_t(:,:), s_t(:,:)
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
-  allocate(u_t(N_st,N_det))
+  allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det))
   do k=1,N_st
     call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
   enddo
-  v_0 = 0.d0
-  s_0 = 0.d0
+  v_t = 0.d0
+  s_t = 0.d0
   call dtranspose(                                                   &
       u_0,                                                           &
       size(u_0, 1),                                                  &
@@ -62,9 +36,23 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
       size(u_t, 1),                                                  &
       N_det, N_st)
 
-  call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,1,N_det,0,1)
+  call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1)
   deallocate(u_t)
 
+  call dtranspose(                                                   &
+      v_t,                                                           &
+      size(v_t, 1),                                                  &
+      v_0,                                                           &
+      size(v_0, 1),                                                  &
+      N_st, N_det)
+  call dtranspose(                                                   &
+      s_t,                                                           &
+      size(s_t, 1),                                                  &
+      s_0,                                                           &
+      size(s_0, 1),                                                  &
+      N_st, N_det)
+  deallocate(v_t,s_t)
+
   do k=1,N_st
     call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
     call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
@@ -74,47 +62,47 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
 end
 
 
-subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
   use bitmasks
   implicit none
   BEGIN_DOC
-  ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
+  ! Computes v_t = H|u_t> and s_t = S^2 |u_t>
   !
   ! Default should be 1,N_det,0,1
   END_DOC
   integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
   double precision, intent(in)   :: u_t(N_st,N_det)
-  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st) 
+  double precision, intent(out)  :: v_t(N_st,sze), s_t(N_st,sze) 
 
   
   PROVIDE ref_bitmask_energy N_int
 
   select case (N_int)
     case (1)
-      call H_S2_u_0_nstates_openmp_work_1(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+      call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
     case (2)
-      call H_S2_u_0_nstates_openmp_work_2(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+      call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
     case (3)
-      call H_S2_u_0_nstates_openmp_work_3(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+      call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
     case (4)
-      call H_S2_u_0_nstates_openmp_work_4(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+      call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
     case default
-      call H_S2_u_0_nstates_openmp_work_N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+      call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
   end select
 end
 BEGIN_TEMPLATE
 
-subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
+subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
   use bitmasks
   implicit none
   BEGIN_DOC
-  ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
+  ! Computes v_t = H|u_t> and s_t = S^2 |u_t>
   !
   ! Default should be 1,N_det,0,1
   END_DOC
   integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
   double precision, intent(in)   :: u_t(N_st,N_det)
-  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st) 
+  double precision, intent(out)  :: v_t(N_st,sze), s_t(N_st,sze) 
 
   double precision               :: hij, sij
   integer                        :: i,j,k,l
@@ -135,8 +123,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
   integer, allocatable           :: idx(:), idx0(:)
   integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
   integer*8                      :: k8
-  double precision, allocatable  :: v_t(:,:), s_t(:,:)
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t
 
   maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
   allocate(idx0(maxab))
@@ -159,14 +145,15 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
       !$OMP          psi_bilinear_matrix_transp_order, N_st,         &
       !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
       !$OMP          psi_bilinear_matrix_columns_loc,                &
-      !$OMP          istart, iend, istep, irp_here,                  &
-      !$OMP          ishift, idx0, u_t, maxab, v_0, s_0)             &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t, s_t,        &
+      !$OMP          ishift, idx0, u_t, maxab)                       &
       !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
       !$OMP          lcol, lrow, l_a, l_b,                           &
       !$OMP          buffer, doubles, n_doubles,                     &
-      !$OMP          tmp_det2, hij, sij, idx, l, kcol_prev, v_t,     &
+      !$OMP          tmp_det2, hij, sij, idx, l, kcol_prev,          &
       !$OMP          singles_a, n_singles_a, singles_b,              &
-      !$OMP          n_singles_b, s_t, k8)
+      !$OMP          n_singles_b, k8)
   
   ! Alpha/Beta double excitations
   ! =============================
@@ -175,12 +162,9 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
       singles_a(maxab),                                              &
       singles_b(maxab),                                              &
       doubles(maxab),                                                &
-      idx(maxab),                                                    &
-      v_t(N_st,N_det), s_t(N_st,N_det))
-  kcol_prev=-1
+      idx(maxab))
 
-  v_t = 0.d0
-  s_t = 0.d0
+  kcol_prev=-1
 
   ASSERT (iend <= N_det)
   ASSERT (istart > 0)
@@ -194,20 +178,20 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
 
     kcol = psi_bilinear_matrix_columns(k_a)
     ASSERT (kcol <= N_det_beta_unique)
-    
+
     tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
     tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
-    
+
     if (kcol /= kcol_prev) then
       call get_all_spin_singles_$N_int(                              &
-          psi_det_beta_unique(1,kcol+1), idx0(kcol+1),               &
-          tmp_det(1,2), N_det_beta_unique-kcol,                      &
+          psi_det_beta_unique, idx0,                                 &
+          tmp_det(1,2), N_det_beta_unique,                           &
           singles_b, n_singles_b)
     endif
     kcol_prev = kcol
 
-    ! Loop over singly excited beta columns > current column
-    ! ------------------------------------------------------
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
 
     do i=1,n_singles_b
       lcol = singles_b(i)
@@ -228,7 +212,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
         l_a = l_a+1
       enddo
       j = j-1
-      
+
       call get_all_spin_singles_$N_int(                              &
           buffer, idx, tmp_det(1,1), j,                              &
           singles_a, n_singles_a )
@@ -249,8 +233,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
         do l=1,N_st
           v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
           s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
-          v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
-          s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
         enddo
       enddo
 
@@ -288,7 +270,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
     spindet(1:$N_int) = tmp_det(1:$N_int,1)
     
     ! Loop inside the beta column to gather all the connected alphas
-    l_a = k_a+1
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
     do i=1,N_det_alpha_unique
       if (l_a > N_det) exit
       lcol = psi_bilinear_matrix_columns(l_a)
@@ -321,7 +304,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
       call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
 
       do l=1,N_st
-        v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
         v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
         ! single => sij = 0 
       enddo
@@ -340,7 +322,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
 
       call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
       do l=1,N_st
-        v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
         v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
         ! same spin => sij = 0
       enddo
@@ -369,7 +350,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
     ASSERT (k_b <= N_det)
     
     ! Loop inside the alpha row to gather all the connected betas
-    l_b = k_b+1
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
     do i=1,N_det_beta_unique
       if (l_b > N_det) exit
       lrow = psi_bilinear_matrix_transp_rows(l_b)
@@ -403,7 +385,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
       l_a = psi_bilinear_matrix_transp_order(l_b)
       ASSERT (l_a <= N_det)
       do l=1,N_st
-        v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
         v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
         ! single => sij = 0 
       enddo
@@ -424,7 +405,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
       ASSERT (l_a <= N_det)
 
       do l=1,N_st
-        v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
         v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
         ! same spin => sij = 0 
       enddo
@@ -457,20 +437,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
     enddo
 
   end do
-  !$OMP END DO NOWAIT
+  !$OMP END DO 
   deallocate(buffer, singles_a, singles_b, doubles, idx)
-
-  !$OMP CRITICAL
-  do l=1,N_st
-    do i=1, N_det
-      v_0(i,l) = v_0(i,l) + v_t(l,i)
-      s_0(i,l) = s_0(i,l) + s_t(l,i)
-    enddo
-  enddo
-  !$OMP END CRITICAL
-  deallocate(v_t, s_t)
-
-  !$OMP BARRIER
   !$OMP END PARALLEL
 
 end
diff --git a/src/Davidson/u0Hu0_old.irp.f b/src/Davidson/u0Hu0_old.irp.f
index 142197d6..5fc68f04 100644
--- a/src/Davidson/u0Hu0_old.irp.f
+++ b/src/Davidson/u0Hu0_old.irp.f
@@ -500,7 +500,7 @@ subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze
 !      if (exc(0,1,2) /= 0) cycle
 !      if (exc(0,1,1) == 2) cycle
 !      if (exc(0,1,2) == 2) cycle
-      if ((degree==1).and.(exc(0,1,1) == 1)) cycle
+!      if ((degree==1).and.(exc(0,1,1) == 1)) cycle
       call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
       do l=1,N_st
         !$OMP ATOMIC
diff --git a/src/Determinants/density_matrix.irp.f b/src/Determinants/density_matrix.irp.f
index 923318bc..e4e94b7f 100644
--- a/src/Determinants/density_matrix.irp.f
+++ b/src/Determinants/density_matrix.irp.f
@@ -67,6 +67,7 @@ END_PROVIDER
       enddo
     enddo
 
+    if (k == N_det) cycle
     l = k+1
     lrow = psi_bilinear_matrix_rows(l) 
     lcol = psi_bilinear_matrix_columns(l) 
@@ -90,7 +91,9 @@ END_PROVIDER
       lcol = psi_bilinear_matrix_columns(l) 
     enddo
 
-    l = psi_bilinear_matrix_order_reverse(k)+1
+    l = psi_bilinear_matrix_order_reverse(k)
+    if (l == N_det) cycle
+    l = l+1
     ! Fix alpha determinant, loop over betas
     lrow = psi_bilinear_matrix_transp_rows(l) 
     lcol = psi_bilinear_matrix_transp_columns(l) 
diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f
index b76540f7..84775770 100644
--- a/src/Determinants/filter_connected.irp.f
+++ b/src/Determinants/filter_connected.irp.f
@@ -235,7 +235,9 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro
   integer(bit_kind), intent(out) :: microlist(Nint,2,N_minilist*4)
   
   integer :: i,j,k,nt,n_element(2)
-  integer :: list(Nint*bit_kind_size,2), cur_microlist(0:mo_tot_num*2+1)
+  integer :: list(Nint*bit_kind_size,2)
+  integer, allocatable :: cur_microlist(:)
+  allocate (cur_microlist(0:mo_tot_num*2+1))
   integer(bit_kind) :: key_mask_neg(Nint,2), mobileMask(Nint,2)
   integer :: mo_tot_num_2
   mo_tot_num_2 = mo_tot_num+mo_tot_num
@@ -324,6 +326,7 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro
       end do
     end if
   end do
+  deallocate(cur_microlist)
 end subroutine
   
   
diff --git a/src/Determinants/mono_excitations.irp.f b/src/Determinants/mono_excitations.irp.f
index 01af4c25..ab0d5af3 100644
--- a/src/Determinants/mono_excitations.irp.f
+++ b/src/Determinants/mono_excitations.irp.f
@@ -36,7 +36,8 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_to
   key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
   key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
  enddo
- double precision :: array_coulomb(mo_tot_num),array_exchange(mo_tot_num)
+ double precision, allocatable :: array_coulomb(:),array_exchange(:)
+ allocate (array_coulomb(mo_tot_num),array_exchange(mo_tot_num))
  call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
  ! docc ---> virt mono excitations
  do i0 = 1,  n_occ_ab(1)
@@ -89,6 +90,7 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_to
    fock_operator_closed_shell_ref_bitmask(j,i) = accu+ mo_mono_elec_integral(i,j)
   enddo
  enddo
+ deallocate(array_coulomb,array_exchange)
 
 END_PROVIDER
 
diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f
index 38460f87..89350543 100644
--- a/src/Determinants/occ_pattern.irp.f
+++ b/src/Determinants/occ_pattern.irp.f
@@ -109,7 +109,8 @@ recursive subroutine  rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,am
     endif
   else
     integer :: i, j, k
-    integer :: list_todo_tmp(nt)
+    integer, allocatable :: list_todo_tmp(:)
+    allocate (list_todo_tmp(nt))
     do i=1,nt
       if (na > 0) then
         if (list_todo(i) < list_a(na)) then
@@ -126,6 +127,7 @@ recursive subroutine  rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,am
       enddo
       call rec_occ_pattern_to_dets(list_todo_tmp,nt-1,list_a,na+1,d,nd,sze,amax,Nint)
     enddo
+    deallocate(list_todo_tmp)
   endif
 
 end
diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f
index bf753704..39efb26c 100644
--- a/src/Determinants/spindeterminants.irp.f
+++ b/src/Determinants/spindeterminants.irp.f
@@ -416,7 +416,7 @@ BEGIN_PROVIDER  [ double precision, psi_bilinear_matrix_values, (N_det,N_states)
 
     j = get_index_in_psi_det_beta_unique (psi_det(1,2,k),N_int)
     ASSERT (j>0)
-    ASSERT (j<=N_det_alpha_unique)
+    ASSERT (j<=N_det_beta_unique)
 
     do l=1,N_states
       psi_bilinear_matrix_values(k,l) = psi_coef(k,l)
diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f
index f2907c3a..4b0cd0c5 100644
--- a/src/ZMQ/utils.irp.f
+++ b/src/ZMQ/utils.irp.f
@@ -180,14 +180,14 @@ function new_zmq_pair_socket(bind)
   endif
 
 
-  rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_SNDHWM, 4, 4)
+  rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_SNDHWM, 1, 4)
   if (rc /= 0) then
-    stop 'f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_SNDHWM, 4, 4)'
+    stop 'f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_SNDHWM, 1, 4)'
   endif
 
-  rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_RCVHWM, 4, 4)
+  rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_RCVHWM, 1, 4)
   if (rc /= 0) then
-    stop 'f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_RCVHWM, 4, 4)'
+    stop 'f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_RCVHWM, 1, 4)'
   endif
 
   rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_IMMEDIATE, 1, 4)
@@ -252,7 +252,7 @@ IRP_ENDIF
     stop 'Unable to set ZMQ_RCVBUF on pull socket'
   endif
   
-  rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,4,4)
+  rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,1,4)
   if (rc /= 0) then
     stop 'Unable to set ZMQ_RCVHWM on pull socket'
   endif
@@ -327,7 +327,7 @@ IRP_ENDIF
 !    stop 'Unable to set ZMQ_LINGER on push socket'
 !  endif
   
-  rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,4,4)
+  rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,1,4)
   if (rc /= 0) then
     stop 'Unable to set ZMQ_SNDHWM on push socket'
   endif