diff --git a/src/FourIdx/four_index_slave.irp.f b/src/FourIdx/four_index_slave.irp.f
index 28859c9a..bc967528 100644
--- a/src/FourIdx/four_index_slave.irp.f
+++ b/src/FourIdx/four_index_slave.irp.f
@@ -2,7 +2,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
       i_start, j_start, k_start, l_start,                            &
       i_end  , j_end  , k_end  , l_end  ,                            &
       a_start, b_start, c_start, d_start,                            &
-      a_end  , b_end  , c_end  , d_end, task_id, thread  )
+      a_end  , b_end  , c_end  , d_end, task_id, worker_id, thread  )
   implicit none
   use f77_zmq
   use map_module
@@ -19,7 +19,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
   integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
   integer, intent(in)            :: a_start, b_start, c_start, d_start
   integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
-  integer, intent(in)            :: task_id, thread
+  integer, intent(in)            :: task_id, thread, worker_id
 
   double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
   double precision, allocatable  :: T2d(:,:), V2d(:,:)
@@ -63,13 +63,13 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
   double precision, allocatable  :: a_array_value(:)
 
   integer*8 :: new_size
-  new_size = max(1024_8, 5_8 * map_a % n_elements )
+  new_size = max(2048_8, 5_8 * map_a % n_elements )
 
   integer*8 :: tempspace
   integer   :: npass, l_block
 
-  tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
-  npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 1024_8),4)   ! 1 GiB of scratch space
+  tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
+  npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 2048_8),4)   ! 2 GiB of scratch space
   l_block = (l_end-l_start+1)/npass
 
   allocate(a_array_ik(new_size/npass), a_array_j(new_size/npass), a_array_value(new_size/npass))
@@ -133,17 +133,13 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
 !open(unit=10,file='OUTPUT',form='FORMATTED')
 ! END INPUT DATA
 
-  PROVIDE nproc
-  integer :: n_running_threads
-  n_running_threads = 0
-  call omp_set_nested(.true.)
   !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
       !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
       !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
       !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
-      !$OMP  matrix_B,l_pointer,thread,task_id,n_running_threads,nproc) &
-      !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,   &
-      !$OMP  a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)
+      !$OMP  matrix_B,l_pointer,thread,task_id,worker_id) &
+      !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,zmq_to_qp_run_socket,   &
+      !$OMP  a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push) 
 
   integer(ZMQ_PTR)               :: zmq_socket_push
   integer(ZMQ_PTR), external     :: new_zmq_push_socket
@@ -153,9 +149,6 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
 
   allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
 
-  !$OMP ATOMIC
-  n_running_threads = n_running_threads+1
-
   !$OMP DO SCHEDULE(dynamic,1)
   do d=d_start,d_end
     U = 0.d0
@@ -185,13 +178,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
           V2d(1,b_start), size(V2d,1) )
       deallocate(T2d)
 
-      !$OMP FLUSH(n_running_threads)
-      !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
-          !$OMP  a_start,b_start,b_end,c_start,c_end,i_start,k_start,k_end,   &
-          !$OMP  matrix_B,U,l,d,V2d,i_end,a_end,n_running_threads) &
-          !$OMP  PRIVATE(T,V,i,k,ik) NUM_THREADS(nproc-n_running_threads+1)
       allocate( V(i_start:i_end, k_start:k_end), T(k_start:k_end, a_start:a_end))
-      !$OMP DO SCHEDULE(static,1)
       do b=b_start,d
         ik = 0
         do k=k_start,k_end
@@ -221,10 +208,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
               U(a_start,b+1,b), size(U,1) )
         endif
       enddo
-      deallocate(T,V)
-      !$OMP END PARALLEL
-
-      deallocate(V2d)
+      deallocate(T,V,V2d)
     enddo
 
     idx = 0_8
@@ -253,19 +237,23 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
     deallocate(key,value)
 
   enddo
-  !$OMP END DO NOWAIT
   deallocate(U)
 
-  !$OMP ATOMIC
-  n_running_threads = n_running_threads-1
-
   !$OMP BARRIER
   !$OMP MASTER
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer(ZMQ_PTR), external     :: task_done_to_taskserver, new_zmq_to_qp_run_socket
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
+    stop 'Unable to send task done'
+  endif
   call four_idx_push_results(zmq_socket_push, 0_8, 0.d0, 0, task_id)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
   !$OMP END MASTER
   call end_zmq_push_socket(zmq_socket_push)
   !$OMP END PARALLEL
 
+
   deallocate(l_pointer,a_array_ik,a_array_j,a_array_value)
 
 end
diff --git a/src/FourIdx/four_index_zmq.irp.f b/src/FourIdx/four_index_zmq.irp.f
index 4e0673b1..5fffe999 100644
--- a/src/FourIdx/four_index_zmq.irp.f
+++ b/src/FourIdx/four_index_zmq.irp.f
@@ -62,13 +62,13 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB,        &
   call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'four_idx')
 
   integer*8 :: new_size
-  new_size = max(1024_8, 5_8 * map_a % n_elements )
+  new_size = max(2048_8, 5_8 * map_a % n_elements )
 
   integer :: npass
   integer*8 :: tempspace
 
-  tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
-  npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 1024_8),4)   ! 1 GiB of scratch space
+  tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
+  npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 2048_8),4)   ! 2 GiB of scratch space
   l_block = (l_end-l_start+1)/npass
 
   ! Create tasks
@@ -98,9 +98,11 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB,        &
 
   PROVIDE nproc
 
+  integer :: ithread, sqnproc
+  sqnproc = int(sqrt(float(nproc))+0.5)
   call omp_set_nested(.True.)
-  integer :: ithread
-  !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
+  call omp_set_dynamic(.True.)
+  !$OMP PARALLEL NUM_THREADS(1+max(1,sqnproc)) PRIVATE(ithread)
   ithread = omp_get_thread_num()
   if (ithread==0) then
     call four_idx_collector(zmq_socket_pull,map_c)
@@ -178,19 +180,13 @@ subroutine four_index_transform_slave(thread,worker_id)
         i_end  , j_end  , k_end  , l_end_block  ,                    &
         a_start, b_start, c_start, d_start,                          &
         a_end  , b_end  , c_end  , d_end,                            &
-        task_id, thread)
+        task_id, worker_id, thread)
 
-    integer, external              :: task_done_to_taskserver
-    if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
-      print *,  irp_here, ': Unable  to send task_done'
-      stop
-    endif
 
   enddo
   integer, external :: disconnect_from_taskserver
   if (disconnect_from_taskserver(zmq_to_qp_run_socket,thread) == -1) then
-    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-    return
+    continue
   endif
   call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
 
@@ -287,8 +283,6 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
   rc8 = f77_zmq_recv8( zmq_socket_pull, value, int(integral_kind*sze,8), 0)
   if(rc8 /= integral_kind*sze) stop 'four_idx_pull_results failed to pull value'
 
-  call map_update(map_c, key, value, sze, mo_integrals_threshold)  
-
 ! Activate if zmq_socket_pull is a REP
   IRP_IF ZMQ_PUSH
   IRP_ELSE
@@ -299,6 +293,8 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
     endif
   IRP_ENDIF
 
+  call map_update(map_c, key, value, sze, mo_integrals_threshold)  
+
   deallocate(key, value)
 end