diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f
index 229e3844..8882ee88 100644
--- a/src/cipsi/cipsi.irp.f
+++ b/src/cipsi/cipsi.irp.f
@@ -1,18 +1,20 @@
 subroutine run_cipsi
   implicit none
+  use selection_types
   BEGIN_DOC
 ! Selected Full Configuration Interaction with deterministic selection and
 ! stochastic PT2.
   END_DOC
   integer                        :: i,j,k
-  double precision, allocatable  :: pt2(:), variance(:), norm(:), rpt2(:), zeros(:)
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  double precision, allocatable  :: zeros(:)
   integer                        :: to_select
   logical, external :: qp_stop
 
   double precision :: threshold_generators_save
   double precision :: rss
   double precision, external :: memory_of_double
-  PROVIDE H_apply_buffer_allocated 
+  PROVIDE H_apply_buffer_allocated
 
   N_iter = 1
   threshold_generators = 1.d0
@@ -21,21 +23,25 @@ subroutine run_cipsi
   rss = memory_of_double(N_states)*4.d0
   call check_mem(rss,irp_here)
 
-  allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm(N_states), variance(N_states))
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
 
   double precision               :: hf_energy_ref
   logical                        :: has
   double precision               :: relative_error
 
-  !PROVIDE h_apply_buffer_allocated
-
   relative_error=PT2_relative_error
 
   zeros = 0.d0
-  pt2 = -huge(1.e0)
-  rpt2 = -huge(1.e0)
-  norm = 0.d0
-  variance = huge(1.e0)
+  pt2_data % pt2  = -huge(1.e0)
+  pt2_data % rpt2 = -huge(1.e0)
+  pt2_data % overlap(:,:) = 0.d0
+  pt2_data % variance = huge(1.e0)
+  if (is_complex) then
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
+
 
   if (s2_eig) then
     call make_s2_eigenfunction
@@ -77,14 +83,13 @@ subroutine run_cipsi
   endif
 
   double precision :: correlation_energy_ratio
-  double precision :: error(N_states)
 
   correlation_energy_ratio = 0.d0
 
   do while (                                                         &
         (N_det < N_det_max) .and.                                    &
-        (maxval(abs(rpt2(1:N_states))) > pt2_max) .and.               &
-        (maxval(abs(variance(1:N_states))) > variance_max) .and.     &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and.               &
+        (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.     &
         (correlation_energy_ratio <= correlation_energy_ratio_max)   &
         )
       write(*,'(A)')  '--------------------------------------------------------------------------------'
@@ -93,39 +98,33 @@ subroutine run_cipsi
     to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
     to_select = max(N_states_diag, to_select)
     if (do_pt2) then
-      pt2 = 0.d0
-      variance = 0.d0
-      norm = 0.d0
+      call pt2_dealloc(pt2_data)
+      call pt2_dealloc(pt2_data_err)
+      call pt2_alloc(pt2_data, N_states)
+      call pt2_alloc(pt2_data_err, N_states)
       threshold_generators_save = threshold_generators
       threshold_generators = 1.d0
       SOFT_TOUCH threshold_generators
-!      if (is_complex) then
-!        call zmq_pt2_complex(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, &
-!          norm, 0) ! Stochastic PT2
-!      else
-        call zmq_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, &
-          norm, 0) ! Stochastic PT2
-!      endif
+      call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error, 0) ! Stochastic PT2
       threshold_generators = threshold_generators_save
       SOFT_TOUCH threshold_generators
     else
-      call ZMQ_selection(to_select, pt2, variance, norm)
+      call pt2_dealloc(pt2_data)
+      call pt2_alloc(pt2_data, N_states)
+      call ZMQ_selection(to_select, pt2_data)
     endif
 
-    do k=1,N_states
-      rpt2(k) = pt2(k)/(1.d0 + norm(k))
-    enddo
-
     correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref)  /     &
-                    (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref)
+                    (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref)
     correlation_energy_ratio = min(1.d0,correlation_energy_ratio)
 
     call write_double(6,correlation_energy_ratio, 'Correlation ratio')
-    call print_summary(psi_energy_with_nucl_rep,pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2)
+    call print_summary(psi_energy_with_nucl_rep, &
+       pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2)
 
-    call save_energy(psi_energy_with_nucl_rep, rpt2)
+    call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2)
 
-    call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det)
+    call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
     N_iter += 1
 
@@ -135,15 +134,9 @@ subroutine run_cipsi
     call copy_H_apply_buffer_to_wf()
 !    call save_wavefunction
 
-    !n_det_before = N_det
-    !to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
-    !to_select = max(N_states_diag, to_select)
-    !call zmq_selection(to_select, pt2, variance, norm)
     if (is_complex) then
-!      call zmq_selection_complex(to_select, pt2, variance, norm)
       PROVIDE  psi_coef_complex
     else
-!      call zmq_selection(to_select, pt2, variance, norm)
       PROVIDE  psi_coef
     endif
     PROVIDE  psi_det
@@ -156,7 +149,7 @@ subroutine run_cipsi
     endif
     call save_wavefunction
     call save_energy(psi_energy_with_nucl_rep, zeros)
-    if (qp_stop()) exit 
+    if (qp_stop()) exit
   enddo
 
   if (.not.qp_stop()) then
@@ -171,18 +164,13 @@ subroutine run_cipsi
     endif
 
     if (do_pt2) then
-      pt2(:) = 0.d0
-      variance(:) = 0.d0
-      norm(:) = 0.d0
+      call pt2_dealloc(pt2_data)
+      call pt2_dealloc(pt2_data_err)
+      call pt2_alloc(pt2_data, N_states)
+      call pt2_alloc(pt2_data_err, N_states)
       threshold_generators = 1d0
       SOFT_TOUCH threshold_generators
-!      if (is_complex) then
-!        call zmq_pt2_complex(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, &
-!          norm,0) ! Stochastic PT2
-!      else
-        call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, &
-          norm,0) ! Stochastic PT2
-!      endif
+      call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
       SOFT_TOUCH threshold_generators
     endif
     print *,  'N_det             = ', N_det
@@ -190,15 +178,13 @@ subroutine run_cipsi
     print *,  'N_states          = ', N_states
     print*,   'correlation_ratio = ', correlation_energy_ratio
 
-
-    do k=1,N_states
-      rpt2(k) = pt2(k)/(1.d0 + norm(k))
-    enddo
-
-    call save_energy(psi_energy_with_nucl_rep, rpt2)
-    call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2)
-    call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det)
+    call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2)
+    call print_summary(psi_energy_with_nucl_rep(1:N_states), &
+      pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2)
+    call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
   endif
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
 
 end
diff --git a/src/cipsi/energy.irp.f b/src/cipsi/energy.irp.f
index 38a6a92e..50dc4620 100644
--- a/src/cipsi/energy.irp.f
+++ b/src/cipsi/energy.irp.f
@@ -41,3 +41,19 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
 END_PROVIDER
 
 
+BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Overlap between the perturbed wave functions
+ END_DOC
+ pt2_overlap(1:N_states,1:N_states) = 0.d0
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pt2_overlap_imag, (N_states, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Overlap between the perturbed wave functions
+ END_DOC
+ pt2_overlap_imag(1:N_states,1:N_states) = 0.d0
+END_PROVIDER
+
diff --git a/src/cipsi/pert_rdm_providers.irp.f b/src/cipsi/pert_rdm_providers.irp.f
index e2917261..caea57b2 100644
--- a/src/cipsi/pert_rdm_providers.irp.f
+++ b/src/cipsi/pert_rdm_providers.irp.f
@@ -31,7 +31,7 @@ BEGIN_PROVIDER [double precision, pert_2rdm_provider, (n_orb_pert_rdm,n_orb_pert
 
 END_PROVIDER
 
-subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection)
+subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection)
   use bitmasks
   use selection_types
   implicit none
@@ -44,12 +44,10 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
   double precision, intent(in)           :: fock_diag_tmp(mo_num)
   double precision, intent(in)    :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
   logical :: ok
-  integer :: s1, s2, p1, p2, ib, j, istate
+  integer :: s1, s2, p1, p2, ib, j, istate, jstate
   integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
   double precision :: e_pert, delta_E, val, Hii, sum_e_pert, tmp, alpha_h_psi, coef(N_states)
   double precision, external :: diag_H_mat_elem_fock
@@ -152,9 +150,16 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo
         e_pert = 0.5d0 * (tmp - delta_E)
         coef(istate) = e_pert / alpha_h_psi
         print*,e_pert,coef,alpha_h_psi
-        pt2(istate) = pt2(istate) + e_pert
-        variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi
-        norm(istate) = norm(istate) + coef(istate) * coef(istate)
+        pt2_data % pt2(istate) += e_pert
+        pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi
+      enddo
+
+      do istate=1,N_states
+        alpha_h_psi = mat(istate, p1, p2)
+        e_pert = coef(istate) * alpha_h_psi
+        do jstate=1,N_states
+          pt2_data % overlap(jstate,jstate) = coef(istate) * coef(jstate)
+        enddo
 
         if (weight_selection /= 5) then
           ! Energy selection
diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f
index 7b84221b..c555fe0b 100644
--- a/src/cipsi/pt2_stoch_routines.irp.f
+++ b/src/cipsi/pt2_stoch_routines.irp.f
@@ -115,7 +115,7 @@ end function
 
 
 
-subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
+subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
   use f77_zmq
   use selection_types
 
@@ -125,10 +125,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
   integer, intent(in)            :: N_in
 !  integer, intent(inout)         :: N_in
   double precision, intent(in)   :: relative_error, E(N_states)
-  double precision, intent(out)  :: pt2(N_states),error(N_states)
-  double precision, intent(out)  :: variance(N_states),norm2(N_states)
-
-
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+!
   integer                        :: i, N
 
   double precision               :: state_average_weight_save(N_states), w(N_states,4)
@@ -154,11 +152,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
   endif
 
   if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then
-    pt2=0.d0
-    variance=0.d0
-    norm2=0.d0
-    call ZMQ_selection(N_in, pt2, variance, norm2)
-    error(:) = 0.d0
+    call ZMQ_selection(N_in, pt2_data)
   else
 
     N = max(N_in,1) * N_states
@@ -272,8 +266,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
       mem_collector = 8.d0 *                  & ! bytes
             ( 1.d0*pt2_n_tasks_max            & ! task_id, index
             + 0.635d0*N_det_generators        & ! f,d
-            + 3.d0*N_det_generators*N_states  & ! eI, vI, nI
-            + 3.d0*pt2_n_tasks_max*N_states   & ! eI_task, vI_task, nI_task
+            + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task
+            + N_det_generators*pt2_type_size(N_states)  & ! pt2_data_I
             + 4.d0*(pt2_N_teeth+1)            & ! S, S2, T2, T3
             + 1.d0*(N_int*2.d0*N + N)         & ! selection buffer
             + 1.d0*(N_int*2.d0*N + N)         & ! sort selection buffer
@@ -288,7 +282,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
               nproc_target * 8.d0 *             & ! bytes
               ( 0.5d0*pt2_n_tasks_max           & ! task_id
               + 64.d0*pt2_n_tasks_max           & ! task
-              + 3.d0*pt2_n_tasks_max*N_states   & ! pt2, variance, norm2
+              + pt2_type_size(N_states)*pt2_n_tasks_max*N_states   & ! pt2, variance, overlap
               + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
               + 1.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
               + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
@@ -321,21 +315,24 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
       call omp_set_nested(.false.)
 
 
-      print '(A)', '========== ================= =========== =============== =============== ================='
-      print '(A)', ' Samples        Energy        Stat. Err     Variance          Norm^2        Seconds      '
-      print '(A)', '========== ================= =========== =============== =============== ================='
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', ' Samples          Energy                Variance               Norm^2          Seconds'
+      print '(A)', '========== ======================= ===================== ===================== ==========='
 
       PROVIDE global_selection_buffer
+
       !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1)            &
           !$OMP  PRIVATE(i)
       i = omp_get_thread_num()
       if (i==0) then
 
-        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w(1,1), w(1,2), w(1,3), w(1,4), b, N)
-        pt2(pt2_stoch_istate) = w(pt2_stoch_istate,1)
-        error(pt2_stoch_istate) = w(pt2_stoch_istate,2)
-        variance(pt2_stoch_istate) = w(pt2_stoch_istate,3)
-        norm2(pt2_stoch_istate) = w(pt2_stoch_istate,4)
+        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N)
+        pt2_data % rpt2(pt2_stoch_istate) =  &
+          pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+        !TODO : We should use here the correct formula for the error of X/Y
+        pt2_data_err % rpt2(pt2_stoch_istate) =  &
+          pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
 
       else
         call pt2_slave_inproc(i)
@@ -343,11 +340,48 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
       !$OMP END PARALLEL
       call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
 
-      print '(A)', '========== ================= =========== =============== =============== ================='
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+    do k=1,N_states
+      pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
+    enddo
+    SOFT_TOUCH pt2_overlap
+    if (is_complex) then
+      !TODO: transpose/conjugate?
+      do k=1,N_states
+        pt2_overlap_imag(pt2_stoch_istate,k) = pt2_data % overlap_imag(k,pt2_stoch_istate)
+      enddo
+      SOFT_TOUCH pt2_overlap_imag
+    endif
 
     enddo
     FREE pt2_stoch_istate
 
+    ! Symmetrize overlap
+    do j=2,N_states
+     do i=1,j-1
+       pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i))
+       pt2_overlap(j,i) = pt2_overlap(i,j)
+     enddo
+    enddo
+    
+    if (is_complex) then
+      !TODO: check sign
+      do j=2,N_states
+        do i=1,j-1
+          pt2_overlap_imag(i,j) = 0.5d0 * (pt2_overlap_imag(i,j) - pt2_overlap_imag(j,i))
+          pt2_overlap_imag(j,i) = -pt2_overlap_imag(i,j)
+        enddo
+      enddo
+    endif
+
+    print *, 'Overlap of perturbed states:'
+    do k=1,N_states
+      print *, pt2_overlap(k,:)
+    enddo
+    print *, '-------'
+    !TODO: print imag part?
+
     if (N_in > 0) then
       b%cur = min(N_in,b%cur)
       if (s2_eig) then
@@ -362,11 +396,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in)
     state_average_weight(:) = state_average_weight_save(:)
     TOUCH state_average_weight
   endif
-  do k=N_det+1,N_states
-    pt2(k) = 0.d0
-  enddo
 
-  call update_pt2_and_variance_weights(pt2, variance, norm2, N_states)
+  call update_pt2_and_variance_weights(pt2_data, N_states)
 
 end subroutine
 
@@ -380,7 +411,7 @@ subroutine pt2_slave_inproc(i)
 end
 
 
-subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, variance, norm2, b, N_)
+subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_)
   use f77_zmq
   use selection_types
   use bitmasks
@@ -389,15 +420,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
   double precision, intent(in)   :: relative_error, E
-  double precision, intent(out)  :: pt2(N_states), error(N_states)
-  double precision, intent(out)  :: variance(N_states), norm2(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
   type(selection_buffer), intent(inout) :: b
   integer, intent(in)            :: N_
 
-
-  double precision, allocatable      :: eI(:,:), eI_task(:,:), S(:), S2(:)
-  double precision, allocatable      :: vI(:,:), vI_task(:,:), T2(:)
-  double precision, allocatable      :: nI(:,:), nI_task(:,:), T3(:)
+  type(pt2_type), allocatable    :: pt2_data_task(:)
+  type(pt2_type), allocatable    :: pt2_data_I(:)
+  type(pt2_type), allocatable    :: pt2_data_S(:)
+  type(pt2_type), allocatable    :: pt2_data_S2(:)
+  type(pt2_type)                 :: pt2_data_teeth
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
   integer, external :: zmq_delete_tasks_async_send
@@ -405,11 +436,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
   integer, external :: zmq_abort
   integer, external :: pt2_find_sample_lr
 
+  PROVIDE pt2_stoch_istate
+
   integer :: more, n, i, p, c, t, n_tasks, U
   integer, allocatable :: task_id(:)
   integer, allocatable :: index(:)
 
-  double precision :: v, x, x2, x3, avg, avg2, avg3, eqt, E0, v0, n0
+  double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states)
+  double precision :: avg3im(N_states), n0im(N_states)
+  double precision :: eqta(N_states)
   double precision :: time, time1, time0
 
   integer, allocatable :: f(:)
@@ -434,11 +469,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
   ! updated in ZMQ_pt2
   allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
   allocate(d(N_det_generators+1))
-  allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
-  allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max))
-  allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max))
-  allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1))
-  allocate(T2(pt2_N_teeth+1), T3(pt2_N_teeth+1))
+  allocate(pt2_data_task(pt2_n_tasks_max))
+  allocate(pt2_data_I(N_det_generators))
+  allocate(pt2_data_S(pt2_N_teeth+1))
+  allocate(pt2_data_S2(pt2_N_teeth+1))
 
 
 
@@ -446,26 +480,37 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
   call create_selection_buffer(N_, N_*2, b2)
 
 
-  pt2(:) = -huge(1.)
-  error(:) = huge(1.)
-  variance(:) = huge(1.)
-  norm2(:) = 0.d0
-  S(:) = 0d0
-  S2(:) = 0d0
-  T2(:) = 0d0
-  T3(:) = 0d0
+  pt2_data % pt2(pt2_stoch_istate) = -huge(1.)
+  pt2_data_err % pt2(pt2_stoch_istate) = huge(1.)
+  pt2_data % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data_err % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data % overlap(:,pt2_stoch_istate) = 0.d0
+  pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.)
+  !TODO: init overlap_imag?
+  if (is_complex) then
+    pt2_data % overlap_imag(:,pt2_stoch_istate) = 0.d0
+    pt2_data_err % overlap_imag(:,pt2_stoch_istate) = 0.d0
+  endif
   n = 1
   t = 0
   U = 0
-  eI(:,:) = 0d0
-  vI(:,:) = 0d0
-  nI(:,:) = 0d0
+  do i=1,pt2_n_tasks_max
+    call pt2_alloc(pt2_data_task(i),N_states)
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_alloc(pt2_data_S(i),N_states)
+    call pt2_alloc(pt2_data_S2(i),N_states)
+  enddo
+  do i=1,N_det_generators
+    call pt2_alloc(pt2_data_I(i),N_states)
+  enddo
   f(:) = pt2_F(:)
   d(:) = .false.
   n_tasks = 0
   E0 = E
   v0 = 0.d0
-  n0 = 0.d0
+  n0(:) = 0.d0
+  n0im(:) = 0.d0
   more = 1
   call wall_time(time0)
   time1 = time0
@@ -485,11 +530,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
           t=t+1
           E0 = 0.d0
           v0 = 0.d0
-          n0 = 0.d0
+          n0(:) = 0.d0
+          n0im(:) = 0.d0
           do i=pt2_n_0(t),1,-1
-            E0 += eI(pt2_stoch_istate, i)
-            v0 += vI(pt2_stoch_istate, i)
-            n0 += nI(pt2_stoch_istate, i)
+            E0 += pt2_data_I(i) % pt2(pt2_stoch_istate)
+            v0 += pt2_data_I(i) % variance(pt2_stoch_istate)
+            n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate)
+            if (is_complex) then
+              n0im(:) += pt2_data_I(i) % overlap_imag(:,pt2_stoch_istate)
+            endif
           end do
         else
           exit
@@ -499,45 +548,71 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
       ! Add Stochastic part
       c = pt2_R(n)
       if(c > 0) then
-!print *,  'c>0'
-        x  = 0d0
-        x2 = 0d0
-        x3 = 0d0
+
+        call pt2_alloc(pt2_data_teeth,N_states)
         do p=pt2_N_teeth, 1, -1
           v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
           i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
-          x  += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
-          x2 += vI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
-          x3 += nI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
-          S(p) += x
-          S2(p) += x*x
-          T2(p) += x2
-          T3(p) += x3
-        end do
-        avg  = E0 + S(t) / dble(c)
-        avg2 = v0 + T2(t) / dble(c)
-        avg3 = n0 + T3(t) / dble(c)
+          v = pt2_W_T / pt2_w(i)
+          call pt2_add ( pt2_data_teeth,  v,  pt2_data_I(i) )
+          call pt2_add ( pt2_data_S(p),  1.d0,  pt2_data_teeth )
+          call pt2_add2( pt2_data_S2(p), 1.d0,  pt2_data_teeth )
+        enddo
+        call pt2_dealloc(pt2_data_teeth)
+
+        avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)
+        avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
+        avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
+        if (is_complex) then
+          avg3im(:) = n0im(:) + pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate) / dble(c)
+        endif
         if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
           do_exit = .true.
         endif
         if (qp_stop()) then
           stop_now = .True.
         endif
-        pt2(pt2_stoch_istate) = avg
-        variance(pt2_stoch_istate) = avg2
-        norm2(pt2_stoch_istate) = avg3
+        pt2_data % pt2(pt2_stoch_istate) = avg
+        pt2_data % variance(pt2_stoch_istate) = avg2
+        pt2_data % overlap(:,pt2_stoch_istate) = avg3(:)
+        if (is_complex) then
+          pt2_data % overlap_imag(:,pt2_stoch_istate) = avg3im(:)
+        endif
         call wall_time(time)
         ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
         if(c > 2) then
-          eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability
+          eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
           eqt = sqrt(eqt / (dble(c) - 1.5d0))
-          error(pt2_stoch_istate) = eqt
+          pt2_data_err % pt2(pt2_stoch_istate) = eqt
+
+          eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % variance(pt2_stoch_istate) = eqt
+
+          if (is_complex) then
+            eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - &
+            (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2 - &
+            (pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate)/c)**2 ) ! dabs for numerical stability
+          else
+            eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          endif
+          eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
+          pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)
+
+
           if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
             time1 = time
-            print '(G10.3, 2X, F16.10, 2X, G10.3, 2X, F14.10, 2X, F14.10, 2X, F10.4, A10)', c, avg+E, eqt, avg2, avg3, time-time0, ''
+            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, &
+              pt2_data     % pt2(pt2_stoch_istate) +E, &
+              pt2_data_err % pt2(pt2_stoch_istate), &
+              pt2_data     % variance(pt2_stoch_istate), &
+              pt2_data_err % variance(pt2_stoch_istate), &
+              pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              time-time0
             if (stop_now .or. (                                      &
-                  (do_exit .and. (dabs(error(pt2_stoch_istate)) /    &
-                  (1.d-20 + dabs(pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
+                  (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+                  (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
               if (zmq_abort(zmq_to_qp_run_socket) == -1) then
                 call sleep(10)
                 if (zmq_abort(zmq_to_qp_run_socket) == -1) then
@@ -552,10 +627,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
     else if(more == 0) then
       exit
     else
-      call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks, b2)
+      call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2)
       if(n_tasks > pt2_n_tasks_max)then
        print*,'PB !!!'
-       print*,'If you see this, send an email to Anthony scemama with the following content'
+       print*,'If you see this, send a bug report with the following content'
        print*,irp_here
        print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max
        stop -1
@@ -564,16 +639,14 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
           stop 'PT2: Unable to delete tasks (send)'
       endif
       do i=1,n_tasks
-        if(index(i).gt.size(eI,2).or.index(i).lt.1)then
+        if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then
          print*,'PB !!!'
-         print*,'If you see this, send an email to Anthony scemama with the following content'
+         print*,'If you see this, send a bug report with the following content'
          print*,irp_here
-         print*,'i,index(i),size(ei,2) = ',i,index(i),size(ei,2)
+         print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
          stop -1
         endif
-        eI(1:N_states, index(i)) += eI_task(1:N_states,i)
-        vI(1:N_states, index(i)) += vI_task(1:N_states,i)
-        nI(1:N_states, index(i)) += nI_task(1:N_states,i)
+        call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i))
         f(index(i)) -= 1
       end do
       do i=1, b2%cur
@@ -586,6 +659,16 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc
       endif
     end if
   end do
+  do i=1,N_det_generators
+    call pt2_dealloc(pt2_data_I(i))
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_dealloc(pt2_data_S(i))
+    call pt2_dealloc(pt2_data_S2(i))
+  enddo
+  do i=1,pt2_n_tasks_max
+    call pt2_dealloc(pt2_data_task(i))
+  enddo
 !print *,  'deleting b2'
   call delete_selection_buffer(b2)
 !print *,  'sorting b'
diff --git a/src/cipsi/pt2_type.irp.f b/src/cipsi/pt2_type.irp.f
new file mode 100644
index 00000000..7964a942
--- /dev/null
+++ b/src/cipsi/pt2_type.irp.f
@@ -0,0 +1,155 @@
+subroutine pt2_alloc(pt2_data,N)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: N
+  integer :: k
+
+  allocate(pt2_data % pt2(N)           &
+          ,pt2_data % variance(N)      &
+          ,pt2_data % rpt2(N)          &
+          ,pt2_data % overlap(N,N)     &
+          )
+
+  pt2_data % pt2(:)           = 0.d0
+  pt2_data % variance(:)      = 0.d0
+  pt2_data % rpt2(:)          = 0.d0
+  pt2_data % overlap(:,:)     = 0.d0
+  if (is_complex) then
+    allocate(pt2_data % overlap_imag(N,N))
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
+
+end subroutine
+
+subroutine pt2_dealloc(pt2_data)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  deallocate(pt2_data % pt2         &
+            ,pt2_data % variance    &
+            ,pt2_data % rpt2        &
+            ,pt2_data % overlap     &
+            )
+  if (is_complex) then
+    deallocate(pt2_data % overlap_imag)
+  endif
+end subroutine
+
+subroutine pt2_add(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)            = p1 % pt2(:)           + p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + p2 % overlap_imag(:,:)
+    endif
+
+  else
+
+    p1 % pt2(:)            = p1 % pt2(:)           + w * p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + w * p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + w * p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + w * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + w * p2 % overlap_imag(:,:)
+    endif
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_add2(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2**2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)           = p1 % pt2(:)           + p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + p2 % overlap(:,:)     * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap(:,:)   = p1 % overlap(:,:)     + p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:)
+    endif
+
+  else
+
+    p1 % pt2(:)           = p1 % pt2(:)           + w * p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + w * p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + w * p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + w * p2 % overlap(:,:)     * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap(:,:) = p1 % overlap(:,:)  + w * p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:)
+    endif
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_serialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(in)    :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(out) :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  x(1:n)           =  pt2_data % pt2(1:n)
+  k=n
+  x(k+1:k+n)     =  pt2_data % rpt2(1:n)
+  k=k+n
+  x(k+1:k+n)     =  pt2_data % variance(1:n)
+  k=k+n
+  x(k+1:k+n2)  =  reshape(pt2_data % overlap(1:n,1:n), (/ n2 /))
+  if (is_complex) then
+    k=k+n2
+    x(k+1:k+n2) = reshape(pt2_data % overlap_imag(1:n,1:n), (/ n2 /))
+  endif
+
+end
+
+subroutine pt2_deserialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  pt2_data % pt2(1:n)           =   x(1:n)
+  k=n
+  pt2_data % rpt2(1:n)          =   x(k+1:k+n)
+  k=k+n
+  pt2_data % variance(1:n)      =   x(k+1:k+n)
+  k=k+n
+  pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+  if (is_complex) then
+    k=k+n2
+    pt2_data % overlap_imag(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+  endif
+
+end
diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f
index 7df98a87..d3f4d45d 100644
--- a/src/cipsi/run_pt2_slave.irp.f
+++ b/src/cipsi/run_pt2_slave.irp.f
@@ -1,8 +1,8 @@
- use omp_lib 
+ use omp_lib
  use selection_types
  use f77_zmq
 BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
- use omp_lib 
+ use omp_lib
  implicit none
  BEGIN_DOC
  ! Global buffer for the OpenMP selection
@@ -11,7 +11,7 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
 END_PROVIDER
 
 BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
- use omp_lib 
+ use omp_lib
  implicit none
  BEGIN_DOC
  ! Global buffer for the OpenMP selection
@@ -61,7 +61,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
   type(selection_buffer) :: b
   logical :: done, buffer_ready
 
-  double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
+  type(pt2_type), allocatable :: pt2_data(:)
   integer :: n_tasks, k, N
   integer, allocatable :: i_generator(:), subset(:)
 
@@ -70,10 +70,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
 !  logical :: sending
 
   allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
-  allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
-  allocate(variance(N_states,pt2_n_tasks_max))
-  allocate(norm(N_states,pt2_n_tasks_max))
-
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
   integer, external :: connect_to_taskserver
@@ -120,13 +117,11 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
     double precision :: time0, time1
     call wall_time(time0)
     do k=1,n_tasks
-        pt2(:,k) = 0.d0
-        variance(:,k) = 0.d0
-        norm(:,k) = 0.d0
-        b%cur = 0
+      call pt2_alloc(pt2_data(k),N_states)
+      b%cur = 0
 !double precision :: time2
 !call wall_time(time2)
-        call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k)))
+      call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
 !call wall_time(time1)
 !print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
     enddo
@@ -138,11 +133,15 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
       done = .true.
     endif
     call sort_selection_buffer(b)
-    call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks)
+    call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks)
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
     b%cur=0
 
 !    ! Try to adjust n_tasks around nproc/2 seconds per job
     n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
+    n_tasks = min(n_tasks, pt2_n_tasks_max)
 !    n_tasks = 1
   end do
 
@@ -158,6 +157,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
   if (buffer_ready) then
     call delete_selection_buffer(b)
   endif
+  deallocate(pt2_data)
 end subroutine
 
 
@@ -171,8 +171,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
   integer                         :: rc, i
 
   integer                        :: worker_id, ctask, ltask
-  character*(512), allocatable   :: task(:)
-  integer, allocatable           :: task_id(:)
+  character*(512)                :: task
+  integer                        :: task_id(1)
 
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
@@ -183,20 +183,15 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
   type(selection_buffer) :: b
   logical :: done, buffer_ready
 
-  double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
+  type(pt2_type) :: pt2_data(1)
   integer :: n_tasks, k, N
-  integer, allocatable :: i_generator(:), subset(:)
+  integer :: i_generator(1), subset
 
   integer :: bsize ! Size of selection buffers
   logical :: sending
   PROVIDE global_selection_buffer global_selection_buffer_lock
 
 
-  allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
-  allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
-  allocate(variance(N_states,pt2_n_tasks_max))
-  allocate(norm(N_states,pt2_n_tasks_max))
-
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
   integer, external :: connect_to_taskserver
@@ -215,22 +210,17 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
   done = .False.
   do while (.not.done)
 
-    n_tasks = max(1,n_tasks)
-    n_tasks = min(pt2_n_tasks_max,n_tasks)
-
     integer, external :: get_tasks_from_taskserver
     if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
       exit
     endif
-    done = task_id(n_tasks) == 0
+    done = task_id(1) == 0
     if (done) then
       n_tasks = n_tasks-1
     endif
     if (n_tasks == 0) exit
 
-    do k=1,n_tasks
-      read (task(k),*) subset(k), i_generator(k), N
-    enddo
+    read (task,*) subset, i_generator(1), N
     if (b%N == 0) then
       ! Only first time
       bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
@@ -242,17 +232,13 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 
     double precision :: time0, time1
     call wall_time(time0)
-    do k=1,n_tasks
-        pt2(:,k) = 0.d0
-        variance(:,k) = 0.d0
-        norm(:,k) = 0.d0
-        b%cur = 0
+    call pt2_alloc(pt2_data(1),N_states)
+    b%cur = 0
 !double precision :: time2
 !call wall_time(time2)
-        call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k)))
+    call select_connected(i_generator(1),energy,pt2_data(1),b,subset,pt2_F(i_generator(1)))
 !call wall_time(time1)
 !print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
-    enddo
     call wall_time(time1)
 !print *,  '-->', i_generator(1), time1-time0, n_tasks
 
@@ -269,16 +255,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
     call omp_unset_lock(global_selection_buffer_lock)
     if ( iproc == 1 ) then
       call omp_set_lock(global_selection_buffer_lock)
-      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, global_selection_buffer, task_id, n_tasks,sending)
+      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, n_tasks,sending)
       global_selection_buffer%cur = 0
       call omp_unset_lock(global_selection_buffer_lock)
     else
-      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks,sending)
+      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks,sending)
     endif
 
-!    ! Try to adjust n_tasks around nproc/2 seconds per job
-!    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
-    n_tasks = 1
+    call pt2_dealloc(pt2_data(1))
   end do
   call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
 
@@ -298,39 +282,36 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 end subroutine
 
 
-subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks)
+subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks)
  use selection_types
  use f77_zmq
   implicit none
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  double precision, intent(in)   :: pt2(N_states,n_tasks)
-  double precision, intent(in)   :: variance(N_states,n_tasks)
-  double precision, intent(in)   :: norm(N_states,n_tasks)
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
   integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
   type(selection_buffer), intent(inout) :: b
 
   logical :: sending
   sending = .False.
-  call push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending)
+  call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
   call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending)
 end subroutine
 
 
-subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending)
+subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
  use selection_types
  use f77_zmq
   implicit none
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  double precision, intent(in)   :: pt2(N_states,n_tasks)
-  double precision, intent(in)   :: variance(N_states,n_tasks)
-  double precision, intent(in)   :: norm(N_states,n_tasks)
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
   integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
   type(selection_buffer), intent(inout) :: b
   logical, intent(inout) :: sending
-  integer :: rc
+  integer :: rc, i
   integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
 
   if (sending) then
     print *,  irp_here, ': sending is true'
@@ -358,32 +339,18 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, no
   endif
 
 
-  rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE)
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  do i=1,n_tasks
+    call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  deallocate(pt2_serialized)
   if (rc == -1) then
     print *,  irp_here, ': error sending result'
     stop 3
     return
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'push'
-  endif
-
-
-  rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE)
-  if (rc == -1) then
-    print *,  irp_here, ': error sending result'
-    stop 4
-    return
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'push'
-  endif
-
-
-  rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE)
-  if (rc == -1) then
-    print *,  irp_here, ': error sending result'
-    stop 5
-    return
-  else if(rc /= 8*N_states*n_tasks) then
+  else if(rc /= size(pt2_serialized)*8) then
     stop 'push'
   endif
 
@@ -475,7 +442,7 @@ IRP_ELSE
     stop 11
     return
   else if (rc /= 8) then
-    print *,  irp_here//': error in receiving mini' 
+    print *,  irp_here//': error in receiving mini'
     stop 12
   endif
 IRP_ENDIF
@@ -484,19 +451,18 @@ end subroutine
 
 
 
-subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks, b)
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b)
  use selection_types
  use f77_zmq
   implicit none
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
-  double precision, intent(inout) :: pt2(N_states,*)
-  double precision, intent(inout) :: variance(N_states,*)
-  double precision, intent(inout) :: norm(N_states,*)
+  type(pt2_type), intent(inout)  :: pt2_data(*)
   type(selection_buffer), intent(inout) :: b
   integer, intent(out) :: index(*)
   integer, intent(out) :: n_tasks, task_id(*)
   integer :: rc, rn, i
   integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
 
   rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
   if (rc == -1) then
@@ -514,29 +480,19 @@ subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id
     stop 'pull'
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0)
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0)
   if (rc == -1) then
     n_tasks = 1
     task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
+  else if(rc /= 8*size(pt2_serialized)) then
     stop 'pull'
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0)
-  if (rc == -1) then
-    n_tasks = 1
-    task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'pull'
-  endif
-
-  rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0)
-  if (rc == -1) then
-    n_tasks = 1
-    task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'pull'
-  endif
+  do i=1,n_tasks
+    call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+  deallocate(pt2_serialized)
 
   rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0)
   if (rc == -1) then
diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f
index 0ebdd4fd..0d06d1d0 100644
--- a/src/cipsi/run_selection_slave.irp.f
+++ b/src/cipsi/run_selection_slave.irp.f
@@ -18,9 +18,8 @@ subroutine run_selection_slave(thread,iproc,energy)
 
   type(selection_buffer) :: buf, buf2
   logical :: done, buffer_ready
-  double precision :: pt2(N_states)
-  double precision :: variance(N_states)
-  double precision :: norm(N_states)
+  type(pt2_type) :: pt2_data
+
   
   !todo: check for providers that are now unlinked for real/complex
   PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
@@ -33,6 +32,7 @@ subroutine run_selection_slave(thread,iproc,energy)
     PROVIDE psi_selectors_coef_transp psi_det_sorted weight_selection
   endif
 
+  call pt2_alloc(pt2_data,N_states)
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
@@ -47,9 +47,6 @@ subroutine run_selection_slave(thread,iproc,energy)
   buf%N = 0
   buffer_ready = .False.
   ctask = 1
-  pt2(:) = 0d0
-  variance(:) = 0d0
-  norm(:) = 0.d0
 
   do
     integer, external :: get_task_from_taskserver
@@ -74,7 +71,7 @@ subroutine run_selection_slave(thread,iproc,energy)
           stop '-1'
         end if
       end if
-      call select_connected(i_generator,energy,pt2,variance,norm,buf,subset,pt2_F(i_generator))
+      call select_connected(i_generator,energy,pt2_data,buf,subset,pt2_F(i_generator))
     endif
 
     integer, external :: task_done_to_taskserver
@@ -93,12 +90,10 @@ subroutine run_selection_slave(thread,iproc,energy)
       if(ctask > 0) then
         call sort_selection_buffer(buf)
 !        call merge_selection_buffers(buf,buf2)
-!print *, task_id(1), pt2(1), buf%cur, ctask
-        call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask)
+        call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+        call pt2_dealloc(pt2_data)
+        call pt2_alloc(pt2_data,N_states)
 !        buf%mini = buf2%mini
-        pt2(:) = 0d0
-        variance(:) = 0d0
-        norm(:) = 0d0
         buf%cur = 0
       end if
       ctask = 0
@@ -111,14 +106,12 @@ subroutine run_selection_slave(thread,iproc,energy)
   if(ctask > 0) then
     call sort_selection_buffer(buf)
 !    call merge_selection_buffers(buf,buf2)
-    call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask)
+    call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
 !    buf%mini = buf2%mini
-    pt2(:) = 0d0
-    variance(:) = 0d0
-    norm(:) = 0d0
     buf%cur = 0
   end if
   ctask = 0
+  call pt2_dealloc(pt2_data)
 
   integer, external :: disconnect_from_taskserver
   if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
@@ -134,18 +127,17 @@ subroutine run_selection_slave(thread,iproc,energy)
 end subroutine
 
 
-subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_id, ntask)
+subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks)
   use f77_zmq
   use selection_types
   implicit none
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  double precision, intent(in)   :: pt2(N_states)
-  double precision, intent(in)   :: variance(N_states)
-  double precision, intent(in)   :: norm(N_states)
+  type(pt2_type), intent(in)     :: pt2_data
   type(selection_buffer), intent(inout) :: b
-  integer, intent(in) :: ntask, task_id(*)
+  integer, intent(in) :: ntasks, task_id(*)
   integer :: rc
+  double precision, allocatable :: pt2_serialized(:)
 
   rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
   if(rc /= 4) then
@@ -153,20 +145,18 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_
   endif
 
 
-  rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)'
-  endif
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  call pt2_serialize(pt2_data,N_states,pt2_serialized)
 
-  rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)'
-  endif
-
-  rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)'
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+    return
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
   endif
+  deallocate(pt2_serialized)
 
   if (b%cur > 0) then
 
@@ -182,14 +172,14 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_
 
   endif
 
-  rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)
+  rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)
   if(rc /= 4) then
-    print *,  'f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)'
+    print *,  'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)'
   endif
 
-  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)
-  if(rc /= 4*ntask) then
-    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)'
+  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)'
   endif
 
 ! Activate is zmq_socket_push is a REQ
@@ -206,42 +196,34 @@ IRP_ENDIF
 end subroutine
 
 
-subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det, N, task_id, ntask)
+subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks)
   use f77_zmq
   use selection_types
   implicit none
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm(N_states)
+  type(pt2_type), intent(inout) :: pt2_data
   double precision, intent(out) :: val(*)
   integer(bit_kind), intent(out) :: det(N_int, 2, *)
-  integer, intent(out) :: N, ntask, task_id(*)
+  integer, intent(out) :: N, ntasks, task_id(*)
   integer :: rc, rn, i
+  double precision, allocatable :: pt2_serialized(:)
 
   rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
   if(rc /= 4) then
     print *,  'f77_zmq_recv( zmq_socket_pull, N, 4, 0)'
   endif
 
-  pt2(:) = 0.d0
-  variance(:) = 0.d0
-  norm(:) = 0.d0
-
-  rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)'
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0)
+  if (rc == -1) then
+    ntasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)'
-  endif
-
-  rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)
-  if(rc /= 8*N_states) then
-    print *,  'f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)'
-  endif
+  call pt2_deserialize(pt2_data,N_states,pt2_serialized)
+  deallocate(pt2_serialized)
 
   if (N>0) then
       rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)
@@ -255,14 +237,14 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det
       endif
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)
+  rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)
   if(rc /= 4) then
-    print *,  'f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)'
+    print *,  'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)'
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)
-  if(rc /= 4*ntask) then
-    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)'
+  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)'
   endif
 
 ! Activate is zmq_socket_pull is a REP
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index 4513e9cf..f34be9cf 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -19,22 +19,26 @@ BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ]
  variance_match_weight(:) = 1.d0
 END_PROVIDER
 
-subroutine update_pt2_and_variance_weights(pt2, variance, norm2, N_st)
+subroutine update_pt2_and_variance_weights(pt2_data, N_st)
   implicit none
+  use selection_types
   BEGIN_DOC
 ! Updates the PT2- and Variance- matching weights.
   END_DOC
   integer, intent(in)          :: N_st
-  double precision, intent(in) :: pt2(N_st)
-  double precision, intent(in) :: variance(N_st)
-  double precision, intent(in) :: norm2(N_st)
+  type(pt2_type), intent(in)   :: pt2_data
+  double precision             :: pt2(N_st)
+  double precision             :: variance(N_st)
 
-  double precision :: avg, rpt2(N_st), element, dt, x
+  double precision :: avg, element, dt, x
   integer          :: k
   integer, save    :: i_iter=0
   integer, parameter :: i_itermax = 1
   double precision, allocatable, save :: memo_variance(:,:), memo_pt2(:,:)
 
+  pt2(:)      = pt2_data % pt2(:)
+  variance(:) = pt2_data % variance(:)
+
   if (i_iter == 0) then
     allocate(memo_variance(N_st,i_itermax), memo_pt2(N_st,i_itermax))
     memo_pt2(:,:) = 1.d0
@@ -48,11 +52,6 @@ subroutine update_pt2_and_variance_weights(pt2, variance, norm2, N_st)
 
   dt = 2.0d0
 
-  do k=1,N_st
-    ! rPT2
-    rpt2(k) = pt2(k)/(1.d0 + norm2(k))
-  enddo
-
   avg = sum(pt2(1:N_st)) / dble(N_st) - 1.d-32 ! Avoid future division by zero
   do k=1,N_st
     element = exp(dt*(pt2(k)/avg -1.d0))
@@ -179,16 +178,14 @@ subroutine get_mask_phase(det1, pm, Nint)
 end subroutine
 
 
-subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset)
+subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
   !todo: simplify for kpts
   use bitmasks
   use selection_types
   implicit none
   integer, intent(in)            :: i_generator, subset, csubset
   type(selection_buffer), intent(inout) :: b
-  double precision, intent(inout)  :: pt2(N_states)
-  double precision, intent(inout)  :: variance(N_states)
-  double precision, intent(inout)  :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   integer :: k,l
   double precision, intent(in)   :: E0(N_states)
 
@@ -209,7 +206,7 @@ subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset)
       particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) )
       particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) )
   enddo
-  call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,b,subset,csubset)
+  call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset)
   deallocate(fock_diag_tmp)
 end subroutine
 
@@ -258,7 +255,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
 end
 
 
-subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,buf,subset,csubset)
+subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf,subset,csubset)
   use bitmasks
   use selection_types
   implicit none
@@ -270,9 +267,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer(bit_kind), intent(in)  :: hole_mask(N_int,2), particle_mask(N_int,2)
   double precision, intent(in)   :: fock_diag_tmp(mo_num)
   double precision, intent(in)   :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout)  :: variance(N_states)
-  double precision, intent(inout)  :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
 
   integer                         :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,sze
@@ -746,7 +741,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
               call splash_pq_complex(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat_complex, interesting)
               
               if(.not.pert_2rdm)then
-               call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf)
+               !call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf)
+               call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat_complex, buf)
               else
                 print*,irp_here,' not implemented for complex (fill_buffer_double_rdm_complex)'
                 stop -1
@@ -756,9 +752,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
             call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)
 
             if(.not.pert_2rdm)then
-             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf)
+             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
             else
-             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
+             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
             endif
             endif!complex
           end if
@@ -787,7 +783,7 @@ end subroutine
 
 
 
-subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf)
+subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
   use bitmasks
   use selection_types
   implicit none
@@ -795,16 +791,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
   integer, intent(in) :: i_generator, sp, h1, h2
   double precision, intent(in) :: mat(N_states, mo_num, mo_num)
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
-  double precision, intent(in)           :: fock_diag_tmp(mo_num)
+  double precision, intent(in)    :: fock_diag_tmp(mo_num)
   double precision, intent(in)    :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
   logical :: ok
-  integer :: s1, s2, p1, p2, ib, j, istate
+  integer :: s1, s2, p1, p2, ib, j, istate, jstate
   integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
-  double precision :: e_pert, delta_E, val, Hii, w, tmp, alpha_h_psi, coef
+  double precision :: e_pert(N_states), coef(N_states), X(N_states)
+  double precision :: delta_E, val, Hii, w, tmp, alpha_h_psi
   double precision, external :: diag_H_mat_elem_fock
   double precision :: E_shift
 
@@ -812,7 +807,12 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
   double precision, allocatable :: values(:)
   integer, allocatable          :: keys(:,:)
   integer                       :: nkeys
-
+  double precision :: s_weight(N_states,N_states)
+  do jstate=1,N_states
+    do istate=1,N_states
+      s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate))
+    enddo
+  enddo
 
   if(sp == 3) then
     s1 = 1
@@ -902,18 +902,42 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
         if (delta_E < 0.d0) then
             tmp = -tmp
         endif
-        e_pert = 0.5d0 * (tmp - delta_E)
+        e_pert(istate) = 0.5d0 * (tmp - delta_E)
         if (dabs(alpha_h_psi) > 1.d-4) then
-          coef = e_pert / alpha_h_psi
+          coef(istate) = e_pert(istate) / alpha_h_psi
         else
-          coef = alpha_h_psi / delta_E
+          coef(istate) = alpha_h_psi / delta_E
         endif
-        pt2(istate) = pt2(istate) + e_pert
-        variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi
-        norm2(istate) = norm2(istate) + coef * coef
+        if (e_pert(istate) < 0.d0) then
+           X(istate) = -dsqrt(-e_pert(istate))
+        else
+           X(istate) = dsqrt(e_pert(istate))
+        endif
+      enddo
+
+!      ! Gram-Schmidt using input overlap matrix
+!      do istate=1,N_states
+!        do jstate=1,istate-1
+!          if ( (pt2_overlap(jstate,istate) == 0.d0).or.(pt2_overlap(jstate,jstate) == 0.d0) ) cycle
+!          coef(istate) = coef(istate) - pt2_overlap(jstate,istate)/pt2_overlap(jstate,jstate) * coef(jstate)
+!        enddo
+!      enddo
+
+      do istate=1, N_states
+        do jstate=1,N_states
+          pt2_data % overlap(jstate,istate) += coef(jstate) * coef(istate)
+        enddo
+      enddo
+
+      do istate=1,N_states
+        alpha_h_psi = mat(istate, p1, p2)
+
+        pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi
+        pt2_data % pt2(istate)      += e_pert(istate)
 
 !!!DEBUG
-!        pt2(istate) = pt2(istate) - e_pert + alpha_h_psi**2/delta_E
+!        delta_E = E0(istate) - Hii + E_shift
+!        pt2_data % pt2(istate) = pt2_data % pt2(istate) + alpha_h_psi**2/delta_E
 !
 !        integer :: k
 !        double precision :: alpha_h_psi_2,hij
@@ -934,14 +958,26 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
 
           case(5)
             ! Variance selection
-            w = w - alpha_h_psi * alpha_h_psi * selection_weight(istate)
+            w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)
+            do jstate=1,N_states
+              if (istate == jstate) cycle
+              w = w + alpha_h_psi*mat(jstate,p1,p2) * s_weight(istate,jstate)
+            enddo
 
           case(6)
-            w = w - coef * coef * selection_weight(istate)
+            w = w - coef(istate) * coef(istate) * s_weight(istate,istate)
+            do jstate=1,N_states
+              if (istate == jstate) cycle
+              w = w + coef(istate)*coef(jstate) * s_weight(istate,jstate)
+            enddo
 
           case default
             ! Energy selection
-            w = w + e_pert * selection_weight(istate)
+            w = w + e_pert(istate) * s_weight(istate,istate)
+            do jstate=1,N_states
+              if (istate == jstate) cycle
+              w = w - dabs(X(istate))*X(jstate) * s_weight(istate,jstate)
+            enddo
 
         end select
       end do
@@ -2049,7 +2085,7 @@ end
 !                                                                              !
 !==============================================================================!
 
-subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf)
+subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
   !todo: should be okay for complex
   use bitmasks
   use selection_types
@@ -2058,17 +2094,16 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned
   integer, intent(in) :: i_generator, sp, h1, h2
   complex*16, intent(in) :: mat(N_states, mo_num, mo_num)
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
-  double precision, intent(in)           :: fock_diag_tmp(mo_num)
+  double precision, intent(in)    :: fock_diag_tmp(mo_num)
   double precision, intent(in)    :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_date
   type(selection_buffer), intent(inout) :: buf
   logical :: ok
-  integer :: s1, s2, p1, p2, ib, j, istate
+  integer :: s1, s2, p1, p2, ib, j, istate, jstate
   integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
-  double precision :: e_pert, delta_E, val, Hii, w, tmp
-  complex*16 ::  alpha_h_psi, coef, val_c
+  double precision :: e_pert(n_states), x(n_states)
+  double precision :: delta_E, val, Hii, w, tmp
+  complex*16 ::  alpha_h_psi, coef(n_states), val_c
   double precision, external :: diag_H_mat_elem_fock
   double precision :: E_shift
 
@@ -2076,7 +2111,12 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned
 !  double precision, allocatable :: values(:)
 !  integer, allocatable          :: keys(:,:)
 !  integer                       :: nkeys
-  
+  double precision :: s_weight(n_states,n_states)
+  do jstate=1,n_states
+    do istate=1,n_states
+      s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate))
+    enddo
+  enddo
 
   if(sp == 3) then
     s1 = 1
@@ -2166,15 +2206,32 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned
         if (delta_E < 0.d0) then
             tmp = -tmp
         endif
-        e_pert = 0.5d0 * (tmp - delta_E)
+        e_pert(istate) = 0.5d0 * (tmp - delta_E)
+        !TODO: check conjugate for coef
         if (cdabs(alpha_h_psi) > 1.d-4) then
-          coef = e_pert / alpha_h_psi
+          coef(istate) = e_pert / alpha_h_psi
         else
-          coef = alpha_h_psi / delta_E
+          coef(istate) = alpha_h_psi / delta_E
         endif
-        pt2(istate) = pt2(istate) + e_pert
-        variance(istate) = variance(istate) + cdabs(alpha_h_psi * alpha_h_psi)
-        norm2(istate) = norm2(istate) + cdabs(coef * coef)
+        if (e_pert(istate) < 0.d0) then
+          x(istate) = -dsqrt(-e_pert(istate))
+        else
+          x(istate) = dsqrt(e_pert(istate))
+        endif
+      enddo
+
+      do istate=1,n_states
+        do jstate=1,n_states
+          val_c = coef(jstate) * dconjg(coef(istate)) 
+          pt2_data % overlap(jstate,istate) += dble(val_c) 
+          pt2_data % overlap_imag(jstate,istate) += dimag(val_c) 
+        enddo
+      enddo
+
+      do istate=1,n_states
+        alpha_h_psi = mat(istate, p1, p2)
+        pt2_data % variance(istate) += cdabs(alpha_h_psi * alpha_h_psi)
+        pt2_data % pt2(istate) += e_pert(istate)
 
 !!!DEBUG
 !        integer :: k
@@ -2194,16 +2251,30 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned
 
         select case (weight_selection)
 
+          !TODO: check off-diagonals
           case(5)
             ! Variance selection
-            w = w - cdabs(alpha_h_psi * alpha_h_psi) * selection_weight(istate)
+            w = w - cdabs(alpha_h_psi * alpha_h_psi) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              w = w + cdabs(alpha_h_psi * mat(jstate,p1,p2)) * s_weight(istate,jstate)
+            enddo
 
           case(6)
-            w = w - cdabs(coef * coef) * selection_weight(istate)
+            w = w - cdabs(coef(istate) * coef(istate)) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              w = w + cdabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate)
+            enddo
 
           case default
             ! Energy selection
-            w = w + e_pert * selection_weight(istate)
+            w = w + e_pert(istate) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              !TODO: why dabs?
+              w = w - dabs(x(istate))*x(jstate) * s_weight(istate,jstate)
+            enddo
 
         end select
       end do
diff --git a/src/cipsi/selection_types.f90 b/src/cipsi/selection_types.f90
index 29e48524..53250b57 100644
--- a/src/cipsi/selection_types.f90
+++ b/src/cipsi/selection_types.f90
@@ -5,5 +5,26 @@ module selection_types
     double precision, pointer :: val(:)
     double precision          :: mini
   endtype
+
+  type pt2_type
+    double precision, allocatable :: pt2(:)
+    double precision, allocatable :: rpt2(:)
+    double precision, allocatable :: variance(:)
+    double precision, allocatable :: overlap(:,:)
+  endtype
+
+  contains
+
+  integer function pt2_type_size(N)
+    implicit none
+    integer, intent(in) :: N
+    if (is_complex) then
+      pt2_type_size = (3*n + 2*n*n)
+    else
+      pt2_type_size = (3*n + n*n)
+    endif
+
+  end function
+
 end module
 
diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f
index f9d0ea42..ea3dce9d 100644
--- a/src/cipsi/stochastic_cipsi.irp.f
+++ b/src/cipsi/stochastic_cipsi.irp.f
@@ -1,12 +1,15 @@
 subroutine run_stochastic_cipsi
+  use selection_types
   implicit none
   BEGIN_DOC
 ! Selected Full Configuration Interaction with Stochastic selection and PT2.
   END_DOC
   integer                        :: i,j,k
-  double precision, allocatable  :: pt2(:), variance(:), norm2(:), rpt2(:), zeros(:)
+  double precision, allocatable  :: zeros(:)
   integer                        :: to_select
-  logical, external :: qp_stop
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+
 
   double precision :: rss
   double precision, external :: memory_of_double
@@ -19,7 +22,9 @@ subroutine run_stochastic_cipsi
   rss = memory_of_double(N_states)*4.d0
   call check_mem(rss,irp_here)
 
-  allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm2(N_states), variance(N_states))
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
 
   double precision               :: hf_energy_ref
   logical                        :: has
@@ -28,10 +33,13 @@ subroutine run_stochastic_cipsi
   relative_error=PT2_relative_error
 
   zeros = 0.d0
-  pt2 = -huge(1.e0)
-  rpt2 = -huge(1.e0)
-  norm2 = 0.d0
-  variance = huge(1.e0)
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+  if (is_complex) then
+    pt2_data % overlap_imag = 0.d0
+  endif
 
   if (s2_eig) then
     call make_s2_eigenfunction
@@ -73,14 +81,13 @@ subroutine run_stochastic_cipsi
   endif
 
   double precision :: correlation_energy_ratio
-  double precision :: error(N_states)
 
   correlation_energy_ratio = 0.d0
 
   do while (                                                         &
         (N_det < N_det_max) .and.                                    &
-        (maxval(abs(rpt2(1:N_states))) > pt2_max) .and.               &
-        (maxval(abs(variance(1:N_states))) > variance_max) .and.     &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and.               &
+        (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.     &
         (correlation_energy_ratio <= correlation_energy_ratio_max)   &
         )
       write(*,'(A)')  '--------------------------------------------------------------------------------'
@@ -89,26 +96,24 @@ subroutine run_stochastic_cipsi
     to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
     to_select = max(N_states_diag, to_select)
 
-    pt2 = 0.d0
-    variance = 0.d0
-    norm2 = 0.d0
-    call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, &
-      norm2, to_select) ! Stochastic PT2 and selection
 
-    do k=1,N_states
-      rpt2(k) = pt2(k)/(1.d0 + norm2(k))
-    enddo
+    call pt2_dealloc(pt2_data)
+    call pt2_dealloc(pt2_data_err)
+    call pt2_alloc(pt2_data, N_states)
+    call pt2_alloc(pt2_data_err, N_states)
+    call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error,to_select) ! Stochastic PT2 and selection
 
     correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref)  /     &
-                    (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref)
+                    (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref)
     correlation_energy_ratio = min(1.d0,correlation_energy_ratio)
 
     call write_double(6,correlation_energy_ratio, 'Correlation ratio')
-    call print_summary(psi_energy_with_nucl_rep,pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2)
+    call print_summary(psi_energy_with_nucl_rep, &
+       pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2)
 
-    call save_energy(psi_energy_with_nucl_rep, rpt2)
+    call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2)
 
-    call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det)
+    call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
     N_iter += 1
 
@@ -147,20 +152,19 @@ subroutine run_stochastic_cipsi
         call save_energy(psi_energy_with_nucl_rep, zeros)
     endif
 
-    pt2(:) = 0.d0
-    variance(:) = 0.d0
-    norm2(:) = 0.d0
-    call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, &
-      norm2,0) ! Stochastic PT2
+    call pt2_dealloc(pt2_data)
+    call pt2_dealloc(pt2_data_err)
+    call pt2_alloc(pt2_data, N_states)
+    call pt2_alloc(pt2_data_err, N_states)
+    call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
 
-    do k=1,N_states
-      rpt2(k) = pt2(k)/(1.d0 + norm2(k))
-    enddo
-
-    call save_energy(psi_energy_with_nucl_rep, rpt2)
-    call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2)
-    call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det)
+    call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2)
+    call print_summary(psi_energy_with_nucl_rep, &
+       pt2_data , pt2_data_err, N_det, N_occ_pattern, N_states, psi_s2)
+    call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
   endif
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
 
 end
diff --git a/src/cipsi/zmq_selection.irp.f b/src/cipsi/zmq_selection.irp.f
index 4456b53e..5ac1d6fb 100644
--- a/src/cipsi/zmq_selection.irp.f
+++ b/src/cipsi/zmq_selection.irp.f
@@ -1,4 +1,4 @@
-subroutine ZMQ_selection(N_in, pt2, variance, norm2)
+subroutine ZMQ_selection(N_in, pt2_data)
   use f77_zmq
   use selection_types
 
@@ -7,11 +7,9 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2)
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket , zmq_socket_pull
   integer, intent(in)            :: N_in
   type(selection_buffer)         :: b
-  integer                        :: i, N
+  integer                        :: i, l, N
   integer, external              :: omp_get_thread_num
-  double precision, intent(out)  :: pt2(N_states)
-  double precision, intent(out)  :: variance(N_states)
-  double precision, intent(out)  :: norm2(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data
 
 !  PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators
 
@@ -107,6 +105,12 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2)
 
   f(:) = 1.d0
   if (.not.do_pt2) then
+<<<<<<< HEAD
+    double precision :: f(N_states), u_dot_u
+    do k=1,min(N_det,N_states)
+     f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+    enddo
+=======
   double precision :: f(N_states), u_dot_u
     if (is_complex) then
       double precision :: u_dot_u_complex
@@ -118,22 +122,19 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2)
        f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
       enddo
     endif
+>>>>>>> origin/cleaning_kpts
   endif
 
-  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2, variance, norm2)  PRIVATE(i) NUM_THREADS(nproc_target+1)
+  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2_data)  PRIVATE(i) NUM_THREADS(nproc_target+1)
   i = omp_get_thread_num()
   if (i==0) then
-    call selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2)
+    call selection_collector(zmq_socket_pull, b, N, pt2_data)
   else
     call selection_slave_inproc(i)
   endif
   !$OMP END PARALLEL
+
   call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection')
-  do i=N_det+1,N_states
-    pt2(i) = 0.d0
-    variance(i) = 0.d0
-    norm2(i) = 0.d0
-  enddo
   if (N_in > 0) then
     if (s2_eig) then
       call make_selection_buffer_s2(b)
@@ -141,13 +142,28 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2)
     call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
   endif
   call delete_selection_buffer(b)
+
   do k=1,N_states
-    pt2(k) = pt2(k) * f(k)
-    variance(k) = variance(k) * f(k)
-    norm2(k) = norm2(k) * f(k)
+    pt2_data % pt2(k) = pt2_data % pt2(k) * f(k)
+    pt2_data % variance(k) = pt2_data % variance(k) * f(k)
+    do l=1,N_states
+      pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l))
+      pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l))
+    enddo
+
+    pt2_data % rpt2(k) =  &
+       pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k))
   enddo
 
-  call update_pt2_and_variance_weights(pt2, variance, norm2, N_states)
+  pt2_overlap(:,:) = pt2_data % overlap(:,:)
+
+  print *, 'Overlap of perturbed states:'
+  do l=1,N_states
+    print *, pt2_overlap(l,:)
+  enddo
+  print *, '-------'
+  SOFT_TOUCH pt2_overlap
+  call update_pt2_and_variance_weights(pt2_data, N_states)
 
 end subroutine
 
@@ -159,7 +175,7 @@ subroutine selection_slave_inproc(i)
   call run_selection_slave(1,i,pt2_e0_denominator)
 end
 
-subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2)
+subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
   use f77_zmq
   use selection_types
   use bitmasks
@@ -169,12 +185,12 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2)
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
   type(selection_buffer), intent(inout) :: b
   integer, intent(in)            :: N
-  double precision, intent(out)      :: pt2(N_states)
-  double precision, intent(out)      :: variance(N_states)
-  double precision, intent(out)      :: norm2(N_states)
-  double precision                   :: pt2_mwen(N_states)
-  double precision                   :: variance_mwen(N_states)
-  double precision                   :: norm2_mwen(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data
+  type(pt2_type)                 :: pt2_data_tmp
+
+  double precision               :: pt2_mwen(N_states)
+  double precision               :: variance_mwen(N_states)
+  double precision               :: norm2_mwen(N_states)
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
 
@@ -192,24 +208,24 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2)
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
   call create_selection_buffer(N, N*2, b2)
+  integer :: k
   double precision :: rss
   double precision, external :: memory_of_int
   rss = memory_of_int(N_det_generators)
   call check_mem(rss,irp_here)
   allocate(task_id(N_det_generators))
   more = 1
-  pt2(:)           = 0d0
-  variance(:)      = 0.d0
-  norm2(:)          = 0.d0
-  pt2_mwen(:)      = 0.d0
-  variance_mwen(:) = 0.d0
-  norm2_mwen(:)     = 0.d0
+  pt2_data % pt2(:)      = 0d0
+  pt2_data % variance(:) = 0.d0
+  pt2_data % overlap(:,:) = 0.d0
+  if (is_complex) then
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
+  call pt2_alloc(pt2_data_tmp,N_states)
   do while (more == 1)
-    call pull_selection_results(zmq_socket_pull, pt2_mwen, variance_mwen, norm2_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
+    call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
 
-    pt2(:) += pt2_mwen(:)
-    variance(:) += variance_mwen(:)
-    norm2(:) += norm2_mwen(:)
+    call pt2_add(pt2_data, 1.d0, pt2_data_tmp)
     do i=1, b2%cur
       call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
       if (b2%val(i) > b%mini) exit
@@ -225,6 +241,7 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2)
       endif
     end do
   end do
+  call pt2_dealloc(pt2_data_tmp)
 
 
   call delete_selection_buffer(b2)
diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg
index 489bf6da..b03ab374 100644
--- a/src/determinants/EZFIO.cfg
+++ b/src/determinants/EZFIO.cfg
@@ -44,7 +44,7 @@ default: 2
 type: integer
 doc: Weight used in the selection. 0: input state-average weight, 1: 1./(c_0^2), 2: rPT2 matching, 3: variance matching, 4: variance and rPT2 matching, 5: variance minimization and matching, 6: CI coefficients 7: input state-average multiplied by variance and rPT2 matching 8: input state-average multiplied by rPT2 matching 9: input state-average multiplied by variance matching
 interface: ezfio,provider,ocaml
-default: 2
+default: 1
 
 [threshold_generators]
 type: Threshold
diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f
index 83994781..eb11e28c 100644
--- a/src/fci/pt2.irp.f
+++ b/src/fci/pt2.irp.f
@@ -28,35 +28,38 @@ end
 
 subroutine run
   implicit none
+  use selection_types
   integer                        :: i,j,k
   logical, external              :: detEq
 
-  double precision               :: pt2(N_states)
+  type(pt2_type)                 :: pt2_data, pt2_data_err
   integer                        :: degree
   integer                        :: n_det_before, to_select
   double precision               :: threshold_davidson_in
 
-  double precision               :: E_CI_before(N_states), relative_error, error(N_states), variance(N_states), norm2(N_states), rpt2(N_states)
+  double precision               :: relative_error
+  double precision, allocatable  :: E_CI_before(:)
 
-  pt2(:) = 0.d0
+  allocate ( E_CI_before(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
 
   E_CI_before(:) = psi_energy(:) + nuclear_repulsion
   relative_error=PT2_relative_error
 
   if (do_pt2) then
-    call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, &
-      norm2,0) ! Stochastic PT2
+    call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
   else
-    call ZMQ_selection(0, pt2, variance, norm2)
+    call ZMQ_selection(0, pt2_data)
   endif
 
-  do k=1,N_states
-    rpt2(k) = pt2(k)/(1.d0 + norm2(k))
-  enddo
+  call print_summary(psi_energy_with_nucl_rep(1:N_states), &
+    pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2)
 
-  call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2)
-
-  call save_energy(E_CI_before,pt2)
+  call save_energy(E_CI_before, pt2_data % pt2)
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  deallocate(E_CI_before)
 end
 
 
diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats
index 6f13e595..54845685 100644
--- a/src/hartree_fock/10.hf.bats
+++ b/src/hartree_fock/10.hf.bats
@@ -11,6 +11,7 @@ function run() {
   qp edit --check
   qp reset --mos
   qp set scf_utils n_it_scf_max 50
+  qp set ao_one_e_ints lin_dep_cutoff 1.e-50
   qp run scf
 #  qp set_frozen_core 
   energy="$(ezfio get hartree_fock energy)"
diff --git a/src/iterations/print_summary.irp.f b/src/iterations/print_summary.irp.f
index 87bc28fa..0fabffc7 100644
--- a/src/iterations/print_summary.irp.f
+++ b/src/iterations/print_summary.irp.f
@@ -1,16 +1,17 @@
-subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_st,s2_)
+subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_occ_pattern_,n_st,s2_)
+  use selection_types
   implicit none
   BEGIN_DOC
 ! Print the extrapolated energy in the output
   END_DOC
 
   integer, intent(in)            :: n_det_, n_occ_pattern_, n_st
-  double precision, intent(in)   :: e_(n_st), pt2_(n_st), variance_(n_st), norm_(n_st), error_(n_st), s2_(n_st)
+  double precision, intent(in)   :: e_(n_st), s2_(n_st)
+  type(pt2_type)  , intent(in)   :: pt2_data, pt2_data_err
   integer                        :: i, k
   integer                        :: N_states_p
   character*(9)                  :: pt2_string
   character*(512)                :: fmt
-  double precision               :: f(n_st)
 
   if (do_pt2) then
     pt2_string = '        '
@@ -20,10 +21,6 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_
 
   N_states_p = min(N_det_,n_st)
 
-  do i=1,N_states_p
-    f(i) = 1.d0/(1.d0+norm_(i))
-  enddo
-
   print *, ''
   print '(A,I12)',  'Summary at N_det = ', N_det_
   print '(A)',      '-----------------------------------'
@@ -42,16 +39,16 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_
     write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0
   endif
   write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))'
-  write(*,fmt) '# PT2 '//pt2_string, (pt2_(k), error_(k), k=1,N_states_p)
-  write(*,fmt) '# rPT2'//pt2_string, (pt2_(k)*f(k), error_(k)*f(k), k=1,N_states_p)
+  write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p)
   write(*,'(A)') '#'
-  write(*,fmt) '# E+PT2      ', (e_(k)+pt2_(k),error_(k), k=1,N_states_p)
-  write(*,fmt) '# E+rPT2     ', (e_(k)+pt2_(k)*f(k),error_(k)*f(k), k=1,N_states_p)
+  write(*,fmt) '# E+PT2      ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# E+rPT2     ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p)
   if (N_states_p > 1) then
-    write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1)), &
-      dsqrt(error_(k)*error_(k)+error_(1)*error_(1)), k=1,N_states_p)
-    write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1))*27.211396641308d0, &
-      dsqrt(error_(k)*error_(k)+error_(1)*error_(1))*27.211396641308d0, k=1,N_states_p)
+    write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p)
+    write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p)
   endif
   write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
   write(*,fmt)
@@ -68,12 +65,12 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_
     print*,'* State ',k
     print *,  '< S^2 >         = ', s2_(k)
     print *,  'E               = ', e_(k)
-    print *,  'Variance        = ', variance_(k)
-    print *,  'PT norm         = ', dsqrt(norm_(k))
-    print *,  'PT2             = ', pt2_(k)
-    print *,  'rPT2            = ', pt2_(k)*f(k)
-    print *,  'E+PT2 '//pt2_string//'  = ', e_(k)+pt2_(k), ' +/- ', error_(k)
-    print *,  'E+rPT2'//pt2_string//'  = ', e_(k)+pt2_(k)*f(k), ' +/- ', error_(k)*f(k)
+    print *,  'Variance        = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k)
+    print *,  'PT norm         = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k))
+    print *,  'PT2             = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'rPT2            = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
     print *,  ''
   enddo
 
@@ -87,14 +84,14 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_
     print *,  '-----'
     print*, 'Variational + perturbative Energy difference (au | eV)'
     do i=2, N_states_p
-      print*,'Delta E = ', (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))), &
-        (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))) * 27.211396641308d0
+      print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), &
+        (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0
     enddo
     print *,  '-----'
     print*, 'Variational + renormalized perturbative Energy difference (au | eV)'
     do i=2, N_states_p
-      print*,'Delta E = ', (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))), &
-        (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))) * 27.211396641308d0
+      print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), &
+        (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0
     enddo
   endif
 
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index d1236ce7..dc44e262 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -186,10 +186,10 @@ END_DOC
 
   implicit none
 
-  double precision,intent(in)   :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*)
+  integer,intent(inout)         :: dim_DIIS
+  double precision,intent(in)   :: Fock_matrix_DIIS(ao_num,ao_num,dim_DIIS),error_matrix_DIIS(ao_num,ao_num,dim_DIIS)
   integer,intent(in)            :: iteration_SCF, size_Fock_matrix_AO
   double precision,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num)
-  integer,intent(inout)         :: dim_DIIS
 
   double precision,allocatable  :: B_matrix_DIIS(:,:),X_vector_DIIS(:)
   double precision,allocatable  :: C_vector_DIIS(:)
@@ -212,11 +212,12 @@ END_DOC
   )
 
 ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
   do j=1,dim_DIIS
+    j_DIIS = min(dim_DIIS,mod(iteration_SCF-j,max_dim_DIIS)+1)
     do i=1,dim_DIIS
 
-      j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1
-      i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1
+      i_DIIS = min(dim_DIIS,mod(iteration_SCF-i,max_dim_DIIS)+1)
 
 ! Compute product of two errors vectors
 
@@ -229,7 +230,6 @@ END_DOC
 
 ! Compute Trace
 
-      B_matrix_DIIS(i,j) = 0.d0
       do k=1,ao_num
         B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k)
       enddo
@@ -238,12 +238,11 @@ END_DOC
 
 ! Pad B matrix and build the X matrix
 
+  C_vector_DIIS(:) = 0.d0
   do i=1,dim_DIIS
     B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
     B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
-    C_vector_DIIS(i) = 0.d0
   enddo
-  B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0
   C_vector_DIIS(dim_DIIS+1) = -1.d0
 
   deallocate(scratch)
@@ -259,9 +258,10 @@ END_DOC
   allocate(AF(dim_DIIS+1,dim_DIIS+1))
   allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
   allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
 
   anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, &
-              size(B_matrix_DIIS,1), scratch)
+              size(B_matrix_DIIS,1), scratch(1,1))
 
   AF(:,:) = B_matrix_DIIS(:,:)
   call dgetrf(dim_DIIS+1,dim_DIIS+1,AF,size(AF,1),ipiv,info)
diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f
index a7d5b978..417b25ad 100644
--- a/src/tools/molden.irp.f
+++ b/src/tools/molden.irp.f
@@ -17,7 +17,7 @@ program molden
 
   write(i_unit_output,'(A)') '[Molden Format]'
 
-  write(i_unit_output,'(A)') '[Atoms] ANGSTROM'
+  write(i_unit_output,'(A)') '[Atoms] Angs'
   do i = 1, nucl_num
     write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))')                   &
         trim(element_name(int(nucl_charge(i)))),                     &
diff --git a/src/zmq/utils.irp.f b/src/zmq/utils.irp.f
index 503dedd2..07e3a88f 100644
--- a/src/zmq/utils.irp.f
+++ b/src/zmq/utils.irp.f
@@ -585,6 +585,7 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,name_in)
     stop 'Wrong end of job'
   endif
 
+  message = repeat(' ',512)
   do i=360,1,-1
     rc = f77_zmq_send(zmq_to_qp_run_socket, 'end_job '//trim(zmq_state),8+len(trim(zmq_state)),0)
     rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 512, 0)
@@ -645,6 +646,7 @@ integer function connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
     endif
   endif
 
+  message = repeat(' ',512)
   rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
   message = trim(message(1:rc))
   if(message(1:5) == "error") then