From 622aee8bf5d7e1be0424b14b27a98b0290e6b4f5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Sat, 29 Aug 2020 01:15:48 +0200
Subject: [PATCH] Introduced overlap

---
 src/cipsi/pert_rdm_providers.irp.f |  12 ++--
 src/cipsi/pt2_stoch_routines.irp.f |  25 +++----
 src/cipsi/run_pt2_slave.irp.f      | 109 +++++++++--------------------
 src/cipsi/selection.irp.f          |  64 ++++++++++-------
 src/cipsi/selection_types.f90      |   7 ++
 5 files changed, 96 insertions(+), 121 deletions(-)

diff --git a/src/cipsi/pert_rdm_providers.irp.f b/src/cipsi/pert_rdm_providers.irp.f
index e2917261..82ef1e63 100644
--- a/src/cipsi/pert_rdm_providers.irp.f
+++ b/src/cipsi/pert_rdm_providers.irp.f
@@ -31,7 +31,7 @@ BEGIN_PROVIDER [double precision, pert_2rdm_provider, (n_orb_pert_rdm,n_orb_pert
 
 END_PROVIDER
 
-subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection)
+subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection)
   use bitmasks
   use selection_types
   implicit none
@@ -44,9 +44,7 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
   double precision, intent(in)           :: fock_diag_tmp(mo_num)
   double precision, intent(in)    :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
   logical :: ok
   integer :: s1, s2, p1, p2, ib, j, istate
@@ -152,9 +150,9 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo
         e_pert = 0.5d0 * (tmp - delta_E)
         coef(istate) = e_pert / alpha_h_psi
         print*,e_pert,coef,alpha_h_psi
-        pt2(istate) = pt2(istate) + e_pert
-        variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi
-        norm(istate) = norm(istate) + coef(istate) * coef(istate)
+        pt2_data % pt2(istate) += e_pert
+        pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi
+        pt2_data % norm2(istate) = coef(istate) * coef(istate)
 
         if (weight_selection /= 5) then
           ! Energy selection
diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f
index 04df5e16..5acd4752 100644
--- a/src/cipsi/pt2_stoch_routines.irp.f
+++ b/src/cipsi/pt2_stoch_routines.irp.f
@@ -243,7 +243,7 @@ subroutine ZMQ_pt2(E, pt2_data, relative_error, N_in)
             ( 1.d0*pt2_n_tasks_max            & ! task_id, index
             + 0.635d0*N_det_generators        & ! f,d
             + 3.d0*N_det_generators*N_states  & ! eI, vI, nI
-            + 3.d0*pt2_n_tasks_max*N_states   & ! eI_task, vI_task, nI_task
+            + pt2_n_tasks_max*pt2_type_size(N_states)/8 & ! pt2_data_task
             + 4.d0*(pt2_N_teeth+1)            & ! S, S2, T2, T3
             + 1.d0*(N_int*2.d0*N + N)         & ! selection buffer
             + 1.d0*(N_int*2.d0*N + N)         & ! sort selection buffer
@@ -359,10 +359,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, b, N_)
   type(selection_buffer), intent(inout) :: b
   integer, intent(in)            :: N_
 
-
-  double precision, allocatable      :: eI(:,:), eI_task(:,:), Se(:), Se2(:)
-  double precision, allocatable      :: vI(:,:), vI_task(:,:), Sv(:), Sv2(:)
-  double precision, allocatable      :: nI(:,:), nI_task(:,:), Sn(:), Sn2(:)
+  type(pt2_type), allocatable    :: pt2_data_task(:)
+  double precision, allocatable      :: eI(:,:), Se(:), Se2(:)
+  double precision, allocatable      :: vI(:,:), Sv(:), Sv2(:)
+  double precision, allocatable      :: nI(:,:), Sn(:), Sn2(:)
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
   integer, external :: zmq_delete_tasks_async_send
@@ -399,9 +399,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, b, N_)
   ! updated in ZMQ_pt2
   allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
   allocate(d(N_det_generators+1))
-  allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
-  allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max))
-  allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max))
+  allocate(pt2_data_task(pt2_n_tasks_max))
+  allocate(eI(N_states, N_det_generators))
+  allocate(vI(N_states, N_det_generators))
+  allocate(nI(N_states, N_det_generators))
   allocate(Se(pt2_N_teeth+1), Se2(pt2_N_teeth+1))
   allocate(Sv(pt2_N_teeth+1), Sv2(pt2_N_teeth+1))
   allocate(Sn(pt2_N_teeth+1), Sn2(pt2_N_teeth+1))
@@ -531,7 +532,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, b, N_)
     else if(more == 0) then
       exit
     else
-      call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks, b2)
+      call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2)
       if(n_tasks > pt2_n_tasks_max)then
        print*,'PB !!!'
        print*,'If you see this, send a bug report with the following content'
@@ -550,9 +551,9 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, b, N_)
          print*,'i,index(i),size(ei,2) = ',i,index(i),size(ei,2)
          stop -1
         endif
-        eI(1:N_states, index(i)) += eI_task(1:N_states,i)
-        vI(1:N_states, index(i)) += vI_task(1:N_states,i)
-        nI(1:N_states, index(i)) += nI_task(1:N_states,i)
+        eI(1:N_states, index(i)) += pt2_data_task(i) % pt2(1:N_states)
+        vI(1:N_states, index(i)) += pt2_data_task(i) % variance(1:N_states)
+        nI(1:N_states, index(i)) += pt2_data_task(i) % norm2(1:N_states)
         f(index(i)) -= 1
       end do
       do i=1, b2%cur
diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f
index 7df98a87..ae0ac2a0 100644
--- a/src/cipsi/run_pt2_slave.irp.f
+++ b/src/cipsi/run_pt2_slave.irp.f
@@ -1,8 +1,8 @@
- use omp_lib 
+ use omp_lib
  use selection_types
  use f77_zmq
 BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
- use omp_lib 
+ use omp_lib
  implicit none
  BEGIN_DOC
  ! Global buffer for the OpenMP selection
@@ -11,7 +11,7 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
 END_PROVIDER
 
 BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
- use omp_lib 
+ use omp_lib
  implicit none
  BEGIN_DOC
  ! Global buffer for the OpenMP selection
@@ -61,7 +61,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
   type(selection_buffer) :: b
   logical :: done, buffer_ready
 
-  double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
+  type(pt2_type), allocatable :: pt2_data(:)
   integer :: n_tasks, k, N
   integer, allocatable :: i_generator(:), subset(:)
 
@@ -70,10 +70,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
 !  logical :: sending
 
   allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
-  allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
-  allocate(variance(N_states,pt2_n_tasks_max))
-  allocate(norm(N_states,pt2_n_tasks_max))
-
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
   integer, external :: connect_to_taskserver
@@ -120,13 +117,11 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
     double precision :: time0, time1
     call wall_time(time0)
     do k=1,n_tasks
-        pt2(:,k) = 0.d0
-        variance(:,k) = 0.d0
-        norm(:,k) = 0.d0
+        call pt2_alloc(pt2_data(k),N_states)
         b%cur = 0
 !double precision :: time2
 !call wall_time(time2)
-        call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k)))
+        call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
 !call wall_time(time1)
 !print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
     enddo
@@ -138,7 +133,10 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
       done = .true.
     endif
     call sort_selection_buffer(b)
-    call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks)
+    call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks)
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
     b%cur=0
 
 !    ! Try to adjust n_tasks around nproc/2 seconds per job
@@ -183,7 +181,7 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
   type(selection_buffer) :: b
   logical :: done, buffer_ready
 
-  double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
+  type(pt2_type), allocatable :: pt2_data(:)
   integer :: n_tasks, k, N
   integer, allocatable :: i_generator(:), subset(:)
 
@@ -193,9 +191,7 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 
 
   allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
-  allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
-  allocate(variance(N_states,pt2_n_tasks_max))
-  allocate(norm(N_states,pt2_n_tasks_max))
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
@@ -243,13 +239,11 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
     double precision :: time0, time1
     call wall_time(time0)
     do k=1,n_tasks
-        pt2(:,k) = 0.d0
-        variance(:,k) = 0.d0
-        norm(:,k) = 0.d0
+        call pt2_alloc(pt2_data(k),N_states)
         b%cur = 0
 !double precision :: time2
 !call wall_time(time2)
-        call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k)))
+        call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
 !call wall_time(time1)
 !print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
     enddo
@@ -269,13 +263,16 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
     call omp_unset_lock(global_selection_buffer_lock)
     if ( iproc == 1 ) then
       call omp_set_lock(global_selection_buffer_lock)
-      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, global_selection_buffer, task_id, n_tasks,sending)
+      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, n_tasks,sending)
       global_selection_buffer%cur = 0
       call omp_unset_lock(global_selection_buffer_lock)
     else
-      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks,sending)
+      call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks,sending)
     endif
 
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
 !    ! Try to adjust n_tasks around nproc/2 seconds per job
 !    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
     n_tasks = 1
@@ -298,34 +295,30 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 end subroutine
 
 
-subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks)
+subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks)
  use selection_types
  use f77_zmq
   implicit none
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  double precision, intent(in)   :: pt2(N_states,n_tasks)
-  double precision, intent(in)   :: variance(N_states,n_tasks)
-  double precision, intent(in)   :: norm(N_states,n_tasks)
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
   integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
   type(selection_buffer), intent(inout) :: b
 
   logical :: sending
   sending = .False.
-  call push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending)
+  call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
   call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending)
 end subroutine
 
 
-subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending)
+subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
  use selection_types
  use f77_zmq
   implicit none
 
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  double precision, intent(in)   :: pt2(N_states,n_tasks)
-  double precision, intent(in)   :: variance(N_states,n_tasks)
-  double precision, intent(in)   :: norm(N_states,n_tasks)
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
   integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
   type(selection_buffer), intent(inout) :: b
   logical, intent(inout) :: sending
@@ -358,32 +351,12 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, no
   endif
 
 
-  rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE)
+  rc = f77_zmq_send( zmq_socket_push, pt2_data, pt2_type_size(N_states)*n_tasks, ZMQ_SNDMORE)
   if (rc == -1) then
     print *,  irp_here, ': error sending result'
     stop 3
     return
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'push'
-  endif
-
-
-  rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE)
-  if (rc == -1) then
-    print *,  irp_here, ': error sending result'
-    stop 4
-    return
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'push'
-  endif
-
-
-  rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE)
-  if (rc == -1) then
-    print *,  irp_here, ': error sending result'
-    stop 5
-    return
-  else if(rc /= 8*N_states*n_tasks) then
+  else if(rc /= pt2_type_size(N_states)*n_tasks) then
     stop 'push'
   endif
 
@@ -475,7 +448,7 @@ IRP_ELSE
     stop 11
     return
   else if (rc /= 8) then
-    print *,  irp_here//': error in receiving mini' 
+    print *,  irp_here//': error in receiving mini'
     stop 12
   endif
 IRP_ENDIF
@@ -484,14 +457,12 @@ end subroutine
 
 
 
-subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks, b)
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b)
  use selection_types
  use f77_zmq
   implicit none
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
-  double precision, intent(inout) :: pt2(N_states,*)
-  double precision, intent(inout) :: variance(N_states,*)
-  double precision, intent(inout) :: norm(N_states,*)
+  type(pt2_type), intent(inout)  :: pt2_data(n_tasks)
   type(selection_buffer), intent(inout) :: b
   integer, intent(out) :: index(*)
   integer, intent(out) :: n_tasks, task_id(*)
@@ -514,27 +485,11 @@ subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id
     stop 'pull'
   endif
 
-  rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0)
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_data, pt2_type_size(N_states)*n_tasks, 0)
   if (rc == -1) then
     n_tasks = 1
     task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'pull'
-  endif
-
-  rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0)
-  if (rc == -1) then
-    n_tasks = 1
-    task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
-    stop 'pull'
-  endif
-
-  rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0)
-  if (rc == -1) then
-    n_tasks = 1
-    task_id(1) = 0
-  else if(rc /= 8*N_states*n_tasks) then
+  else if(rc /= pt2_type_size(N_states)*n_tasks) then
     stop 'pull'
   endif
 
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index cf52aaff..e68e4b79 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -180,15 +180,13 @@ subroutine get_mask_phase(det1, pm, Nint)
 end subroutine
 
 
-subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset)
+subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
   use bitmasks
   use selection_types
   implicit none
   integer, intent(in)            :: i_generator, subset, csubset
   type(selection_buffer), intent(inout) :: b
-  double precision, intent(inout)  :: pt2(N_states)
-  double precision, intent(inout)  :: variance(N_states)
-  double precision, intent(inout)  :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   integer :: k,l
   double precision, intent(in)   :: E0(N_states)
 
@@ -206,7 +204,7 @@ subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset)
       particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) )
       particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) )
   enddo
-  call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,b,subset,csubset)
+  call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset)
   deallocate(fock_diag_tmp)
 end subroutine
 
@@ -255,7 +253,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
 end
 
 
-subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,buf,subset,csubset)
+subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf,subset,csubset)
   use bitmasks
   use selection_types
   implicit none
@@ -267,9 +265,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer(bit_kind), intent(in)  :: hole_mask(N_int,2), particle_mask(N_int,2)
   double precision, intent(in)   :: fock_diag_tmp(mo_num)
   double precision, intent(in)   :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout)  :: variance(N_states)
-  double precision, intent(inout)  :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
 
   integer                         :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,sze
@@ -645,9 +641,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
             call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)
 
             if(.not.pert_2rdm)then
-             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf)
+             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
             else
-             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
+             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
             endif
           end if
         enddo
@@ -665,7 +661,7 @@ end subroutine
 
 
 
-subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf)
+subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
   use bitmasks
   use selection_types
   implicit none
@@ -673,16 +669,14 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
   integer, intent(in) :: i_generator, sp, h1, h2
   double precision, intent(in) :: mat(N_states, mo_num, mo_num)
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
-  double precision, intent(in)           :: fock_diag_tmp(mo_num)
+  double precision, intent(in)    :: fock_diag_tmp(mo_num)
   double precision, intent(in)    :: E0(N_states)
-  double precision, intent(inout) :: pt2(N_states)
-  double precision, intent(inout) :: variance(N_states)
-  double precision, intent(inout) :: norm2(N_states)
+  type(pt2_type), intent(inout)   :: pt2_data
   type(selection_buffer), intent(inout) :: buf
   logical :: ok
-  integer :: s1, s2, p1, p2, ib, j, istate
+  integer :: s1, s2, p1, p2, ib, j, istate, jstate
   integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
-  double precision :: e_pert, delta_E, val, Hii, w, tmp, alpha_h_psi, coef
+  double precision :: e_pert, delta_E, val, Hii, w, tmp, alpha_h_psi, coef(N_states)
   double precision, external :: diag_H_mat_elem_fock
   double precision :: E_shift
 
@@ -782,16 +776,36 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
         endif
         e_pert = 0.5d0 * (tmp - delta_E)
         if (dabs(alpha_h_psi) > 1.d-4) then
-          coef = e_pert / alpha_h_psi
+          coef(istate) = e_pert / alpha_h_psi
         else
-          coef = alpha_h_psi / delta_E
+          coef(istate) = alpha_h_psi / delta_E
         endif
-        pt2(istate) = pt2(istate) + e_pert
-        variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi
-        norm2(istate) = norm2(istate) + coef * coef
+      enddo
+
+      do istate=1,N_states
+        do jstate=1,N_states
+          pt2_data % overlap(jstate,istate) += coef(jstate) * coef(istate)
+        enddo
+      enddo
+
+      ! Gram-Schmidt using input overlap matrix
+!      do istate=1,N_states
+!        do jstate=1,istate-1
+!          coef(istate) = coef(istate) - pt2_overlap(jstate,istate)/(pt2_overlap(jstate,jstate)) * coef(jstate)
+!        enddo
+!      enddo
+
+      do istate=1,N_states
+        alpha_h_psi = mat(istate, p1, p2)
+        e_pert = coef(istate) * alpha_h_psi
+
+        pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi
+        pt2_data % norm2(istate)    += coef(istate) * coef(istate)
+        pt2_data % pt2(istate)      += e_pert
 
 !!!DEBUG
-!        pt2(istate) = pt2(istate) - e_pert + alpha_h_psi**2/delta_E
+!        delta_E = E0(istate) - Hii + E_shift
+!        pt2_data % pt2(istate) = pt2_data % pt2(istate) + alpha_h_psi**2/delta_E
 !
 !        integer :: k
 !        double precision :: alpha_h_psi_2,hij
@@ -815,7 +829,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
             w = w - alpha_h_psi * alpha_h_psi * selection_weight(istate)
 
           case(6)
-            w = w - coef * coef * selection_weight(istate)
+            w = w - coef(istate) * coef(istate) * selection_weight(istate)
 
           case default
             ! Energy selection
diff --git a/src/cipsi/selection_types.f90 b/src/cipsi/selection_types.f90
index 1905f2d3..52b84cf1 100644
--- a/src/cipsi/selection_types.f90
+++ b/src/cipsi/selection_types.f90
@@ -19,6 +19,13 @@ module selection_types
     double precision, allocatable :: overlap_err(:,:)
   endtype
 
+  contains
+
+  integer function pt2_type_size(N)
+    implicit none
+    integer, intent(in) :: N
+    pt2_type_size = 8*(8*n + 2*n*n)
+  end function
 
 end module