From 2e65943c0b3eb0d49b6008d53c3df90def18823d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 14 Apr 2017 12:04:21 +0200
Subject: [PATCH] Bug corrected in selection tasks

---
 plugins/CAS_SD_ZMQ/selection.irp.f      |  3 +-
 plugins/Full_CI_ZMQ/zmq_selection.irp.f |  3 +-
 src/Davidson/u0Hu0.irp.f                | 53 +++++++++++++++++--------
 src/Determinants/s2.irp.f               | 40 ++++++++++++++++---
 src/Determinants/slater_rules.irp.f     |  3 +-
 5 files changed, 76 insertions(+), 26 deletions(-)

diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f
index 5d2cda78..3692710d 100644
--- a/plugins/CAS_SD_ZMQ/selection.irp.f
+++ b/plugins/CAS_SD_ZMQ/selection.irp.f
@@ -1241,8 +1241,7 @@ subroutine ZMQ_selection(N_in, pt2)
   do i= 1, N_det_generators
     k = k+1
     write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
-    k = k+20
-    if (k>20*maxtasks) then
+    if (k>=maxtasks) then
        k=0
        call add_task_to_taskserver(zmq_to_qp_run_socket,task)
     endif
diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f
index 62703a43..7ffb4a44 100644
--- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f
+++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f
@@ -32,8 +32,7 @@ subroutine ZMQ_selection(N_in, pt2)
   do i= 1, N_det_generators
     k = k+1
     write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
-    k = k+20
-    if (k>20*maxtasks) then
+    if (k>=maxtasks) then
        k=0
        call add_task_to_taskserver(zmq_to_qp_run_socket,task)
     endif
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index 421c31cd..7231611a 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -631,8 +631,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
   
   PROVIDE ref_bitmask_energy
 
-  double precision               :: hij, s2 
-  integer                        :: i,j,k
+  double precision               :: hij, sij
+  integer                        :: i,j,k,l
   integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
   integer                        :: istate
   integer                        :: krow, kcol, krow_b, kcol_b
@@ -684,6 +684,7 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
   enddo
 
   v_0 = 0.d0
+  s_0 = 0.d0
   do k_a=1,N_det
     
     ! Initial determinant is at k_a in alpha-major representation
@@ -703,10 +704,14 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
     ! Diagonal contribution
     ! ---------------------
 
-    double precision, external :: diag_H_mat_elem
+    double precision, external :: diag_H_mat_elem, diag_S_mat_elem
   
-    v_0(k_a,1:N_st) = v_0(k_a,1:N_st) + diag_H_mat_elem(tmp_det,N_int) * &
-        psi_bilinear_matrix_values(k_a,1:N_st)
+    hij = diag_H_mat_elem(tmp_det,N_int) 
+    sij = diag_S_mat_elem(tmp_det,N_int)
+    do l=1,N_st
+      v_0(k_a,l) = v_0(k_a,l) + hij * psi_bilinear_matrix_values(k_a,l)
+      s_0(k_a,l) = s_0(k_a,l) + sij * psi_bilinear_matrix_values(k_a,l)
+    enddo
 
     
     ! Get all single and double alpha excitations
@@ -746,8 +751,11 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
       enddo
       tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
       call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij)
-      v_0(l_a, 1:N_st) += hij * psi_bilinear_matrix_values(k_a,1:N_st)
-      v_0(k_a, 1:N_st) += hij * psi_bilinear_matrix_values(l_a,1:N_st)
+      do l=1,N_st
+        v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l)
+        v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l)
+        ! single => sij = 0 
+      enddo
     enddo
     
     ! Compute Hij for all alpha doubles
@@ -761,8 +769,11 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
         lrow = psi_bilinear_matrix_rows(l_a)
       enddo
       call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, doubles(i)), N_int, hij)
-      v_0(l_a, 1:N_st) += hij * psi_bilinear_matrix_values(k_a,1:N_st)
-      v_0(k_a, 1:N_st) += hij * psi_bilinear_matrix_values(l_a,1:N_st)
+      do l=1,N_st
+        v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l)
+        v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l)
+        ! same spin => sij = 0 
+      enddo
     enddo
     
     
@@ -805,8 +816,11 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
       tmp_det2(1:N_int,2) = psi_det_beta_unique (1:N_int, lcol)
       l_a = psi_bilinear_matrix_transp_order(l_b)
       call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij)
-      v_0(l_a, 1:N_st) += hij * psi_bilinear_matrix_values(k_a,1:N_st)
-      v_0(k_a, 1:N_st) += hij * psi_bilinear_matrix_values(l_a,1:N_st)
+      do l=1,N_st
+        v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l)
+        v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l)
+        ! single => sij = 0 
+      enddo
     enddo
     
     ! Compute Hij for all beta doubles
@@ -821,8 +835,11 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
       enddo
       l_a = psi_bilinear_matrix_transp_order(l_b)
       call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, doubles(i)), N_int, hij)
-      v_0(l_a, 1:N_st) += hij * psi_bilinear_matrix_values(k_a,1:N_st)
-      v_0(k_a, 1:N_st) += hij * psi_bilinear_matrix_values(l_a,1:N_st)
+      do l=1,N_st
+        v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l)
+        v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l)
+        ! same spin => sij = 0 
+      enddo
     enddo
     
   end do
@@ -875,9 +892,13 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8)
         if (is_single_a(lrow)) then
           tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
 
-          call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij)
-          v_0(k_a, 1:N_st) += hij * psi_bilinear_matrix_values(l_a,1:N_st)
-          v_0(l_a, 1:N_st) += hij * psi_bilinear_matrix_values(k_a,1:N_st)
+          call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij,sij)
+          do l=1,N_st
+            v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l)
+            v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l)
+            s_0(k_a, l) -= sij * psi_bilinear_matrix_values(l_a,l)
+            s_0(l_a, l) -= sij * psi_bilinear_matrix_values(k_a,l)
+          enddo
         endif
         l_a += 1
 
diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f
index a6e69fb5..0340361d 100644
--- a/src/Determinants/s2.irp.f
+++ b/src/Determinants/s2.irp.f
@@ -1,3 +1,35 @@
+double precision function diag_S_mat_elem(key_i,Nint)
+  implicit none
+  use bitmasks
+  include 'Utils/constants.include.F'
+
+  integer                        :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2)
+  BEGIN_DOC
+! Returns <i|S^2|i>
+  END_DOC
+  integer                        :: nup, i
+  integer(bit_kind)              :: xorvec(N_int_max)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
+
+  do i=1,Nint
+    xorvec(i) = xor(key_i(i,1),key_i(i,2))
+  enddo
+
+  do i=1,Nint
+    xorvec(i) = iand(xorvec(i),key_i(i,1))
+  enddo
+
+  nup = 0
+  do i=1,Nint
+    if (xorvec(i) /= 0_bit_kind) then
+      nup += popcnt(xorvec(i))
+    endif
+  enddo
+  diag_S_mat_elem = dble(nup)
+
+end
+
 subroutine get_s2(key_i,key_j,Nint,s2)
   implicit none
   use bitmasks
@@ -25,11 +57,9 @@ subroutine get_s2(key_i,key_j,Nint,s2)
         endif
       endif
     case(0)
-      nup = 0
-      do i=1,Nint
-        nup += popcnt(iand(xor(key_i(i,1),key_i(i,2)),key_i(i,1)))
-      enddo
-      s2 = dble(nup)
+      double precision, external :: diag_S_mat_elem
+      !DIR$ FORCEINLINE
+      s2 = diag_S_mat_elem(key_i,Nint)
   end select
 end
 
diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f
index 1e0cb0a8..ef246e50 100644
--- a/src/Determinants/slater_rules.irp.f
+++ b/src/Determinants/slater_rules.irp.f
@@ -11,6 +11,7 @@ subroutine get_excitation_degree(key1,key2,degree,Nint)
   integer, intent(out)           :: degree
   
   integer(bit_kind)              :: xorvec(2*N_int_max)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
   integer                        :: l
   
   ASSERT (Nint > 0)
@@ -2555,7 +2556,7 @@ subroutine i_H_j_double_spin(key_i,key_j,Nint,hij)
       exc(1,2), mo_integrals_map) )
 end
 
-subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij)
+subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij,phase)
   use bitmasks
   implicit none
   BEGIN_DOC