Include S^2 inside davidson

2024-12-22 12:23:48 +01:00 · 2016-09-27 15:55:38 +02:00 · 2016-09-27 15:55:38 +02:00 · c5501ef1f9
commit c5501ef1f9
parent 9e5ec756b3
7 changed files with 238 additions and 27 deletions
--- a/plugins/Full_CI/full_ci.irp.f
+++ b/plugins/Full_CI/full_ci.irp.f
@ -100,8 +100,8 @@ program full_ci
    print *,  'N_det    = ', N_det
    print *,  'N_states = ', N_states
    print *,  'PT2      = ', pt2
-    print *,  'E        = ', CI_energy
-    print *,  'E+PT2    = ', CI_energy+pt2
+    print *,  'E        = ', CI_energy(1:N_states)
+    print *,  'E+PT2    = ', CI_energy(1:N_states)+pt2(1:N_states)
    print *,  '-----'
    call ezfio_set_full_ci_energy_pt2(CI_energy(1)+pt2(1))
   endif
--- a/plugins/Full_CI_ZMQ/selection_double.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_double.irp.f
@ -425,19 +425,19 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
        hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2)
        !call debug_hij(hij, gen, mask, mi, ma, puti, putj)
-        tmp_row(:,putj) += hij * coefs
+        tmp_row(1:N_states,putj) += hij * coefs(1:N_states)
      end do
      do putj=hfix+1, mo_tot_num
        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
        hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2)
        !call debug_hij(hij, gen, mask, mi, ma, puti, putj)
-        tmp_row(:,putj) += hij * coefs
+        tmp_row(1:N_states,putj) += hij * coefs(1:N_states)
      end do

      if(ma == 1) then           
-        mat(:,:,puti) += tmp_row(:,:)
+        mat(1:N_states,1:mo_tot_num,puti) += tmp_row(1:N_states,1:mo_tot_num)
      else
-        mat(:,puti,:) += tmp_row(:,:)
+        mat(1:N_states,puti,1:mo_tot_num) += tmp_row(1:N_states,1:mo_tot_num)
      end if
    end if

@ -585,12 +585,12 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
          !call assert(ok, "zsdq")
          call i_h_j(gen, det, N_int, hij)
-          mat(:, p1, p2) += coefs * hij
+          mat(:, p1, p2) += coefs(:) * hij
        else
          hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2)
          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2)
          !call debug_hij(hij, gen, mask, 1, 2, p1, p2)
-          mat(:, p1, p2) += coefs * hij
+          mat(:, p1, p2) += coefs(:) * hij
        end if
      end do
    end do
@ -605,10 +605,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
          call i_h_j(gen, det, N_int, hij)
-          mat(:, puti, putj) += coefs * hij
+          mat(:, puti, putj) += coefs(:) * hij
        else
          hij = (integral8(p1, p2, puti, putj) -  integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2)
-          mat(:, puti, putj) += coefs * hij
+          mat(:, puti, putj) += coefs(:) * hij
          !call debug_hij(hij, gen, mask, sp, sp, puti, putj)
        end if
      end do
--- a/scripts/generate_h_apply.py
+++ b/scripts/generate_h_apply.py
@ -439,13 +439,13 @@ class H_apply_zmq(H_apply):
  enddo
     """ % (self.energy)
     self.data["copy_buffer"] = """
-  do i=1,N_det_generators
-    do k=1,N_st
-      pt2(k) = pt2(k) + pt2_generators(k,i)
-      norm_pert(k) = norm_pert(k) + norm_pert_generators(k,i)
-      H_pert_diag(k) = H_pert_diag(k) + H_pert_diag_generators(k,i)
+    do i=1,N_det_generators
+      do k=1,N_st
+        pt2(k) = pt2(k) + pt2_generators(k,i)
+        norm_pert(k) = norm_pert(k) + norm_pert_generators(k,i)
+        H_pert_diag(k) = H_pert_diag(k) + H_pert_diag_generators(k,i)
+      enddo
    enddo
-  enddo
     """

  def set_selection_pt2(self,pert):
--- a/src/Davidson/diagonalize_CI.irp.f
+++ b/src/Davidson/diagonalize_CI.irp.f
@ -55,12 +55,13 @@ END_PROVIDER
   
   if (diag_algorithm == "Davidson") then
     
-     call davidson_diag(psi_det,CI_eigenvectors,CI_electronic_energy,&
-         size(CI_eigenvectors,1),N_det,N_states,N_states_diag,N_int,output_determinants)
+     call davidson_diag_HS2(psi_det,CI_eigenvectors,                 &
+         size(CI_eigenvectors,1),CI_electronic_energy,               &
+         N_det,N_states,N_states_diag,N_int,output_determinants)

     call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int,&
         N_states_diag,size(CI_eigenvectors,1))
-     
+
     
   else if (diag_algorithm == "Lapack") then
     
--- a/src/Davidson/diagonalize_CI_mono.irp.f
+++ b/src/Davidson/diagonalize_CI_mono.irp.f
@ -34,7 +34,7 @@
    i_state = 0
    if (s2_eig) then
      do j=1,N_det
-        call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2)
+        call get_s2_u0(psi_det,eigenvectors(1,j),N_det,s2,N_det)
        if(dabs(s2-expected_s2).le.0.3d0)then
          print*,'j = ',j
          print*,'e = ',eigenvalues(j)
@ -54,7 +54,7 @@
      enddo
    else
      do j=1,N_states_diag
-        call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2)
+        call get_s2_u0(psi_det,eigenvectors(1,j),N_det,s2,N_det)
        if(dabs(eigenvectors(1,j)).gt.0.9d0)then
          i_state += 1
          do i=1,N_det
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@ -174,3 +174,158 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
  call u_0_H_u_0(psi_energy,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size)
 END_PROVIDER

+
+
+
+
+subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
+  !
+  ! n : number of determinants
+  !
+  ! H_jj : array of <j|H|j>
+  !
+  ! S2_jj : array of <j|S^2|j>
+  END_DOC
+  integer, intent(in)            :: N_st,n,Nint, sze_8
+  double precision, intent(out)  :: v_0(sze_8,N_st), s_0(sze_8,N_st)
+  double precision, intent(in)   :: u_0(sze_8,N_st)
+  double precision, intent(in)   :: H_jj(n), S2_jj(n)
+  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
+  double precision               :: hij,s2 
+  double precision, allocatable  :: vt(:,:), ut(:,:), st(:,:)
+  integer                        :: i,j,k,l, jj,ii
+  integer                        :: i0, j0
+  
+  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
+  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
+  integer(bit_kind)              :: sorted_i(Nint)
+  
+  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
+  integer                        :: N_st_8
+  
+  integer, external              :: align_double
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
+
+  N_st_8 = align_double(N_st)
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (n>0)
+  PROVIDE ref_bitmask_energy 
+
+  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
+  allocate(ut(N_st_8,n))
+
+  v_0 = 0.d0
+  s_0 = 0.d0
+
+  do i=1,n
+    do istate=1,N_st
+      ut(istate,i) = u_0(i,istate)
+    enddo
+  enddo
+
+  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
+  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
+  
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
+      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
+  allocate(vt(N_st_8,n),st(N_st_8,n))
+  Vt = 0.d0
+  St = 0.d0
+  
+  !$OMP DO SCHEDULE(dynamic)
+  do sh=1,shortcut(0,1)
+    do sh2=sh,shortcut(0,1)
+      exa = 0
+      do ni=1,Nint
+        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
+      end do
+      if(exa > 2) then
+        cycle
+      end if
+      
+      do i=shortcut(sh,1),shortcut(sh+1,1)-1
+        org_i = sort_idx(i,1)
+        if(sh==sh2) then
+          endi = i-1
+        else
+          endi = shortcut(sh2+1,1)-1
+        end if
+        do ni=1,Nint
+          sorted_i(ni) = sorted(ni,i,1)
+        enddo
+        
+        do j=shortcut(sh2,1),endi
+          org_j = sort_idx(j,1)
+          ext = exa
+          do ni=1,Nint
+            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+          end do
+          if(ext <= 4) then
+            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
+            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) 
+            do istate=1,n_st
+              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
+              vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
+              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
+              st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
+            enddo
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+  
+  !$OMP DO SCHEDULE(dynamic)
+  do sh=1,shortcut(0,2)
+    do i=shortcut(sh,2),shortcut(sh+1,2)-1
+      org_i = sort_idx(i,2)
+      do j=shortcut(sh,2),i-1
+        org_j = sort_idx(j,2)
+        ext = 0
+        do ni=1,Nint
+          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
+        end do
+        if(ext == 4) then
+            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
+            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) 
+            do istate=1,n_st
+              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
+              vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
+              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
+              st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
+            enddo
+        end if
+      end do
+    end do
+  enddo
+  !$OMP END DO NOWAIT
+  
+  !$OMP CRITICAL
+  do istate=1,N_st
+    do i=n,1,-1
+      v_0(i,istate) = v_0(i,istate) + vt(istate,i)
+      s_0(i,istate) = s_0(i,istate) + st(istate,i)
+    enddo
+  enddo
+  !$OMP END CRITICAL
+
+  deallocate(vt,st)
+  !$OMP END PARALLEL
+  
+  do istate=1,N_st
+    do i=1,n
+      v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
+      s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
+    enddo
+  enddo
+  deallocate (shortcut, sort_idx, sorted, version, ut)
+end
+
--- a/src/Determinants/s2.irp.f
+++ b/src/Determinants/s2.irp.f
@ -1,4 +1,4 @@
-subroutine get_s2(key_i,key_j,s2,Nint)
+subroutine get_s2(key_i,key_j,Nint,s2)
 implicit none
 use bitmasks
 BEGIN_DOC
@ -189,7 +189,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
          end do
          if(ext <= 4) then
-            call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),s2_tmp,Nint)
+            call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
            do istate=1,N_st
              vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
              vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
@ -212,7 +212,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
        end do
        if(ext == 4) then
-          call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),s2_tmp,Nint)
+          call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
          do istate=1,N_st
            vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
            vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
@ -235,7 +235,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
  !$OMP END PARALLEL
  
  do i=1,n
-    call get_s2(keys_tmp(1,1,i),keys_tmp(1,1,i),s2_tmp,Nint)
+    call get_s2(keys_tmp(1,1,i),keys_tmp(1,1,i),Nint,s2_tmp)
    do istate=1,N_st
      v_0(i,istate) += s2_tmp * u_0(i,istate)
    enddo
@ -275,12 +275,12 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
      allocate(idx(0:n))
      !$OMP DO SCHEDULE(dynamic)
      do i = n,1,-1   ! Better OMP scheduling
-        call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
+        call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
        accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
        call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
        do kk=1,idx(0)
          j = idx(kk)
-          call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
+          call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),N_int,s2_tmp)
          accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
        enddo
      enddo
@ -418,3 +418,58 @@ subroutine diagonalize_s2_betweenstates(keys_tmp,u_0,n,nmax_keys,nmax_coefs,nsta
  
 end

+subroutine i_S2_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,Nstate,i_S2_psi_array)
+  use bitmasks
+  implicit none
+  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate,idx_key(Ndet), N_minilist
+  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
+  integer(bit_kind), intent(in)  :: key(Nint,2)
+  double precision, intent(in)   :: coef(Ndet_max,Nstate)
+  double precision, intent(out)  :: i_S2_psi_array(Nstate)
+  
+  integer                        :: i, ii,j, i_in_key, i_in_coef
+  double precision               :: phase
+  integer                        :: exc(0:2,2,2)
+  double precision               :: s2ij
+  integer                        :: idx(0:Ndet)
+  BEGIN_DOC
+! Computes <i|S2|Psi> = \sum_J c_J <i|S2|J>.
+!
+! Uses filter_connected_i_H_psi0 to get all the |J> to which |i>
+! is connected. The |J> are searched in short pre-computed lists.
+  END_DOC
+  
+  ASSERT (Nint > 0)
+  ASSERT (N_int == Nint)
+  ASSERT (Nstate > 0)
+  ASSERT (Ndet > 0)
+  ASSERT (Ndet_max >= Ndet)
+  i_S2_psi_array = 0.d0
+  
+  call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx)
+  if (Nstate == 1) then
+
+    do ii=1,idx(0)
+      i_in_key = idx(ii)
+      i_in_coef = idx_key(idx(ii))
+      !DIR$ FORCEINLINE
+      call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
+      ! TODO : Cache misses
+      i_S2_psi_array(1) = i_S2_psi_array(1) + coef(i_in_coef,1)*s2ij
+    enddo
+
+  else
+
+    do ii=1,idx(0)
+      i_in_key = idx(ii)
+      i_in_coef = idx_key(idx(ii))
+      !DIR$ FORCEINLINE
+      call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
+      do j = 1, Nstate
+        i_S2_psi_array(j) = i_S2_psi_array(j) + coef(i_in_coef,j)*s2ij
+      enddo
+    enddo
+
+  endif
+
+end