9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-11-14 17:33:38 +01:00
qp2/src/davidson/u0_h_u0.irp.f
2019-02-05 12:02:50 +01:00

648 lines
21 KiB
Fortran
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
&BEGIN_PROVIDER [ double precision, psi_s2, (N_states) ]
implicit none
BEGIN_DOC
! psi_energy(i) = $\langle \Psi_i | H | \Psi_i \rangle$
!
! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$
END_DOC
call u_0_H_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size)
integer :: i
do i=N_det+1,N_states
psi_energy(i) = 0.d0
psi_s2(i) = 0.d0
enddo
END_PROVIDER
BEGIN_PROVIDER [ double precision, psi_energy_with_nucl_rep, (N_states) ]
implicit none
BEGIN_DOC
! Energy of the wave function with the nuclear repulsion energy.
END_DOC
psi_energy_with_nucl_rep(1:N_states) = psi_energy(1:N_states) + nuclear_repulsion
END_PROVIDER
subroutine u_0_H_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze)
use bitmasks
implicit none
BEGIN_DOC
! Computes $E_0 = \frac{\langle u_0 | H | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
!
! and $S_0 = \frac{\langle u_0 | S^2 | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
!
! n : number of determinants
!
END_DOC
integer, intent(in) :: n,Nint, N_st, sze
double precision, intent(out) :: e_0(N_st),s_0(N_st)
double precision, intent(inout) :: u_0(sze,N_st)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:)
double precision :: u_dot_u,u_dot_v,diag_H_mat_elem
integer :: i,j, istate
if ((n > 100000).and.distributed_davidson) then
allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag))
u_1(:,:) = 0.d0
u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n)
else if (n < n_det_max_full) then
allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st))
v_0(:,:) = 0.d0
u_1(:,:) = 0.d0
s_vec(:,:) = 0.d0
u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
do istate = 1,N_st
do j=1,n
do i=1,n
v_0(i,istate) = v_0(i,istate) + h_matrix_all_dets(i,j) * u_0(j,istate)
s_vec(i,istate) = s_vec(i,istate) + S2_matrix_all_dets(i,j) * u_0(j,istate)
enddo
enddo
enddo
else
allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st))
u_1(:,:) = 0.d0
u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
call H_S2_u_0_nstates_openmp(v_0,s_vec,u_1,N_st,n)
endif
u_0(1:n,1:N_st) = u_1(1:n,1:N_st)
deallocate(u_1)
double precision :: norm
!$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED)
do i=1,N_st
norm = u_dot_u(u_0(1,i),n)
if (norm /= 0.d0) then
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)
s_0(i) = u_dot_v(s_vec(1,i),u_0(1,i),n)
else
e_0(i) = 0.d0
s_0(i) = 0.d0
endif
enddo
!$OMP END PARALLEL DO
deallocate (s_vec, v_0)
end
subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
use bitmasks
implicit none
BEGIN_DOC
! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2 | u_0\rangle$.
!
! Assumes that the determinants are in psi_det
!
! istart, iend, ishift, istep are used in ZMQ parallelization.
END_DOC
integer, intent(in) :: N_st,sze
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
integer :: k
double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
v_t = 0.d0
s_t = 0.d0
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1)
deallocate(u_t)
call dtranspose( &
v_t, &
size(v_t, 1), &
v_0, &
size(v_0, 1), &
N_st, N_det)
call dtranspose( &
s_t, &
size(s_t, 1), &
s_0, &
size(s_0, 1), &
N_st, N_det)
deallocate(v_t,s_t)
do k=1,N_st
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
enddo
end
subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
use bitmasks
implicit none
BEGIN_DOC
! Computes $v_t = H | u_t\rangle$ and $s_t = S^2 | u_t\rangle$
!
! Default should be 1,N_det,0,1
END_DOC
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
double precision, intent(in) :: u_t(N_st,N_det)
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
PROVIDE ref_bitmask_energy N_int
select case (N_int)
case (1)
call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
case (2)
call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
case (3)
call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
case (4)
call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
case default
call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
end select
end
BEGIN_TEMPLATE
subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
use bitmasks
implicit none
BEGIN_DOC
! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$
!
! Default should be 1,N_det,0,1
END_DOC
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
double precision, intent(in) :: u_t(N_st,N_det)
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
double precision :: hij, sij
integer :: i,j,k,l
integer :: k_a, k_b, l_a, l_b, m_a, m_b
integer :: istate
integer :: krow, kcol, krow_b, kcol_b
integer :: lrow, lcol
integer :: mrow, mcol
integer(bit_kind) :: spindet($N_int)
integer(bit_kind) :: tmp_det($N_int,2)
integer(bit_kind) :: tmp_det2($N_int,2)
integer(bit_kind) :: tmp_det3($N_int,2)
integer(bit_kind), allocatable :: buffer(:,:)
integer :: n_doubles
integer, allocatable :: doubles(:)
integer, allocatable :: singles_a(:)
integer, allocatable :: singles_b(:)
integer, allocatable :: idx(:), idx0(:)
integer :: maxab, n_singles_a, n_singles_b, kcol_prev
integer*8 :: k8
logical :: compute_singles
integer*8 :: last_found, left, right, right_max
double precision :: rss, mem, ratio
! call resident_memory(rss)
! mem = dble(singles_beta_csc_size) / 1024.d0**3
!
! compute_singles = (mem+rss > qp_max_mem)
!
! if (.not.compute_singles) then
! provide singles_beta_csc
! endif
compute_singles=.True.
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
allocate(idx0(maxab))
do i=1,maxab
idx0(i) = i
enddo
! Prepare the array of all alpha single excitations
! -------------------------------------------------
PROVIDE N_int nthreads_davidson
!$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) &
!$OMP SHARED(psi_bilinear_matrix_rows, N_det, &
!$OMP psi_bilinear_matrix_columns, &
!$OMP psi_det_alpha_unique, psi_det_beta_unique, &
!$OMP n_det_alpha_unique, n_det_beta_unique, N_int, &
!$OMP psi_bilinear_matrix_transp_rows, &
!$OMP psi_bilinear_matrix_transp_columns, &
!$OMP psi_bilinear_matrix_transp_order, N_st, &
!$OMP psi_bilinear_matrix_order_transp_reverse, &
!$OMP psi_bilinear_matrix_columns_loc, &
!$OMP psi_bilinear_matrix_transp_rows_loc, &
!$OMP istart, iend, istep, irp_here, v_t, s_t, &
!$OMP ishift, idx0, u_t, maxab, compute_singles, &
!$OMP singles_alpha_csc,singles_alpha_csc_idx, &
!$OMP singles_beta_csc,singles_beta_csc_idx) &
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
!$OMP lcol, lrow, l_a, l_b, &
!$OMP buffer, doubles, n_doubles, &
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
!$OMP singles_a, n_singles_a, singles_b, ratio, &
!$OMP n_singles_b, k8, last_found,left,right,right_max)
! Alpha/Beta double excitations
! =============================
allocate( buffer($N_int,maxab), &
singles_a(maxab), &
singles_b(maxab), &
doubles(maxab), &
idx(maxab))
kcol_prev=-1
ASSERT (iend <= N_det)
ASSERT (istart > 0)
ASSERT (istep > 0)
!$OMP DO SCHEDULE(guided,64)
do k_a=istart+ishift,iend,istep
krow = psi_bilinear_matrix_rows(k_a)
ASSERT (krow <= N_det_alpha_unique)
kcol = psi_bilinear_matrix_columns(k_a)
ASSERT (kcol <= N_det_beta_unique)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
if (kcol /= kcol_prev) then
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
if (compute_singles) then
call get_all_spin_singles_$N_int( &
psi_det_beta_unique, idx0, &
tmp_det(1,2), N_det_beta_unique, &
singles_b, n_singles_b)
else
n_singles_b = 0
!DIR$ LOOP COUNT avg(1000)
do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
n_singles_b = n_singles_b+1
singles_b(n_singles_b) = singles_beta_csc(k8)
enddo
endif
endif
kcol_prev = kcol
! Loop over singly excited beta columns
! -------------------------------------
!DIR$ LOOP COUNT avg(1000)
do i=1,n_singles_b
lcol = singles_b(i)
tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
!---
! if (compute_singles) then
l_a = psi_bilinear_matrix_columns_loc(lcol)
ASSERT (l_a <= N_det)
!DIR$ UNROLL(8)
!DIR$ LOOP COUNT avg(50000)
do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot
ASSERT (l_a <= N_det)
idx(j) = l_a
l_a = l_a+1
enddo
j = j-1
call get_all_spin_singles_$N_int( &
buffer, idx, tmp_det(1,1), j, &
singles_a, n_singles_a )
!-----
! else
!
! ! Search for singles
!
!call cpu_time(time0)
! ! Right boundary
! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1
! ASSERT (l_a <= N_det)
! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
! lrow = psi_bilinear_matrix_rows(l_a)
! ASSERT (lrow <= N_det_alpha_unique)
!
! left = singles_alpha_csc_idx(krow)
! right_max = -1_8
! right = singles_alpha_csc_idx(krow+1)
! do while (right-left>0_8)
! k8 = shiftr(right+left,1)
! if (singles_alpha_csc(k8) > lrow) then
! right = k8
! else if (singles_alpha_csc(k8) < lrow) then
! left = k8 + 1_8
! else
! right_max = k8+1_8
! exit
! endif
! enddo
! if (right_max > 0_8) exit
! l_a = l_a-1
! enddo
! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow)
!
! ! Search
! n_singles_a = 0
! l_a = psi_bilinear_matrix_columns_loc(lcol)
! ASSERT (l_a <= N_det)
!
! last_found = singles_alpha_csc_idx(krow)
! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
! lrow = psi_bilinear_matrix_rows(l_a)
! ASSERT (lrow <= N_det_alpha_unique)
!
! left = last_found
! right = right_max
! do while (right-left>0_8)
! k8 = shiftr(right+left,1)
! if (singles_alpha_csc(k8) > lrow) then
! right = k8
! else if (singles_alpha_csc(k8) < lrow) then
! left = k8 + 1_8
! else
! n_singles_a += 1
! singles_a(n_singles_a) = l_a
! last_found = k8+1_8
! exit
! endif
! enddo
! l_a = l_a+1
! enddo
! j = j-1
!
! endif
!-----
! Loop over alpha singles
! -----------------------
!DIR$ LOOP COUNT avg(1000)
do k = 1,n_singles_a
l_a = singles_a(k)
ASSERT (l_a <= N_det)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij)
call get_s2(tmp_det,tmp_det2,$N_int,sij)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(guided,64)
do k_a=istart+ishift,iend,istep
! Single and double alpha excitations
! ===================================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
ASSERT (krow <= N_det_alpha_unique)
kcol = psi_bilinear_matrix_columns(k_a)
ASSERT (kcol <= N_det_beta_unique)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
! Initial determinant is at k_b in beta-major representation
! ----------------------------------------------------------------------
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
ASSERT (k_b <= N_det)
spindet(1:$N_int) = tmp_det(1:$N_int,1)
! Loop inside the beta column to gather all the connected alphas
lcol = psi_bilinear_matrix_columns(k_a)
l_a = psi_bilinear_matrix_columns_loc(lcol)
!DIR$ LOOP COUNT avg(200000)
do i=1,N_det_alpha_unique
if (l_a > N_det) exit
lcol = psi_bilinear_matrix_columns(l_a)
if (lcol /= kcol) exit
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot
idx(i) = l_a
l_a = l_a+1
enddo
i = i-1
call get_all_spin_singles_and_doubles_$N_int( &
buffer, idx, spindet, i, &
singles_a, doubles, n_singles_a, n_doubles )
! Compute Hij for all alpha singles
! ----------------------------------
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
!DIR$ LOOP COUNT avg(1000)
do i=1,n_singles_a
l_a = singles_a(i)
ASSERT (l_a <= N_det)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0
enddo
enddo
! Compute Hij for all alpha doubles
! ----------------------------------
!DIR$ LOOP COUNT avg(50000)
do i=1,n_doubles
l_a = doubles(i)
ASSERT (l_a <= N_det)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0
enddo
enddo
! Single and double beta excitations
! ==================================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
kcol = psi_bilinear_matrix_columns(k_a)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
spindet(1:$N_int) = tmp_det(1:$N_int,2)
! Initial determinant is at k_b in beta-major representation
! -----------------------------------------------------------------------
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
ASSERT (k_b <= N_det)
! Loop inside the alpha row to gather all the connected betas
lrow = psi_bilinear_matrix_transp_rows(k_b)
l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
!DIR$ LOOP COUNT avg(200000)
do i=1,N_det_beta_unique
if (l_b > N_det) exit
lrow = psi_bilinear_matrix_transp_rows(l_b)
if (lrow /= krow) exit
lcol = psi_bilinear_matrix_transp_columns(l_b)
ASSERT (lcol <= N_det_beta_unique)
buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
idx(i) = l_b
l_b = l_b+1
enddo
i = i-1
call get_all_spin_singles_and_doubles_$N_int( &
buffer, idx, spindet, i, &
singles_b, doubles, n_singles_b, n_doubles )
! Compute Hij for all beta singles
! ----------------------------------
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
!DIR$ LOOP COUNT avg(1000)
do i=1,n_singles_b
l_b = singles_b(i)
ASSERT (l_b <= N_det)
lcol = psi_bilinear_matrix_transp_columns(l_b)
ASSERT (lcol <= N_det_beta_unique)
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0
enddo
enddo
! Compute Hij for all beta doubles
! ----------------------------------
!DIR$ LOOP COUNT avg(50000)
do i=1,n_doubles
l_b = doubles(i)
ASSERT (l_b <= N_det)
lcol = psi_bilinear_matrix_transp_columns(l_b)
ASSERT (lcol <= N_det_beta_unique)
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0
enddo
enddo
! Diagonal contribution
! =====================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
ASSERT (krow <= N_det_alpha_unique)
kcol = psi_bilinear_matrix_columns(k_a)
ASSERT (kcol <= N_det_beta_unique)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
double precision, external :: diag_H_mat_elem, diag_S_mat_elem
hij = diag_H_mat_elem(tmp_det,$N_int)
sij = diag_S_mat_elem(tmp_det,$N_int)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
enddo
end do
!$OMP END DO
deallocate(buffer, singles_a, singles_b, doubles, idx)
!$OMP END PARALLEL
end
SUBST [ N_int ]
1;;
2;;
3;;
4;;
N_int;;
END_TEMPLATE