10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-03 01:55:59 +01:00

Improved memory access in sigma_vector_cfg.

This commit is contained in:
vijay gopal chilkuri 2021-03-15 14:22:08 +01:00
parent a3c134c3d0
commit 6e634b0779
2 changed files with 69 additions and 42 deletions

View File

@ -445,7 +445,8 @@ end subroutine get_phase_qp_to_cfg
print *,"Rowsmax=",rowsmax," Colsmax=",colsmax
END_PROVIDER
BEGIN_PROVIDER [ real*8, AIJpqContainer, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,NBFMax,NBFMax)]
!BEGIN_PROVIDER [ real*8, AIJpqContainer, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,NBFMax,NBFMax)]
BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)]
use cfunctions
implicit none
BEGIN_DOC
@ -486,7 +487,8 @@ end subroutine get_phase_qp_to_cfg
! Type
! 1. SOMO -> SOMO
!print *,"Doing SOMO -> SOMO"
AIJpqContainer(NSOMOMin,1,1,1,1,1) = 1.0d0
!AIJpqContainer(NSOMOMin,1,1,1,1,1) = 1.0d0
AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0
do i = NSOMOMin+2, NSOMOMax, 2
Isomo = ISHFT(1_8,i)-1
do j = i-2,i-2, 2
@ -512,7 +514,8 @@ end subroutine get_phase_qp_to_cfg
!call debug_spindet(Jsomo,1)
!call debug_spindet(Isomo,1)
AIJpqContainer(nsomoi,1,k,l,:,:) = 0.0d0
!AIJpqContainer(nsomoi,1,k,l,:,:) = 0.0d0
AIJpqContainer(:,:,k,l,1,nsomoi) = 0.0d0
call getApqIJMatrixDims(Isomo, &
Jsomo, &
MS, &
@ -538,7 +541,8 @@ end subroutine get_phase_qp_to_cfg
! i -> j
do ri = 1,rows
do ci = 1,cols
AIJpqContainer(nsomoi,1,k,l,ri,ci) = meMatrix(ri, ci)
!AIJpqContainer(nsomoi,1,k,l,ri,ci) = meMatrix(ri, ci)
AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci)
end do
end do
deallocate(meMatrix)
@ -549,7 +553,8 @@ end subroutine get_phase_qp_to_cfg
! Type
! 2. DOMO -> VMO
!print *,"Doing DOMO -> VMO"
AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0
!AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0
AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0
do i = NSOMOMin, NSOMOMax, 2
Isomo = ISHFT(1_8,i)-1
tmpsomo = ISHFT(1_8,i+2)-1
@ -579,7 +584,8 @@ end subroutine get_phase_qp_to_cfg
!call debug_spindet(Jsomo,1)
!call debug_spindet(Isomo,1)
AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0
!AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0
AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0
call getApqIJMatrixDims(Isomo, &
Jsomo, &
MS, &
@ -605,7 +611,8 @@ end subroutine get_phase_qp_to_cfg
! i -> j
do ri = 1,rows
do ci = 1,cols
AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci)
!AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci)
AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci)
end do
end do
deallocate(meMatrix)
@ -616,7 +623,8 @@ end subroutine get_phase_qp_to_cfg
! Type
! 3. SOMO -> VMO
!print *,"Doing SOMO -> VMO"
AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0
!AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0
AIJpqContainer(1,1,1,1,3,NSOMOMin) = 1.0d0
do i = NSOMOMin, NSOMOMax, 2
Isomo = ISHFT(1_8,i)-1
do j = i,i, 2
@ -638,7 +646,8 @@ end subroutine get_phase_qp_to_cfg
!call debug_spindet(Jsomo,1)
!call debug_spindet(Isomo,1)
AIJpqContainer(i,3,k,l,:,:) = 0.0d0
!AIJpqContainer(i,3,k,l,:,:) = 0.0d0
AIJpqContainer(:,:,k,l,3,i) = 0.0d0
call getApqIJMatrixDims(Isomo, &
Jsomo, &
MS, &
@ -664,7 +673,8 @@ end subroutine get_phase_qp_to_cfg
! i -> j
do ri = 1,rows
do ci = 1,cols
AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci)
!AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci)
AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci)
end do
end do
deallocate(meMatrix)
@ -675,7 +685,8 @@ end subroutine get_phase_qp_to_cfg
! Type
! 4. DOMO -> SOMO
!print *,"Doing DOMO -> SOMO"
AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0
!AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0
AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0
do i = NSOMOMin+2, NSOMOMax, 2
Isomo = ISHFT(1_8,i)-1
do j = i,i, 2
@ -693,7 +704,8 @@ end subroutine get_phase_qp_to_cfg
Jsomo = ISHFT(1_8,j)-1
endif
AIJpqContainer(i,4,k,l,:,:) = 0.0d0
!AIJpqContainer(i,4,k,l,:,:) = 0.0d0
AIJpqContainer(:,:,k,l,4,i) = 0.0d0
call getApqIJMatrixDims(Isomo, &
Jsomo, &
MS, &
@ -720,7 +732,8 @@ end subroutine get_phase_qp_to_cfg
! i -> j
do ri = 1,rows
do ci = 1,cols
AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci)
!AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci)
AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci)
end do
end do
deallocate(meMatrix)
@ -888,8 +901,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
! returns : psi_coef_out_det :
END_DOC
integer,intent(in) :: sze, istart,iend, istep, ishift, n_st
real*8,intent(in) :: psi_in(sze,n_st)
real*8,intent(out) :: psi_out(sze,n_st)
real*8,intent(in) :: psi_in(n_st,sze)
real*8,intent(out) :: psi_out(n_st,sze)
integer(bit_kind) :: Icfg(N_INT,2)
integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk
integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
@ -1054,8 +1067,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
cntj = 0
do jj = startj, endj
cntj += 1
meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj)
psi_out(jj,kk) += meCC1 * psi_in(ii,kk) * h_core_ri(p,q)
!meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj)
meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)
psi_out(kk,jj) += meCC1 * psi_in(kk,ii) * h_core_ri(p,q)
enddo
enddo
enddo
@ -1072,6 +1086,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
!!! Double Excitations !!!
call omp_set_max_active_levels(0)
!$OMP parallel
!$OMP master
! Loop over all selected configurations
do i = istart_cfg,iend_cfg
@ -1126,11 +1143,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
endif
enddo
allocate(TKI(rowsTKI,n_st,totcolsTKI)) ! coefficients of CSF
allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
! Initialize the inegral container
! dims : (totcolsTKI, nconnectedI)
allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs
allocate(TKIGIJ(rowsTKI,n_st,nconnectedI)) ! TKI * gpqrs
allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs
totcolsTKI = 0
do j = 1,nconnectedI
@ -1143,16 +1160,18 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
allocate(CCmattmp(colsikpq,n_st))
do kk = 1,n_st
!do kk = 1,n_st
!do m = 1,colsikpq
! CCmattmp(m,kk) = psi_in(idxs_connectedI_alpha(j)+m-1,kk)
!enddo
!enddo
do m = 1,colsikpq
CCmattmp(m,kk) = psi_in(idxs_connectedI_alpha(j)+m-1,kk)
enddo
enddo
do kk = 1,n_st
do m = 1,colsikpq
tmpvar = CCmattmp(m,kk)
do l = 1,rowsTKI
TKI(l,kk,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * tmpvar
do kk = 1,n_st
!tmpvar = CCmattmp(m,kk)
!TKI(kk,l,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * tmpvar
!TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * tmpvar
TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
enddo
enddo
enddo
@ -1185,8 +1204,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
! Do big BLAS
! TODO TKI, size(TKI,1)*size(TKI,2)
call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, &
TKI, size(TKI,1)*n_st, GIJpqrs, size(GIJpqrs,1), 0.d0, &
TKIGIJ , size(TKIGIJ,1)*n_st )
TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, &
TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
!print *,"DIMs = ",rowsTKI,n_st,totcolsTKI,nconnectedI
!print *,"TKI mat"
@ -1218,13 +1237,14 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
!print *,">j=",j,rowsikpq,colsikpq, ">>",totcolsTKI,",",idxs_connectedI_alpha(j)
do kk = 1,n_st
do m = 1,colsikpq
tmpvar = psi_out(idxs_connectedI_alpha(j)+m-1,kk)
!tmpvar = psi_out(kk,idxs_connectedI_alpha(j)+m-1)
do l = 1,rowsTKI
tmpvar += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(l,kk,j)
do kk = 1,n_st
!tmpvar += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(kk,l,j)
psi_out(kk,idxs_connectedI_alpha(j)+m-1) += AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j)
enddo
psi_out(idxs_connectedI_alpha(j)+m-1,kk) = tmpvar
!psi_out(kk,idxs_connectedI_alpha(j)+m-1) = tmpvar
enddo
enddo
totcolsTKI += colsikpq
@ -1238,12 +1258,15 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
enddo ! loop over alphas
enddo ! loop over I
!$OMP end master
!$OMP end parallel
call omp_set_max_active_levels(4)
! Add the diagonal contribution
do kk=1,n_st
do i = 1,n_CSF
psi_out(i,kk) += 1.0d0*diag_energies(i)*psi_in(i,kk)
psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
enddo
enddo

View File

@ -314,17 +314,19 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
!call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
! call calculate_sigma_vector_cfg_nst(W_csf(1,shift+1),U_csf(1,shift+1),N_st_diag,sze_csf,1,sze_csf,0,1)
! ! TODO : psi_det_size ? for psi_det
allocate(tmpW(sze_csf,N_st_diag))
allocate(tmpU(sze_csf,N_st_diag))
!allocate(tmpW(sze_csf,N_st_diag))
allocate(tmpW(N_st_diag,sze_csf))
!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpU(N_st_diag,sze_csf))
do kk=1,N_st_diag
do ii=1,sze_csf
tmpU(ii,kk) = U_csf(ii,shift+kk)
tmpU(kk,ii) = U_csf(ii,shift+kk)
enddo
enddo
call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
do kk=1,N_st_diag
do ii=1,sze_csf
W_csf(ii,shift+kk)=tmpW(ii,kk)
W_csf(ii,shift+kk)=tmpW(kk,ii)
enddo
enddo
deallocate(tmpW)
@ -339,17 +341,19 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
!call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift+1))
!call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
! call calculate_sigma_vector_cfg_nst(W_csf(1,shift+1),U_csf(1,shift+1),N_st_diag,sze_csf,1,sze_csf,0,1)
allocate(tmpW(sze_csf,N_st_diag))
allocate(tmpU(sze_csf,N_st_diag))
!allocate(tmpW(sze_csf,N_st_diag))
allocate(tmpW(N_st_diag,sze_csf))
!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpU(N_st_diag,sze_csf))
do kk=1,N_st_diag
do ii=1,sze_csf
tmpU(ii,kk) = U_csf(ii,shift+kk)
tmpU(kk,ii) = U_csf(ii,shift+kk)
enddo
enddo
call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
do kk=1,N_st_diag
do ii=1,sze_csf
W_csf(ii,shift+kk)=tmpW(ii,kk)
W_csf(ii,shift+kk)=tmpW(kk,ii)
enddo
enddo
deallocate(tmpW)