9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-25 13:03:28 +01:00

merged parallel version.

This commit is contained in:
vijay gopal chilkuri 2021-03-17 15:40:11 +01:00
commit 5517506b9a
5 changed files with 288 additions and 356 deletions

View File

@ -287,7 +287,8 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
call write_int(6,nproc_target,'Number of threads for PT2') call write_int(6,nproc_target,'Number of threads for PT2')
call write_double(6,mem,'Memory (Gb)') call write_double(6,mem,'Memory (Gb)')
call omp_set_nested(.false.) call omp_set_max_active_levels(1)
print '(A)', '========== ======================= ===================== ===================== ===========' print '(A)', '========== ======================= ===================== ===================== ==========='

View File

@ -673,10 +673,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
w = 0d0 w = 0d0
! integer(bit_kind) :: occ(N_int,2), n
! call configuration_of_det(det,occ,N_int)
! call configuration_to_dets_size(occ,n,elec_alpha_num,N_int)
e_pert = 0.d0 e_pert = 0.d0
coef = 0.d0 coef = 0.d0
logical :: do_diag logical :: do_diag
@ -704,7 +700,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
double precision :: eigvalues(N_states+1) double precision :: eigvalues(N_states+1)
double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2)
integer :: iwork(3+5*(N_states+1)), info, k ,n integer :: iwork(3+5*(N_states+1)), info, k
if (do_diag) then if (do_diag) then
double precision :: pt2_matrix(N_states+1,N_states+1) double precision :: pt2_matrix(N_states+1,N_states+1)
@ -770,36 +766,43 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
case(5) case(5)
! Variance selection ! Variance selection
! w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate) if (h0_type == 'CFG') then
w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) &
/ c0_weight(istate)
else
w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate))
! do jstate=1,N_states endif
! if (istate == jstate) cycle
! w = w + dabs(alpha_h_psi*mat(jstate,p1,p2)) * s_weight(istate,jstate)
! enddo
case(6) case(6)
! w = w - coef(istate) * coef(istate) * s_weight(istate,istate) if (h0_type == 'CFG') then
w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) &
/ c0_weight(istate)
else
w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate))
! do jstate=1,N_states endif
! if (istate == jstate) cycle
! w = w + dabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate)
! enddo
case default case default
! Energy selection ! Energy selection
! w = w + e_pert(istate) * s_weight(istate,istate) if (h0_type == 'CFG') then
w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate)
else
w = min(w, e_pert(istate) * s_weight(istate,istate)) w = min(w, e_pert(istate) * s_weight(istate,istate))
! do jstate=1,N_states endif
! if (istate == jstate) cycle
! w = w + dabs(X(istate)*X(jstate)) * s_weight(istate,jstate)
! enddo
end select end select
end do end do
! w = dble(n) * w integer(bit_kind) :: occ(N_int,2), n
if (h0_type == 'CFG') then
do k=1,N_int
occ(k,1) = ieor(det(k,1),det(k,2))
occ(k,2) = iand(det(k,1),det(k,2))
enddo
call configuration_to_dets_size(occ,n,elec_alpha_num,N_int)
n = max(n,1)
w *= dble(n)
endif
if(w <= buf%mini) then if(w <= buf%mini) then
call add_to_selection_buffer(buf, det, w) call add_to_selection_buffer(buf, det, w)

View File

@ -380,6 +380,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
p = 0 p = 0
q = 0 q = 0
if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
do i=idxI,end_index do i=idxI,end_index
Isomo = Ialpha(1,1) Isomo = Ialpha(1,1)
Idomo = Ialpha(1,2) Idomo = Ialpha(1,2)
@ -393,7 +394,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
!print *," --- IdsJ=",i
select case(ndiffDOMO) select case(ndiffDOMO)
case (0) case (0)
! SOMO -> VMO ! SOMO -> VMO
@ -440,7 +440,10 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
starti = psi_config_data(i,1) starti = psi_config_data(i,1)
endi = psi_config_data(i,2) endi = psi_config_data(i,2)
nconnectedI += 1 nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) do k=1,N_int
connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
connectedI(k,2,nconnectedI) = psi_configuration(k,2,i)
enddo
idxs_connectedI(nconnectedI)=starti idxs_connectedI(nconnectedI)=starti
excitationIds(1,nconnectedI)=p excitationIds(1,nconnectedI)=p
excitationIds(2,nconnectedI)=q excitationIds(2,nconnectedI)=q

View File

@ -176,7 +176,7 @@ end subroutine get_phase_qp_to_cfg
istate = 1 istate = 1
psi_csf_to_config_data(1) = 1 psi_csf_to_config_data(1) = 1
phasedet = 1.0d0 phasedet = 1.0d0
call omp_set_nested(.False.) call omp_set_max_active_levels(1)
!$OMP PARALLEL !$OMP PARALLEL
!$OMP MASTER !$OMP MASTER
do i = 1,N_configuration do i = 1,N_configuration
@ -232,8 +232,7 @@ end subroutine get_phase_qp_to_cfg
print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf
!$OMP END MASTER !$OMP END MASTER
!$OMP END PARALLEL !$OMP END PARALLEL
call omp_set_max_active_levels(4)
call omp_set_nested(.True.)
END_PROVIDER END_PROVIDER
@ -886,6 +885,7 @@ end subroutine calculate_preconditioner_cfg
subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep)
implicit none implicit none
use bitmasks use bitmasks
use omp_lib
BEGIN_DOC BEGIN_DOC
! Documentation for sigma-vector calculation ! Documentation for sigma-vector calculation
! !
@ -946,19 +946,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
real*8 :: diag_energies(n_CSF) real*8 :: diag_energies(n_CSF)
real*8 :: tmpvar, tmptot real*8 :: tmpvar, tmptot
! allocate integer(omp_lock_kind), allocatable :: lock(:)
allocate(alphas_Icfg(N_INT,2,max(sze,100))) call omp_set_max_active_levels(1)
allocate(listconnectedJ(N_INT,2,max(sze,100)))
allocate(singlesI(N_INT,2,max(sze,100))) allocate(lock(sze))
allocate(connectedI_alpha(N_INT,2,max(sze,100))) do i=1,sze
allocate(idxs_singlesI(max(sze,100))) call omp_init_lock(lock(i))
allocate(idxs_connectedI_alpha(max(sze,100))) enddo
allocate(excitationIds_single(2,max(sze,100)))
allocate(excitationTypes_single(max(sze,100)))
allocate(excitationIds(2,max(sze,100)))
allocate(excitationTypes(max(sze,100)))
allocate(idslistconnectedJ(max(sze,100)))
allocate(diagfactors(max(sze,100)))
!print *," sze = ",sze !print *," sze = ",sze
call calculate_preconditioner_cfg(diag_energies) call calculate_preconditioner_cfg(diag_energies)
@ -972,7 +966,33 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
iend_cfg = psi_csf_to_config_data(iend) iend_cfg = psi_csf_to_config_data(iend)
call omp_set_max_active_levels(1)
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,&
!$OMP holetype, vmotype, nvmos, listvmos, starti, endi, &
!$OMP nsinglesI, singlesI,idxs_singlesI,excitationIds_single,&
!$OMP excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,&
!$OMP Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,&
!$OMP Nalphas_Icfg,alphas_Icfg,connectedI_alpha, &
!$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,&
!$OMP totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq, &
!$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,&
!$OMP diagfac) &
!$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,&
!$OMP N_int, N_st, psi_out, psi_in, h_core_ri, AIJpqContainer,&
!$OMP sze, NalphaIcfg_list,alphasIcfg_list, &
!$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock)
allocate(singlesI(N_INT,2,max(sze,100)))
allocate(idxs_singlesI(max(sze,100)))
allocate(excitationIds_single(2,max(sze,100)))
allocate(excitationTypes_single(max(sze,100)))
!
!!! Single Excitations !!! !!! Single Excitations !!!
!$OMP DO SCHEDULE(dynamic,16)
do i=istart_cfg,iend_cfg do i=istart_cfg,iend_cfg
! if Seniority_range > 8 then ! if Seniority_range > 8 then
@ -1035,17 +1055,17 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
endi = psi_config_data(i,2) endi = psi_config_data(i,2)
NSOMOI = getNSOMO(Icfg) NSOMOI = getNSOMO(Icfg)
call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single, & call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single,&
excitationTypes_single,nsinglesI,N_int) excitationTypes_single,nsinglesI,N_int)
do j = 1,nsinglesI do j = 1,nsinglesI
idxI = idxs_singlesI(j) idxI = idxs_singlesI(j)
NSOMOJ = getNSOMO(singlesI(:,:,j)) NSOMOJ = getNSOMO(singlesI(1,1,j))
p = excitationIds_single(1,j) p = excitationIds_single(1,j)
q = excitationIds_single(2,j) q = excitationIds_single(2,j)
extype = excitationTypes_single(j) extype = excitationTypes_single(j)
! Off diagonal terms ! Off diagonal terms
call convertOrbIdsToModelSpaceIds(Icfg, singlesI(:,:,j), p, q, extype, pmodel, qmodel) call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel)
Jsomo = singlesI(1,1,j) Jsomo = singlesI(1,1,j)
Jdomo = singlesI(1,2,j) Jdomo = singlesI(1,2,j)
@ -1065,17 +1085,17 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
endj = psi_config_data(idxI,2) endj = psi_config_data(idxI,2)
!!! One-electron contribution !!! !!! One-electron contribution !!!
do kk = 1,n_st
cnti = 0
do ii = starti, endi do ii = starti, endi
cnti += 1 cnti = ii-starti+1
cntj = 0
do jj = startj, endj do jj = startj, endj
cntj += 1 cntj = jj-startj+1
!meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj) !meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj)
meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI) meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)
call omp_set_lock(lock(jj))
do kk = 1,n_st
psi_out(kk,jj) += meCC1 * psi_in(kk,ii) * h_core_ri(p,q) psi_out(kk,jj) += meCC1 * psi_in(kk,ii) * h_core_ri(p,q)
enddo enddo
call omp_unset_lock(lock(jj))
enddo enddo
enddo enddo
@ -1088,13 +1108,21 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
endif endif
enddo enddo
enddo enddo
!$OMP END DO
deallocate(singlesI)
deallocate(idxs_singlesI)
deallocate(excitationIds_single)
deallocate(excitationTypes_single)
!!! Double Excitations !!! allocate(alphas_Icfg(N_INT,2,max(sze,100)))
allocate(connectedI_alpha(N_INT,2,max(sze,100)))
allocate(idxs_connectedI_alpha(max(sze,100)))
allocate(excitationIds(2,max(sze,100)))
allocate(excitationTypes(max(sze,100)))
allocate(diagfactors(max(sze,100)))
call omp_set_max_active_levels(0)
!$OMP parallel
!$OMP master
! Loop over all selected configurations ! Loop over all selected configurations
!$OMP DO SCHEDULE(dynamic,16)
do i = istart_cfg,iend_cfg do i = istart_cfg,iend_cfg
! if Seniority_range > 8 then ! if Seniority_range > 8 then
@ -1376,124 +1404,6 @@ subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, ie
do i=istart_cfg,iend_cfg do i=istart_cfg,iend_cfg
print *,"I=",i print *,"I=",i
! if Seniority_range > 8 then
! continue
! else
! cycle
Icfg(1,1) = psi_configuration(1,1,i)
Icfg(1,2) = psi_configuration(1,2,i)
Isomo = Icfg(1,1)
Idomo = Icfg(1,2)
NSOMOI = getNSOMO(Icfg)
! find out all pq holes possible
nholes = 0
! holes in SOMO
! list_act
! list_core
! list_core_inact
! bitmasks
!do k = n_core_orb+1,n_core_orb + n_act_orb
do k = 1,mo_num
if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
nholes += 1
listholes(nholes) = k
holetype(nholes) = 1
endif
enddo
! holes in DOMO
!do k = n_core_orb+1,n_core_orb + n_act_orb
do k = 1,mo_num
if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
nholes += 1
listholes(nholes) = k
holetype(nholes) = 2
endif
enddo
! find vmos
listvmos = -1
vmotype = -1
nvmos = 0
!do k = n_core_orb+1,n_core_orb + n_act_orb
do k = 1,mo_num
!print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
nvmos += 1
listvmos(nvmos) = k
vmotype(nvmos) = 0
else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
nvmos += 1
listvmos(nvmos) = k
vmotype(nvmos) = 1
end if
enddo
! Icsf ids
starti = psi_config_data(i,1)
endi = psi_config_data(i,2)
NSOMOI = getNSOMO(Icfg)
call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single, &
excitationTypes_single,nsinglesI,N_int)
do j = 1,nsinglesI
idxI = idxs_singlesI(j)
NSOMOJ = getNSOMO(singlesI(:,:,j))
p = excitationIds_single(1,j)
q = excitationIds_single(2,j)
extype = excitationTypes_single(j)
! Off diagonal terms
call convertOrbIdsToModelSpaceIds(Icfg, singlesI(:,:,j), p, q, extype, pmodel, qmodel)
Jsomo = singlesI(1,1,j)
Jdomo = singlesI(1,2,j)
! Add the hole on J
if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
nholes += 1
listholes(nholes) = q
holetype(nholes) = 1
endif
if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
nholes += 1
listholes(nholes) = q
holetype(nholes) = 2
endif
startj = psi_config_data(idxI,1)
endj = psi_config_data(idxI,2)
!!! One-electron contribution !!!
do kk = 1,n_st
cnti = 0
do ii = starti, endi
cnti += 1
cntj = 0
do jj = startj, endj
cntj += 1
meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj)
psi_out(jj,kk) += meCC1 * psi_in(ii,kk) * h_core_ri(p,q)
enddo
enddo
enddo
! Undo setting in listholes
if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
nholes -= 1
endif
if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
nholes -= 1
endif
enddo
enddo
!!! Double Excitations !!!
! Loop over all selected configurations
do i = istart_cfg,iend_cfg
! if Seniority_range > 8 then ! if Seniority_range > 8 then
! continue ! continue
! else ! else
@ -1508,21 +1418,21 @@ subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, ie
Nalphas_Icfg = 0 Nalphas_Icfg = 0
! TODO: ! TODO:
! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate
call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg)
Nalphas_Icfg = NalphaIcfg_list(i)
alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
! TODO : remove doubly excited for return ! TODO : remove doubly excited for return
! Here we do 2x the loop. One to count for the size of the matrix, then we compute. ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
do k = 1,Nalphas_Icfg do k = 1,Nalphas_Icfg
! Now generate all singly excited with respect to a given alpha CFG ! Now generate all singly excited with respect to a given alpha CFG
call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
if(nconnectedI .EQ. 0) then
cycle
endif
totcolsTKI = 0 totcolsTKI = 0
rowsTKI = -1 rowsTKI = -1
do j = 1,nconnectedI do j = 1,nconnectedI
NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
p = excitationIds(1,j) p = excitationIds(1,j)
q = excitationIds(2,j) q = excitationIds(2,j)
extype = excitationTypes(j) extype = excitationTypes(j)
@ -1534,34 +1444,34 @@ subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, ie
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
totcolsTKI += colsikpq totcolsTKI += colsikpq
if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then ! if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then
print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI ! print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI
!rowsTKI = rowsikpq ! !rowsTKI = rowsikpq
else ! else
rowsTKI = rowsikpq rowsTKI = rowsikpq
endif ! endif
enddo enddo
allocate(TKI(rowsTKI,n_st,totcolsTKI)) ! coefficients of CSF allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
! Initialize the inegral container ! Initialize the inegral container
! dims : (totcolsTKI, nconnectedI) ! dims : (totcolsTKI, nconnectedI)
allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs
allocate(TKIGIJ(rowsTKI,n_st,nconnectedI)) ! gpqrs allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs
totcolsTKI = 0 totcolsTKI = 0
do j = 1,nconnectedI do j = 1,nconnectedI
NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
p = excitationIds(1,j) p = excitationIds(1,j)
q = excitationIds(2,j) q = excitationIds(2,j)
extype = excitationTypes(j) extype = excitationTypes(j)
call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
do kk = 1,n_st
do l = 1,rowsTKI
do m = 1,colsikpq do m = 1,colsikpq
TKI(l,kk,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * psi_in(idxs_connectedI_alpha(j)+m-1,kk) do l = 1,rowsTKI
do kk = 1,n_st
TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
enddo enddo
enddo enddo
enddo enddo
@ -1592,28 +1502,30 @@ subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, ie
! Do big BLAS ! Do big BLAS
! TODO TKI, size(TKI,1)*size(TKI,2) ! TODO TKI, size(TKI,1)*size(TKI,2)
call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,&
TKI, size(TKI,1)*n_st, GIJpqrs, size(GIJpqrs,1), 0.d0, & TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,&
TKIGIJ , size(TKIGIJ,1)*n_st ) TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
! Collect the result ! Collect the result
totcolsTKI = 0 totcolsTKI = 0
do j = 1,nconnectedI do j = 1,nconnectedI
NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
p = excitationIds(1,j) p = excitationIds(1,j)
q = excitationIds(2,j) q = excitationIds(2,j)
extype = excitationTypes(j) extype = excitationTypes(j)
call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel)
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
!print *,">j=",j,rowsikpq,colsikpq, ">>",totcolsTKI,",",idxs_connectedI_alpha(j)
do kk = 1,n_st
do m = 1,colsikpq do m = 1,colsikpq
do l = 1,rowsTKI do l = 1,rowsTKI
psi_out(idxs_connectedI_alpha(j)+m-1,kk) += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(l,kk,j) call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1))
do kk = 1,n_st
psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + &
AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j)
enddo enddo
call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1))
enddo enddo
enddo enddo
totcolsTKI += colsikpq totcolsTKI += colsikpq
@ -1627,14 +1539,25 @@ subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, ie
enddo ! loop over alphas enddo ! loop over alphas
enddo ! loop over I enddo ! loop over I
!$OMP end do
deallocate(connectedI_alpha)
deallocate(idxs_connectedI_alpha)
deallocate(excitationIds)
deallocate(excitationTypes)
deallocate(diagfactors)
! Add the diagonal contribution ! Add the diagonal contribution
do kk=1,n_st !$OMP DO
do i = 1,n_CSF do i = 1,n_CSF
psi_out(i,kk) += 1.0d0*diag_energies(i)*psi_in(i,kk) do kk=1,n_st
psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
enddo enddo
enddo enddo
!$OMP END DO
!$OMP end parallel
call omp_set_max_active_levels(4)
end subroutine calculate_sigma_vector_cfg_nst_naive_store
end subroutine calculate_sigma_vector_cfg_nst

View File

@ -22,8 +22,10 @@ struct bin_tree {
int NBF; int NBF;
}; };
#include "/opt/intel/oneapi/mkl/2021.1.1/include/mkl_cblas.h" //#include "/opt/intel/oneapi/mkl/2021.1.1/include/mkl_cblas.h"
//#include "cblas.h" //#include "cblas.h"
#include "mkl_cblas.h"
///opt/intel/compilers_and_libraries_2020.1.217/linux/mkl/include/mkl_cblas.h
#define MAX_SOMO 32 #define MAX_SOMO 32