From c604b34a0b8483f13d6745210aeedfe508e893db Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 26 Feb 2021 18:40:29 +0100 Subject: [PATCH 1/8] Renormalized selection for CSF --- src/cipsi/selection.irp.f | 53 +++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 5afb514e..799fce95 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -668,10 +668,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d w = 0d0 -! integer(bit_kind) :: occ(N_int,2), n -! call configuration_of_det(det,occ,N_int) -! call configuration_to_dets_size(occ,n,elec_alpha_num,N_int) - e_pert = 0.d0 coef = 0.d0 logical :: do_diag @@ -699,7 +695,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision :: eigvalues(N_states+1) double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) - integer :: iwork(3+5*(N_states+1)), info, k ,n + integer :: iwork(3+5*(N_states+1)), info, k if (do_diag) then double precision :: pt2_matrix(N_states+1,N_states+1) @@ -765,36 +761,43 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection -! w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate) - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(alpha_h_psi*mat(jstate,p1,p2)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) + endif case(6) -! w = w - coef(istate) * coef(istate) * s_weight(istate,istate) - w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) + endif case default ! Energy selection -! w = w + e_pert(istate) * s_weight(istate,istate) - w = min(w, e_pert(istate) * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(X(istate)*X(jstate)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate) + else + w = min(w, e_pert(istate) * s_weight(istate,istate)) + endif end select end do -! w = dble(n) * w - + integer(bit_kind) :: occ(N_int,2), n + if (h0_type == 'CFG') then + do k=1,N_int + occ(k,1) = ieor(det(k,1),det(k,2)) + occ(k,2) = iand(det(k,1),det(k,2)) + enddo + call configuration_to_dets_size(occ,n,elec_alpha_num,N_int) + n = max(n,1) + w *= dsqrt(dble(n)) + endif if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) From 9f26e533518530e9d7008f15913b60b8a41acd2a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 Mar 2021 11:41:58 +0100 Subject: [PATCH 2/8] Fixes #148 --- src/cipsi/pt2_stoch_routines.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 489ffaaf..a0107513 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -187,7 +187,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then stop 'Unable to put pt2_stoch_istate on ZMQ server' endif - if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then stop 'Unable to put threshold_generators on ZMQ server' endif From 2a264766134460ca3849f91470f64ecdd9699c45 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 Mar 2021 11:42:15 +0100 Subject: [PATCH 3/8] Change in weights --- src/cipsi/selection.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 799fce95..0b7e99c1 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -796,7 +796,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d enddo call configuration_to_dets_size(occ,n,elec_alpha_num,N_int) n = max(n,1) - w *= dsqrt(dble(n)) + w *= dble(n) endif if(w <= buf%mini) then From 0078c677d1f8f6e7a0ff08c2adb7ef5de7380fa7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 Mar 2021 15:17:05 +0100 Subject: [PATCH 4/8] Parallelized K loop --- src/csf/obtain_I_foralpha.irp.f | 5 +++- src/csf/sigma_vector.irp.f | 47 ++++++++++++++++++++++++--------- src/csf/tree_utils.h | 4 ++- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index 680dc37f..dbd44f9b 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -132,7 +132,10 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI starti = psi_config_data(i,1) endi = psi_config_data(i,2) nconnectedI += 1 - connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + do k=1,N_int + connectedI(k,1,nconnectedI) = psi_configuration(k,1,i) + connectedI(k,2,nconnectedI) = psi_configuration(k,2,i) + enddo idxs_connectedI(nconnectedI)=starti excitationIds(1,nconnectedI)=p excitationIds(2,nconnectedI)=q diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index b3d43d08..673cd4e8 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -98,12 +98,12 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) do while(detb(k) /= 0_bit_kind) ! Find the lowest beta electron and clear it - ipos = trailz(detb(k)) - detb(k) = ibclr(detb(k),ipos) + ipos = trailz(detb(k)) + detb(k) = ibclr(detb(k),ipos) ! Create a mask will all MOs higher than the beta electron mask = not(shiftl(1_bit_kind,ipos + 1) - 1_bit_kind) - + ! Apply the mask to the alpha string to count how many electrons to cross nperm = popcnt( iand(mask, deta(k)) ) @@ -886,6 +886,7 @@ end subroutine calculate_preconditioner_cfg subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) implicit none use bitmasks + use omp_lib BEGIN_DOC ! Documentation for sigma-vector calculation ! @@ -943,17 +944,18 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze real*8 :: diag_energies(n_CSF) real*8 :: tmpvar, tmptot + integer(omp_lock_kind), allocatable :: lock(:) + ! allocate allocate(alphas_Icfg(N_INT,2,max(sze,100))) allocate(singlesI(N_INT,2,max(sze,100))) - allocate(connectedI_alpha(N_INT,2,max(sze,100))) allocate(idxs_singlesI(max(sze,100))) - allocate(idxs_connectedI_alpha(max(sze,100))) allocate(excitationIds_single(2,max(sze,100))) allocate(excitationTypes_single(max(sze,100))) - allocate(excitationIds(2,max(sze,100))) - allocate(excitationTypes(max(sze,100))) - allocate(diagfactors(max(sze,100))) + allocate(lock(sze)) + do i=1,sze + call omp_init_lock(lock(i)) + enddo !print *," sze = ",sze call calculate_preconditioner_cfg(diag_energies) @@ -1086,10 +1088,23 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !!! Double Excitations !!! - call omp_set_max_active_levels(0) - !$OMP parallel - !$OMP master + call omp_set_max_active_levels(1) + !$OMP parallel default(none) & + !$OMP private(i,Icfg,starti,endi,Nalphas_Icfg,alphas_Icfg,k,connectedI_alpha, & + !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors, & + !$OMP totcolsTKI,rowsTKI,NSOMOalpha,NSOMOI,p,q,extype,pmodel,qmodel,rowsikpq, & + !$OMP colsikpq, GIJpqrs,TKIGIJ,j,kk,l,m,TKI,CCmattmp, moi, moj, mok, mol, & + !$OMP diagfac) & + !$OMP shared(psi_configuration,NalphaIcfg_list,alphasIcfg_list,N_int,N_st, & + !$OMP AIJpqMatrixDimsList, AIJpqContainer, sze, istart_cfg, iend_cfg, & + !$OMP psi_config_data, psi_in, psi_out, lock) + allocate(connectedI_alpha(N_INT,2,max(sze,100))) + allocate(idxs_connectedI_alpha(max(sze,100))) + allocate(excitationIds(2,max(sze,100))) + allocate(excitationTypes(max(sze,100))) + allocate(diagfactors(max(sze,100))) ! Loop over all selected configurations + !$OMP DO do i = istart_cfg,iend_cfg ! if Seniority_range > 8 then @@ -1240,11 +1255,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do m = 1,colsikpq !tmpvar = psi_out(kk,idxs_connectedI_alpha(j)+m-1) do l = 1,rowsTKI + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) do kk = 1,n_st !tmpvar += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(kk,l,j) psi_out(kk,idxs_connectedI_alpha(j)+m-1) += AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) enddo - !psi_out(kk,idxs_connectedI_alpha(j)+m-1) = tmpvar + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) enddo enddo totcolsTKI += colsikpq @@ -1258,7 +1274,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze enddo ! loop over alphas enddo ! loop over I - !$OMP end master + !$OMP end do + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) !$OMP end parallel call omp_set_max_active_levels(4) diff --git a/src/csf/tree_utils.h b/src/csf/tree_utils.h index 332ddf69..b3c124a0 100644 --- a/src/csf/tree_utils.h +++ b/src/csf/tree_utils.h @@ -22,8 +22,10 @@ struct bin_tree { int NBF; }; -#include "/opt/intel/oneapi/mkl/2021.1.1/include/mkl_cblas.h" +//#include "/opt/intel/oneapi/mkl/2021.1.1/include/mkl_cblas.h" //#include "cblas.h" +#include "mkl_cblas.h" +///opt/intel/compilers_and_libraries_2020.1.217/linux/mkl/include/mkl_cblas.h #define MAX_SOMO 32 From 52922eca32d9884170894a0f3330e58b35c6c74b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 Mar 2021 15:49:17 +0100 Subject: [PATCH 5/8] OpenMP --- src/csf/sigma_vector.irp.f | 65 +++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 673cd4e8..97d5b766 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -176,7 +176,7 @@ end subroutine get_phase_qp_to_cfg istate = 1 psi_csf_to_config_data(1) = 1 phasedet = 1.0d0 - call omp_set_nested(.False.) + call omp_set_max_active_levels(1) !$OMP PARALLEL !$OMP MASTER do i = 1,N_configuration @@ -945,13 +945,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze real*8 :: tmpvar, tmptot integer(omp_lock_kind), allocatable :: lock(:) + call omp_set_max_active_levels(1) - ! allocate - allocate(alphas_Icfg(N_INT,2,max(sze,100))) - allocate(singlesI(N_INT,2,max(sze,100))) - allocate(idxs_singlesI(max(sze,100))) - allocate(excitationIds_single(2,max(sze,100))) - allocate(excitationTypes_single(max(sze,100))) allocate(lock(sze)) do i=1,sze call omp_init_lock(lock(i)) @@ -969,7 +964,32 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze iend_cfg = psi_csf_to_config_data(iend) + call omp_set_max_active_levels(1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,& + !$OMP holetype, vmotype, nvmos, listvmos, starti, endi, & + !$OMP nsinglesI, singlesI,idxs_singlesI,excitationIds_single,& + !$OMP excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,& + !$OMP Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,& + !$OMP Nalphas_Icfg,alphas_Icfg,connectedI_alpha, & + !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,& + !$OMP totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq, & + !$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,& + !$OMP diagfac) & + !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& + !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, AIJpqContainer,& + !$OMP sze, NalphaIcfg_list,alphasIcfg_list, & + !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock) + + allocate(singlesI(N_INT,2,max(sze,100))) + allocate(idxs_singlesI(max(sze,100))) + allocate(excitationIds_single(2,max(sze,100))) + allocate(excitationTypes_single(max(sze,100))) + !!! Single Excitations !!! + + !$OMP DO SCHEDULE(dynamic,128) do i=istart_cfg,iend_cfg ! if Seniority_range > 8 then @@ -1037,12 +1057,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do j = 1,nsinglesI idxI = idxs_singlesI(j) - NSOMOJ = getNSOMO(singlesI(:,:,j)) + NSOMOJ = getNSOMO(singlesI(1,1,j)) p = excitationIds_single(1,j) q = excitationIds_single(2,j) extype = excitationTypes_single(j) ! Off diagonal terms - call convertOrbIdsToModelSpaceIds(Icfg, singlesI(:,:,j), p, q, extype, pmodel, qmodel) + call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) Jsomo = singlesI(1,1,j) Jdomo = singlesI(1,2,j) @@ -1085,26 +1105,17 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze endif enddo enddo + !$OMP END DO - !!! Double Excitations !!! - call omp_set_max_active_levels(1) - !$OMP parallel default(none) & - !$OMP private(i,Icfg,starti,endi,Nalphas_Icfg,alphas_Icfg,k,connectedI_alpha, & - !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors, & - !$OMP totcolsTKI,rowsTKI,NSOMOalpha,NSOMOI,p,q,extype,pmodel,qmodel,rowsikpq, & - !$OMP colsikpq, GIJpqrs,TKIGIJ,j,kk,l,m,TKI,CCmattmp, moi, moj, mok, mol, & - !$OMP diagfac) & - !$OMP shared(psi_configuration,NalphaIcfg_list,alphasIcfg_list,N_int,N_st, & - !$OMP AIJpqMatrixDimsList, AIJpqContainer, sze, istart_cfg, iend_cfg, & - !$OMP psi_config_data, psi_in, psi_out, lock) + allocate(alphas_Icfg(N_INT,2,max(sze,100))) allocate(connectedI_alpha(N_INT,2,max(sze,100))) allocate(idxs_connectedI_alpha(max(sze,100))) allocate(excitationIds(2,max(sze,100))) allocate(excitationTypes(max(sze,100))) allocate(diagfactors(max(sze,100))) ! Loop over all selected configurations - !$OMP DO + !$OMP DO SCHEDULE(dynamic,128) do i = istart_cfg,iend_cfg ! if Seniority_range > 8 then @@ -1280,17 +1291,19 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds) deallocate(excitationTypes) deallocate(diagfactors) - !$OMP end parallel - call omp_set_max_active_levels(4) ! Add the diagonal contribution - do kk=1,n_st + !$OMP DO do i = 1,n_CSF - psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) - enddo + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo enddo + !$OMP END DO + !$OMP end parallel + call omp_set_max_active_levels(4) end subroutine calculate_sigma_vector_cfg_nst_naive_store From dad0ebc0890998548ab3ae9188611c4d6d98fb18 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 Mar 2021 16:03:35 +0100 Subject: [PATCH 6/8] OpenMP --- src/csf/obtain_I_foralpha.irp.f | 5 ----- src/csf/sigma_vector.irp.f | 26 +++++++++++++------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index dbd44f9b..9a67b6ff 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -61,11 +61,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI Idomo = Ialpha(1,2) Jsomo = psi_configuration(1,1,i) Jdomo = psi_configuration(1,2,i) - !call debug_spindet(Isomo,1) - !call debug_spindet(Idomo,1) - !print *,"-J--i=",i,Idomo,Jdomo,">",N_configuration - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) diffSOMO = IEOR(Isomo,Jsomo) ndiffSOMO = POPCNT(diffSOMO) !if(ndiffSOMO .NE. 2 .AND. ndiffSOMO .NE. 0) then diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 97d5b766..95b4dff6 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -986,10 +986,17 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(idxs_singlesI(max(sze,100))) allocate(excitationIds_single(2,max(sze,100))) allocate(excitationTypes_single(max(sze,100))) +! + allocate(alphas_Icfg(N_INT,2,max(sze,100))) + allocate(connectedI_alpha(N_INT,2,max(sze,100))) + allocate(idxs_connectedI_alpha(max(sze,100))) + allocate(excitationIds(2,max(sze,100))) + allocate(excitationTypes(max(sze,100))) + allocate(diagfactors(max(sze,100))) !!! Single Excitations !!! - !$OMP DO SCHEDULE(dynamic,128) + !$OMP DO SCHEDULE(dynamic,16) do i=istart_cfg,iend_cfg ! if Seniority_range > 8 then @@ -1104,19 +1111,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze nholes -= 1 endif enddo - enddo - !$OMP END DO +! enddo +! !$OMP END DO - - allocate(alphas_Icfg(N_INT,2,max(sze,100))) - allocate(connectedI_alpha(N_INT,2,max(sze,100))) - allocate(idxs_connectedI_alpha(max(sze,100))) - allocate(excitationIds(2,max(sze,100))) - allocate(excitationTypes(max(sze,100))) - allocate(diagfactors(max(sze,100))) - ! Loop over all selected configurations - !$OMP DO SCHEDULE(dynamic,128) - do i = istart_cfg,iend_cfg +! Loop over all selected configurations +! !$OMP DO SCHEDULE(dynamic,128) +! do i = istart_cfg,iend_cfg ! if Seniority_range > 8 then ! continue From 6635b02e1da13b527c4f4c8a4c1f2bb789e9ca78 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 Mar 2021 01:13:44 +0100 Subject: [PATCH 7/8] Fixed OpenMP --- src/cipsi/pt2_stoch_routines.irp.f | 3 ++- src/csf/sigma_vector.irp.f | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index a0107513..d0d7e511 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -287,7 +287,8 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) call write_int(6,nproc_target,'Number of threads for PT2') call write_double(6,mem,'Memory (Gb)') - call omp_set_nested(.false.) + call omp_set_max_active_levels(1) + print '(A)', '========== ======================= ===================== ===================== ===========' diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 95b4dff6..d6404d30 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -232,8 +232,7 @@ end subroutine get_phase_qp_to_cfg print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf !$OMP END MASTER !$OMP END PARALLEL - - call omp_set_nested(.True.) + call omp_set_max_active_levels(4) END_PROVIDER @@ -987,12 +986,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(excitationIds_single(2,max(sze,100))) allocate(excitationTypes_single(max(sze,100))) ! - allocate(alphas_Icfg(N_INT,2,max(sze,100))) - allocate(connectedI_alpha(N_INT,2,max(sze,100))) - allocate(idxs_connectedI_alpha(max(sze,100))) - allocate(excitationIds(2,max(sze,100))) - allocate(excitationTypes(max(sze,100))) - allocate(diagfactors(max(sze,100))) !!! Single Excitations !!! @@ -1111,12 +1104,22 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze nholes -= 1 endif enddo -! enddo -! !$OMP END DO + enddo + !$OMP END DO + deallocate(singlesI) + deallocate(idxs_singlesI) + deallocate(excitationIds_single) + deallocate(excitationTypes_single) + allocate(alphas_Icfg(N_INT,2,max(sze,100))) + allocate(connectedI_alpha(N_INT,2,max(sze,100))) + allocate(idxs_connectedI_alpha(max(sze,100))) + allocate(excitationIds(2,max(sze,100))) + allocate(excitationTypes(max(sze,100))) + allocate(diagfactors(max(sze,100))) ! Loop over all selected configurations -! !$OMP DO SCHEDULE(dynamic,128) -! do i = istart_cfg,iend_cfg + !$OMP DO SCHEDULE(dynamic,16) + do i = istart_cfg,iend_cfg ! if Seniority_range > 8 then ! continue From acef0b913b29b665b6e2fa9f07ab991a7253cb2c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 Mar 2021 01:13:44 +0100 Subject: [PATCH 8/8] Fixed OpenMP --- src/cipsi/pt2_stoch_routines.irp.f | 3 +- src/csf/obtain_I_foralpha.irp.f | 10 +- src/csf/sigma_vector.irp.f | 797 ++++++++--------------------- 3 files changed, 215 insertions(+), 595 deletions(-) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index a0107513..d0d7e511 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -287,7 +287,8 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) call write_int(6,nproc_target,'Number of threads for PT2') call write_double(6,mem,'Memory (Gb)') - call omp_set_nested(.false.) + call omp_set_max_active_levels(1) + print '(A)', '========== ======================= ===================== ===================== ===========' diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index 9a67b6ff..23bcd7fc 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -56,6 +56,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI p = 0 q = 0 + if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' do i=idxI,end_index Isomo = Ialpha(1,1) Idomo = Ialpha(1,2) @@ -71,16 +72,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI ndiffDOMO = POPCNT(diffDOMO) nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO - !print *,"-I--i=",i,ndiffSOMO,ndiffDOMO,nxordiffSOMODOMO!Isomo,Jsomo,ndiffSOMO,ndiffDOMO - !if((ndiffSOMO + ndiffDOMO) .EQ. 0) cycle - !print *,POPCNT(IEOR(diffSOMO,diffDOMO)), ndiffDOMO - !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then - !call debug_spindet(Isomo,1) - !call debug_spindet(Idomo,1) - !print *,"-J--i=",i,Idomo,Jdomo,">",N_configuration - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) select case(ndiffDOMO) case (0) ! SOMO -> VMO diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 95b4dff6..8fb29e4d 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -232,8 +232,7 @@ end subroutine get_phase_qp_to_cfg print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf !$OMP END MASTER !$OMP END PARALLEL - - call omp_set_nested(.True.) + call omp_set_max_active_levels(4) END_PROVIDER @@ -987,12 +986,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(excitationIds_single(2,max(sze,100))) allocate(excitationTypes_single(max(sze,100))) ! - allocate(alphas_Icfg(N_INT,2,max(sze,100))) - allocate(connectedI_alpha(N_INT,2,max(sze,100))) - allocate(idxs_connectedI_alpha(max(sze,100))) - allocate(excitationIds(2,max(sze,100))) - allocate(excitationTypes(max(sze,100))) - allocate(diagfactors(max(sze,100))) !!! Single Excitations !!! @@ -1019,20 +1012,20 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! bitmasks !do k = n_core_orb+1,n_core_orb + n_act_orb do k = 1,mo_num - if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 1 - endif + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif enddo ! holes in DOMO !do k = n_core_orb+1,n_core_orb + n_act_orb do k = 1,mo_num - if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 2 - endif + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif enddo ! find vmos @@ -1041,249 +1034,228 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze nvmos = 0 !do k = n_core_orb+1,n_core_orb + n_act_orb do k = 1,mo_num - !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) - if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 0 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 1 - end if + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if enddo - - + + ! Icsf ids starti = psi_config_data(i,1) endi = psi_config_data(i,2) NSOMOI = getNSOMO(Icfg) - - call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single, & - excitationTypes_single,nsinglesI,N_int) - + + call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single,& + excitationTypes_single,nsinglesI,N_int) + do j = 1,nsinglesI - idxI = idxs_singlesI(j) - NSOMOJ = getNSOMO(singlesI(1,1,j)) - p = excitationIds_single(1,j) - q = excitationIds_single(2,j) - extype = excitationTypes_single(j) - ! Off diagonal terms - call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) - Jsomo = singlesI(1,1,j) - Jdomo = singlesI(1,2,j) + idxI = idxs_singlesI(j) + NSOMOJ = getNSOMO(singlesI(1,1,j)) + p = excitationIds_single(1,j) + q = excitationIds_single(2,j) + extype = excitationTypes_single(j) + ! Off diagonal terms + call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) + Jsomo = singlesI(1,1,j) + Jdomo = singlesI(1,2,j) - ! Add the hole on J - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes += 1 - listholes(nholes) = q - holetype(nholes) = 1 - endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes += 1 - listholes(nholes) = q - holetype(nholes) = 2 - endif + ! Add the hole on J + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 2 + endif - startj = psi_config_data(idxI,1) - endj = psi_config_data(idxI,2) + startj = psi_config_data(idxI,1) + endj = psi_config_data(idxI,2) - !!! One-electron contribution !!! - do kk = 1,n_st - cnti = 0 - do ii = starti, endi - cnti += 1 - cntj = 0 - do jj = startj, endj - cntj += 1 - !meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj) - meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI) - psi_out(kk,jj) += meCC1 * psi_in(kk,ii) * h_core_ri(p,q) - enddo - enddo + !!! One-electron contribution !!! + do ii = starti, endi + cnti = ii-starti+1 + do jj = startj, endj + cntj = jj-startj+1 + !meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj) + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI) + call omp_set_lock(lock(jj)) + do kk = 1,n_st + psi_out(kk,jj) += meCC1 * psi_in(kk,ii) * h_core_ri(p,q) + enddo + call omp_unset_lock(lock(jj)) enddo + enddo - ! Undo setting in listholes - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes -= 1 + ! Undo setting in listholes + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + enddo + enddo + !$OMP END DO + deallocate(singlesI) + deallocate(idxs_singlesI) + deallocate(excitationIds_single) + deallocate(excitationTypes_single) + + allocate(alphas_Icfg(N_INT,2,max(sze,100))) + allocate(connectedI_alpha(N_INT,2,max(sze,100))) + allocate(idxs_connectedI_alpha(max(sze,100))) + allocate(excitationIds(2,max(sze,100))) + allocate(excitationTypes(max(sze,100))) + allocate(diagfactors(max(sze,100))) + + ! Loop over all selected configurations + !$OMP DO SCHEDULE(dynamic,16) + do i = istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + + ! TODO : remove doubly excited for return + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) + + totcolsTKI = 0 + rowsTKI = -1 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + if(p.EQ.q) then + NSOMOalpha = NSOMOI endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes -= 1 - endif - enddo -! enddo -! !$OMP END DO + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + totcolsTKI += colsikpq +! if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then +! print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI +! !rowsTKI = rowsikpq +! else + rowsTKI = rowsikpq +! endif + enddo -! Loop over all selected configurations -! !$OMP DO SCHEDULE(dynamic,128) -! do i = istart_cfg,iend_cfg + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs - ! if Seniority_range > 8 then - ! continue - ! else - ! cycle - - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) - starti = psi_config_data(i,1) - endi = psi_config_data(i,2) - - ! Returns all unique (checking the past) singly excited cfgs connected to I - Nalphas_Icfg = 0 - ! TODO: - ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate - !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) - Nalphas_Icfg = NalphaIcfg_list(i) - alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) - - ! TODO : remove doubly excited for return - ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. - do k = 1,Nalphas_Icfg - ! Now generate all singly excited with respect to a given alpha CFG - call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) - - if(nconnectedI .EQ. 0) then - cycle - endif - totcolsTKI = 0 - rowsTKI = -1 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) - ! for E_pp E_rs and E_ppE_rr case - if(p.EQ.q) then - NSOMOalpha = NSOMOI - endif - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - totcolsTKI += colsikpq - if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then - print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI - !rowsTKI = rowsikpq - else - rowsTKI = rowsikpq - endif + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + enddo enddo - - allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF - ! Initialize the inegral container - ! dims : (totcolsTKI, nconnectedI) - allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs - allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs - - totcolsTKI = 0 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - allocate(CCmattmp(colsikpq,n_st)) - !do kk = 1,n_st - !do m = 1,colsikpq - ! CCmattmp(m,kk) = psi_in(idxs_connectedI_alpha(j)+m-1,kk) - !enddo - !enddo - do m = 1,colsikpq - do l = 1,rowsTKI - do kk = 1,n_st - !tmpvar = CCmattmp(m,kk) - !TKI(kk,l,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * tmpvar - !TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * tmpvar - TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1) - enddo - enddo - enddo - deallocate(CCmattmp) - do m = 1,colsikpq - do l = 1,nconnectedI - ! = (ik|jl) - moi = excitationIds(1,j) ! p - mok = excitationIds(2,j) ! q - moj = excitationIds(2,l) ! s - mol = excitationIds(1,l) ! r - if(moi.EQ.mok .AND. moj.EQ.mol)then - diagfac = diagfactors(j) - diagfac *= diagfactors(l) - !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac - GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = - else - diagfac = diagfactors(j)*diagfactors(l) - !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac - GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = - !endif - endif - enddo - enddo - totcolsTKI += colsikpq + do m = 1,colsikpq + do l = 1,nconnectedI + ! = (ik|jl) + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + if(moi.EQ.mok .AND. moj.EQ.mol)then + diagfac = diagfactors(j) + diagfac *= diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + else + diagfac = diagfactors(j)*diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + !endif + endif + enddo enddo + totcolsTKI += colsikpq + enddo - ! Do big BLAS - ! TODO TKI, size(TKI,1)*size(TKI,2) - call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & - TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, & - TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) - - !print *,"DIMs = ",rowsTKI,n_st,totcolsTKI,nconnectedI - !print *,"TKI mat" - !do kk=1,n_st - ! do j=1,totcolsTKI - ! print *,TKI(:,kk,j) - ! enddo - ! print *,"--" - !enddo - - !print *,"TKIGIJ mat" - !do kk=1,n_st - ! do j=1,nconnectedI - ! print *,TKIGIJ(:,kk,j) - ! enddo - ! print *,"--" - !enddo + ! Do big BLAS + ! TODO TKI, size(TKI,1)*size(TKI,2) + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,& + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,& + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) - ! Collect the result - totcolsTKI = 0 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - !print *,">j=",j,rowsikpq,colsikpq, ">>",totcolsTKI,",",idxs_connectedI_alpha(j) - do m = 1,colsikpq - !tmpvar = psi_out(kk,idxs_connectedI_alpha(j)+m-1) - do l = 1,rowsTKI - call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) - do kk = 1,n_st - !tmpvar += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(kk,l,j) - psi_out(kk,idxs_connectedI_alpha(j)+m-1) += AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) - enddo - call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) - enddo - enddo - totcolsTKI += colsikpq + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + & + AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) + enddo + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + enddo enddo + totcolsTKI += colsikpq + enddo - deallocate(TKI) ! coefficients of CSF - ! Initialize the inegral container - ! dims : (totcolsTKI, nconnectedI) - deallocate(GIJpqrs) ! gpqrs - deallocate(TKIGIJ) ! gpqrs + deallocate(TKI) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs - enddo ! loop over alphas + enddo ! loop over alphas enddo ! loop over I !$OMP end do deallocate(connectedI_alpha) @@ -1304,354 +1276,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP end parallel call omp_set_max_active_levels(4) +! do i=1,sze +! call omp_deinit_lock(lock(i)) +! enddo end subroutine calculate_sigma_vector_cfg_nst_naive_store -subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) - implicit none - use bitmasks - BEGIN_DOC - ! Documentation for sigma-vector calculation - ! - ! Calculates the result of the - ! application of the hamiltonian to the - ! wavefunction in CFG basis once - ! TODO : Things prepare outside this routine - ! 1. Touch the providers for - ! a. ApqIJ containers - ! b. DET to CSF transformation matrices - ! 2. DET to CSF transcormation - ! 2. CSF to DET back transcormation - ! returns : psi_coef_out_det : - END_DOC - integer,intent(in) :: sze, istart,iend, istep, ishift, n_st - real*8,intent(in) :: psi_in(sze,n_st) - real*8,intent(out) :: psi_out(sze,n_st) - integer(bit_kind) :: Icfg(N_INT,2) - integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk - integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg - integer(bit_kind),dimension(:,:,:),allocatable :: singlesI - integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha - integer,dimension(:),allocatable :: idxs_singlesI - integer,dimension(:),allocatable :: idxs_connectedI_alpha - integer,dimension(:,:),allocatable :: excitationIds_single - integer,dimension(:),allocatable :: excitationTypes_single - integer,dimension(:,:),allocatable :: excitationIds - integer,dimension(:),allocatable :: excitationTypes - real*8,dimension(:),allocatable :: diagfactors - integer :: nholes - integer :: nvmos - integer :: listvmos(mo_num) - integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO - integer :: listholes(mo_num) - integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO - integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI - integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel - integer :: getNSOMO - integer :: totcolsTKI - integer :: rowsTKI - integer :: noccpp - integer :: istart_cfg, iend_cfg - integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta - integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk - real*8 :: norm_coef_cfg, fac2eints - real*8 :: norm_coef_det - real*8 :: meCC1, meCC2, diagfac - real*8,dimension(:,:,:),allocatable :: TKI - real*8,dimension(:,:),allocatable :: GIJpqrs - real*8,dimension(:,:,:),allocatable :: TKIGIJ - real*8, external :: mo_two_e_integral - real*8, external :: get_two_e_integral - real*8 :: diag_energies(n_CSF) - - ! allocate - allocate(alphas_Icfg(N_INT,2,max(sze/2,100))) - allocate(singlesI(N_INT,2,max(sze/2,100))) - allocate(connectedI_alpha(N_INT,2,max(sze/2,100))) - allocate(idxs_singlesI(max(sze/2,100))) - allocate(idxs_connectedI_alpha(max(sze/2,100))) - allocate(excitationIds_single(2,max(sze/2,100))) - allocate(excitationTypes_single(max(sze/2,100))) - allocate(excitationIds(2,max(sze/2,100))) - allocate(excitationTypes(max(sze/2,100))) - allocate(diagfactors(max(sze/2,100))) - - - !print *," sze = ",sze - call calculate_preconditioner_cfg(diag_energies) - - MS = 0 - norm_coef_cfg=0.d0 - - psi_out=0.d0 - - istart_cfg = psi_csf_to_config_data(istart) - iend_cfg = psi_csf_to_config_data(iend) - - - !!! Single Excitations !!! - do i=istart_cfg,iend_cfg - print *,"I=",i - - ! if Seniority_range > 8 then - ! continue - ! else - ! cycle - - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) - Isomo = Icfg(1,1) - Idomo = Icfg(1,2) - NSOMOI = getNSOMO(Icfg) - - ! find out all pq holes possible - nholes = 0 - ! holes in SOMO - ! list_act - ! list_core - ! list_core_inact - ! bitmasks - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num - if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 1 - endif - enddo - ! holes in DOMO - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num - if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 2 - endif - enddo - - ! find vmos - listvmos = -1 - vmotype = -1 - nvmos = 0 - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num - !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) - if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 0 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 1 - end if - enddo - - - ! Icsf ids - starti = psi_config_data(i,1) - endi = psi_config_data(i,2) - NSOMOI = getNSOMO(Icfg) - - call generate_all_singles_cfg_with_type(Icfg,singlesI,idxs_singlesI,excitationIds_single, & - excitationTypes_single,nsinglesI,N_int) - - do j = 1,nsinglesI - idxI = idxs_singlesI(j) - NSOMOJ = getNSOMO(singlesI(:,:,j)) - p = excitationIds_single(1,j) - q = excitationIds_single(2,j) - extype = excitationTypes_single(j) - ! Off diagonal terms - call convertOrbIdsToModelSpaceIds(Icfg, singlesI(:,:,j), p, q, extype, pmodel, qmodel) - Jsomo = singlesI(1,1,j) - Jdomo = singlesI(1,2,j) - - ! Add the hole on J - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes += 1 - listholes(nholes) = q - holetype(nholes) = 1 - endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes += 1 - listholes(nholes) = q - holetype(nholes) = 2 - endif - - startj = psi_config_data(idxI,1) - endj = psi_config_data(idxI,2) - - !!! One-electron contribution !!! - do kk = 1,n_st - cnti = 0 - do ii = starti, endi - cnti += 1 - cntj = 0 - do jj = startj, endj - cntj += 1 - meCC1 = AIJpqContainer(NSOMOI,extype,pmodel,qmodel,cnti,cntj) - psi_out(jj,kk) += meCC1 * psi_in(ii,kk) * h_core_ri(p,q) - enddo - enddo - enddo - - ! Undo setting in listholes - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes -= 1 - endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then - nholes -= 1 - endif - enddo - enddo - - !!! Double Excitations !!! - - ! Loop over all selected configurations - do i = istart_cfg,iend_cfg - - ! if Seniority_range > 8 then - ! continue - ! else - ! cycle - - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) - starti = psi_config_data(i,1) - endi = psi_config_data(i,2) - - ! Returns all unique (checking the past) singly excited cfgs connected to I - Nalphas_Icfg = 0 - ! TODO: - ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate - call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) - ! TODO : remove doubly excited for return - ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. - do k = 1,Nalphas_Icfg - ! Now generate all singly excited with respect to a given alpha CFG - call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) - - if(nconnectedI .EQ. 0) then - cycle - endif - totcolsTKI = 0 - rowsTKI = -1 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) - ! for E_pp E_rs and E_ppE_rr case - if(p.EQ.q) then - NSOMOalpha = NSOMOI - endif - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - totcolsTKI += colsikpq - if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then - print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI - !rowsTKI = rowsikpq - else - rowsTKI = rowsikpq - endif - enddo - - allocate(TKI(rowsTKI,n_st,totcolsTKI)) ! coefficients of CSF - ! Initialize the inegral container - ! dims : (totcolsTKI, nconnectedI) - allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs - allocate(TKIGIJ(rowsTKI,n_st,nconnectedI)) ! gpqrs - - totcolsTKI = 0 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - do kk = 1,n_st - do l = 1,rowsTKI - do m = 1,colsikpq - TKI(l,kk,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * psi_in(idxs_connectedI_alpha(j)+m-1,kk) - enddo - enddo - enddo - do m = 1,colsikpq - do l = 1,nconnectedI - ! = (ik|jl) - moi = excitationIds(1,j) ! p - mok = excitationIds(2,j) ! q - moj = excitationIds(2,l) ! s - mol = excitationIds(1,l) ! r - if(moi.EQ.mok .AND. moj.EQ.mol)then - diagfac = diagfactors(j) - diagfac *= diagfactors(l) - !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac - GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = - else - diagfac = diagfactors(j)*diagfactors(l) - !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac - GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = - !endif - endif - enddo - enddo - totcolsTKI += colsikpq - enddo - - - - ! Do big BLAS - ! TODO TKI, size(TKI,1)*size(TKI,2) - call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & - TKI, size(TKI,1)*n_st, GIJpqrs, size(GIJpqrs,1), 0.d0, & - TKIGIJ , size(TKIGIJ,1)*n_st ) - - - ! Collect the result - totcolsTKI = 0 - do j = 1,nconnectedI - NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) - p = excitationIds(1,j) - q = excitationIds(2,j) - extype = excitationTypes(j) - call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) - rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) - colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) - !print *,">j=",j,rowsikpq,colsikpq, ">>",totcolsTKI,",",idxs_connectedI_alpha(j) - do kk = 1,n_st - do m = 1,colsikpq - do l = 1,rowsTKI - psi_out(idxs_connectedI_alpha(j)+m-1,kk) += AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * TKIGIJ(l,kk,j) - enddo - enddo - enddo - totcolsTKI += colsikpq - enddo - - deallocate(TKI) ! coefficients of CSF - ! Initialize the inegral container - ! dims : (totcolsTKI, nconnectedI) - deallocate(GIJpqrs) ! gpqrs - deallocate(TKIGIJ) ! gpqrs - - enddo ! loop over alphas - enddo ! loop over I - - - ! Add the diagonal contribution - do kk=1,n_st - do i = 1,n_CSF - psi_out(i,kk) += 1.0d0*diag_energies(i)*psi_in(i,kk) - enddo - enddo - - -end subroutine calculate_sigma_vector_cfg_nst