From f6f111dfffe8687b49819a21551b723b9c164a0c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 4 Jun 2014 21:28:43 +0200 Subject: [PATCH] Accelerated SC2 --- setup_environment.sh | 1 + .../cisd_sc2_selection.irp.f | 7 +- src/Dets/README.rst | 80 +++++++++++--- src/Dets/SC2.irp.f | 101 +++++++++++++----- src/Dets/determinants.irp.f | 21 +++- src/Dets/diagonalize_CI.irp.f | 2 +- src/Dets/diagonalize_CI_SC2.irp.f | 6 +- src/Dets/filter_connected.irp.f | 15 ++- src/Perturbation/pert_sc2.irp.f | 19 ++-- 9 files changed, 192 insertions(+), 60 deletions(-) diff --git a/setup_environment.sh b/setup_environment.sh index 62b579ed..058a90cb 100755 --- a/setup_environment.sh +++ b/setup_environment.sh @@ -15,6 +15,7 @@ then echo "Error in IRPF90 installation" exit 1 fi + rm -rf EZFIO fi cat << EOF > quantum_package.rc diff --git a/src/CISD_SC2_selected/cisd_sc2_selection.irp.f b/src/CISD_SC2_selected/cisd_sc2_selection.irp.f index 2eec29eb..da87f25a 100644 --- a/src/CISD_SC2_selected/cisd_sc2_selection.irp.f +++ b/src/CISD_SC2_selected/cisd_sc2_selection.irp.f @@ -13,10 +13,13 @@ program cisd_sc2_selected pt2 = 1.d0 perturbation = "epstein_nesbet_sc2_projected" E_old(1) = HF_energy - do while (maxval(abs(pt2(1:N_st))) > 1.d-10) + davidson_threshold = 1.d-4 + + do while (maxval(abs(pt2(1:N_st))) > 1.d-6) print*,'----' print*,'' call H_apply_cisd_selection(perturbation,pt2, norm_pert, H_pert_diag, N_st) +! soft_touch det_connections call diagonalize_CI_SC2 print *, 'N_det = ', N_det do i = 1, N_st @@ -34,6 +37,8 @@ program cisd_sc2_selected exit endif enddo + davidson_threshold = 1.d-8 + touch davidson_threshold davidson_criterion do i = 1, N_st max = 0.d0 diff --git a/src/Dets/README.rst b/src/Dets/README.rst index 208042cf..ce68d471 100644 --- a/src/Dets/README.rst +++ b/src/Dets/README.rst @@ -67,14 +67,36 @@ Documentation `resize_h_apply_buffer `_ Undocumented -`connected_to_ref `_ +`cisd_sc2 `_ + CISD+SC2 method :: take off all the disconnected terms of a CISD (selected or not) + .br + dets_in : bitmasks corresponding to determinants + .br + u_in : guess coefficients on the various states. Overwritten + on exit + .br + dim_in : leftmost dimension of u_in + .br + sze : Number of determinants + .br + N_st : Number of eigenstates + .br + Initial guess vectors are not necessarily orthonormal + +`repeat_excitation `_ Undocumented -`det_is_not_or_may_be_in_ref `_ +`connected_to_ref `_ + Undocumented + +`det_is_not_or_may_be_in_ref `_ If true, det is not in ref If false, det may be in ref -`key_pattern_not_in_ref `_ +`is_in_wavefunction `_ + Undocumented + +`key_pattern_not_in_ref `_ Min and max values of the integers of the keys of the reference `davidson_converged `_ @@ -130,37 +152,50 @@ Documentation `davidson_threshold `_ Can be : [ energy | residual | both | wall_time | cpu_time | iterations ] +`det_search_key `_ + Return an integer*8 corresponding to a determinant index for searching + `n_det `_ Number of determinants in the wave function -`n_det_reference `_ - Number of determinants in the reference wave function +`n_det_max_jacobi `_ + Maximum number of determinants diagonalized my jacobi `n_states `_ Number of states to consider -`psi_average_norm_contrib `_ +`psi_average_norm_contrib `_ Contribution of determinants to the state-averaged density -`psi_average_norm_contrib_sorted `_ - Wave function sorted by determinants (state-averaged) +`psi_average_norm_contrib_sorted `_ + Wave function sorted by determinants contribution to the norm (state-averaged) -`psi_coef `_ +`psi_coef `_ The wave function. Initialized with Hartree-Fock if the EZFIO file is empty -`psi_coef_sorted `_ - Wave function sorted by determinants (state-averaged) +`psi_coef_sorted `_ + Wave function sorted by determinants contribution to the norm (state-averaged) -`psi_det `_ +`psi_coef_sorted_bit `_ + Determinants on which we apply for perturbation. + o They are sorted by determinants interpreted as integers. Useful + to accelerate the search of a determinant + +`psi_det `_ The wave function. Initialized with Hartree-Fock if the EZFIO file is empty -`psi_det_size `_ +`psi_det_size `_ Size of the psi_det/psi_coef arrays -`psi_det_sorted `_ - Wave function sorted by determinants (state-averaged) +`psi_det_sorted `_ + Wave function sorted by determinants contribution to the norm (state-averaged) + +`psi_det_sorted_bit `_ + Determinants on which we apply for perturbation. + o They are sorted by determinants interpreted as integers. Useful + to accelerate the search of a determinant `double_exc_bitmask `_ double_exc_bitmask(:,1,i) is the bitmask for holes of excitation 1 @@ -196,6 +231,19 @@ Documentation Replace the coefficients of the CI states by the coefficients of the eigenstates of the CI matrix +`ci_sc2_eigenvectors `_ + Eigenvectors/values of the CI matrix + +`ci_sc2_electronic_energy `_ + Eigenvectors/values of the CI matrix + +`ci_sc2_energy `_ + N_states lowest eigenvalues of the CI matrix + +`diagonalize_ci_sc2 `_ + Replace the coefficients of the CI states by the coefficients of the + eigenstates of the CI matrix + `filter_connected `_ Filters out the determinants that are not connected by H .br @@ -298,7 +346,7 @@ Documentation Returns where i and j are determinants `i_h_psi `_ - for the various Nstate + for the various Nstates `i_h_psi_sc2 `_ for the various Nstate diff --git a/src/Dets/SC2.irp.f b/src/Dets/SC2.irp.f index 443d72d3..aa2f49d1 100644 --- a/src/Dets/SC2.irp.f +++ b/src/Dets/SC2.irp.f @@ -1,4 +1,4 @@ -subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) +subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,convergence) use bitmasks implicit none BEGIN_DOC @@ -17,10 +17,11 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) ! ! Initial guess vectors are not necessarily orthonormal END_DOC - integer, intent(in) :: dim_in, sze, N_st, Nint,iunit + integer, intent(in) :: dim_in, sze, N_st, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st) double precision, intent(out) :: energies(N_st) + double precision, intent(in) :: convergence PROVIDE ref_bitmask_energy ASSERT (N_st > 0) ASSERT (sze > 0) @@ -32,14 +33,17 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) double precision :: overlap(N_st,N_st) double precision :: u_dot_v, u_dot_u - integer :: degree,N_double,index_hf,index_double(sze) + integer :: degree,N_double,index_hf double precision :: hij_elec, e_corr_double,e_corr,diag_h_mat_elem,inv_c0 - double precision :: e_corr_array(sze),H_jj_ref(sze),H_jj_dressed(sze),hij_double(sze) double precision :: e_corr_double_before,accu,cpu_2,cpu_1 - integer :: degree_exc(sze) + integer,allocatable :: degree_exc(:), index_double(:) integer :: i_ok + double precision,allocatable :: e_corr_array(:),H_jj_ref(:),H_jj_dressed(:),hij_double(:) double precision, allocatable :: eigenvectors(:,:), eigenvalues(:),H_matrix_tmp(:,:) - if(sze.le.1000)then + integer(bit_kind), allocatable :: doubles(:,:,:) + integer ,parameter :: sze_max = 1000 + + if(sze.le.sze_max)then allocate (eigenvectors(size(H_matrix_all_dets,1),sze)) allocate (H_matrix_tmp(size(H_matrix_all_dets,1),sze)) allocate (eigenvalues(sze)) @@ -50,6 +54,8 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) enddo endif + allocate (doubles(Nint,2,sze),e_corr_array(sze),H_jj_ref(sze),H_jj_dressed(sze), & + index_double(sze), degree_exc(sze), hij_double(sze)) call write_time(output_Dets) write(output_Dets,'(A)') '' write(output_Dets,'(A)') 'CISD SC2' @@ -71,18 +77,18 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) e_corr_double = 0.d0 do i = 1, sze call get_excitation_degree(ref_bitmask,dets_in(1,1,i),degree,Nint) - degree_exc(i) = degree + degree_exc(i) = degree+1 if(degree==0)then index_hf=i else if (degree == 2)then N_double += 1 index_double(N_double) = i + doubles(:,:,N_double) = dets_in(:,:,i) call i_H_j(ref_bitmask,dets_in(1,1,i),Nint,hij_elec) hij_double(N_double) = hij_elec e_corr_array(N_double) = u_in(i,1)* hij_elec e_corr_double += e_corr_array(N_double) e_corr += e_corr_array(N_double) - index_double(N_double) = i else if (degree == 1)then call i_H_j(ref_bitmask,dets_in(1,1,i),Nint,hij_elec) e_corr += u_in(i,1)* hij_elec @@ -98,28 +104,70 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) e_corr_double_before = e_corr_double iter = 0 do while (.not.converged) + if (abort_here) then + exit + endif iter +=1 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,degree,accu) & + !$OMP SHARED(H_jj_dressed,sze,H_jj_ref,index_hf,N_int,N_double,& + !$OMP dets_in,doubles,degree_exc,e_corr_array,e_corr_double) + !$OMP DO SCHEDULE(STATIC) do i=1,sze H_jj_dressed(i) = H_jj_ref(i) if (i==index_hf)cycle - accu = 0.d0 - if(degree_exc(i)==1)then - do j=1,N_double - call get_excitation_degree(dets_in(1,1,i),dets_in(1,1,index_double(j)),degree,N_int) - if (degree<=2)cycle - accu += e_corr_array(j) - enddo - else - do j=1,N_double - call get_excitation_degree(dets_in(1,1,i),dets_in(1,1,index_double(j)),degree,N_int) - if (degree<=3)cycle - accu += e_corr_array(j) - enddo - endif - H_jj_dressed(i) += accu + accu = -e_corr_double + select case (N_int) + case (1) + do j=1,N_double + degree = & + popcnt(xor( dets_in(1,1,i),doubles(1,1,j))) + & + popcnt(xor( dets_in(1,2,i),doubles(1,2,j))) + + if (degree<=ishft(degree_exc(i),1)) then + accu += e_corr_array(j) + endif + enddo + case (2) + do j=1,N_double + degree = & + popcnt(xor( dets_in(1,1,i),doubles(1,1,j))) + & + popcnt(xor( dets_in(1,2,i),doubles(1,2,j))) + & + popcnt(xor( dets_in(2,1,i),doubles(2,1,j))) + & + popcnt(xor( dets_in(2,2,i),doubles(2,2,j))) + + if (degree<=ishft(degree_exc(i),1)) then + accu += e_corr_array(j) + endif + enddo + case (3) + do j=1,N_double + degree = & + popcnt(xor( dets_in(1,1,i),doubles(1,1,j))) + & + popcnt(xor( dets_in(1,2,i),doubles(1,2,j))) + & + popcnt(xor( dets_in(2,1,i),doubles(2,1,j))) + & + popcnt(xor( dets_in(2,2,i),doubles(2,2,j))) + & + popcnt(xor( dets_in(3,1,i),doubles(3,1,j))) + & + popcnt(xor( dets_in(3,2,i),doubles(3,2,j))) + + if (degree<=ishft(degree_exc(i),1)) then + accu += e_corr_array(j) + endif + enddo + case default + do j=1,N_double + call get_excitation_degree(dets_in(1,1,i),doubles(1,1,j),degree,N_int) + if (degree<=degree_exc(i)) then + accu += e_corr_array(j) + endif + enddo + end select + H_jj_dressed(i) -= accu enddo + !$OMP END DO + !$OMP END PARALLEL - if(sze>1000)then + if(sze>sze_max)then call davidson_diag_hjj(dets_in,u_in,H_jj_dressed,energies,dim_in,sze,N_st,Nint,output_Dets) else do i = 1,sze @@ -154,7 +202,8 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) write(output_Dets,'(A)') '' call write_double(output_Dets,(e_corr_double - e_corr_double_before),& 'Delta(E_corr)') - converged = dabs(e_corr_double - e_corr_double_before) < 1.d-10 + converged = dabs(e_corr_double - e_corr_double_before) < convergence + converged = converged .or. abort_here if (converged) then exit endif @@ -163,6 +212,8 @@ subroutine CISD_SC2(dets_in,u_in,energies,dim_in,sze,N_st,Nint,iunit) enddo call write_time(output_Dets) + deallocate (doubles,e_corr_array,H_jj_ref,H_jj_dressed, & + index_double, degree_exc, hij_double) end diff --git a/src/Dets/determinants.irp.f b/src/Dets/determinants.irp.f index 570741cc..bf30e434 100644 --- a/src/Dets/determinants.irp.f +++ b/src/Dets/determinants.irp.f @@ -62,10 +62,9 @@ BEGIN_PROVIDER [ integer, psi_det_size ] END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,psi_det_size) ] -&BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ] implicit none BEGIN_DOC - ! The wave function. Initialized with Hartree-Fock if the EZFIO file + ! The wave function determinants. Initialized with Hartree-Fock if the EZFIO file ! is empty END_DOC @@ -74,7 +73,6 @@ END_PROVIDER if (ifirst == 0) then ifirst = 1 psi_det = 0_bit_kind - psi_coef = 0.d0 endif integer :: i @@ -83,6 +81,23 @@ END_PROVIDER psi_det(i,2,1) = HF_bitmask(i,2) enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ] + implicit none + BEGIN_DOC + ! The wave function coefficients. Initialized with Hartree-Fock if the EZFIO file + ! is empty + END_DOC + + integer, save :: ifirst = 0 + integer :: i + + if (ifirst == 0) then + ifirst = 1 + psi_coef = 0.d0 + endif + do i=1,N_states psi_coef(i,i) = 1.d0 enddo diff --git a/src/Dets/diagonalize_CI.irp.f b/src/Dets/diagonalize_CI.irp.f index 8ac7aba7..f9cc3ef0 100644 --- a/src/Dets/diagonalize_CI.irp.f +++ b/src/Dets/diagonalize_CI.irp.f @@ -82,5 +82,5 @@ subroutine diagonalize_CI psi_coef(i,j) = CI_eigenvectors(i,j) enddo enddo - SOFT_TOUCH psi_coef psi_det CI_electronic_energy CI_energy CI_eigenvectors + SOFT_TOUCH psi_coef CI_electronic_energy CI_energy CI_eigenvectors end diff --git a/src/Dets/diagonalize_CI_SC2.irp.f b/src/Dets/diagonalize_CI_SC2.irp.f index 0265af2f..6873f930 100644 --- a/src/Dets/diagonalize_CI_SC2.irp.f +++ b/src/Dets/diagonalize_CI_SC2.irp.f @@ -30,9 +30,9 @@ END_PROVIDER CI_SC2_electronic_energy(j) = CI_electronic_energy(j) enddo - + double precision :: convergence call CISD_SC2(psi_det,CI_SC2_eigenvectors,CI_SC2_electronic_energy, & - size(CI_SC2_eigenvectors,1),N_det,N_states,N_int,output_Dets) + size(CI_SC2_eigenvectors,1),N_det,N_states,N_int,davidson_threshold) END_PROVIDER subroutine diagonalize_CI_SC2 @@ -47,5 +47,5 @@ subroutine diagonalize_CI_SC2 psi_coef(i,j) = CI_SC2_eigenvectors(i,j) enddo enddo - SOFT_TOUCH psi_coef psi_det CI_SC2_electronic_energy CI_SC2_energy CI_SC2_eigenvectors + SOFT_TOUCH psi_coef CI_SC2_electronic_energy CI_SC2_energy CI_SC2_eigenvectors end diff --git a/src/Dets/filter_connected.irp.f b/src/Dets/filter_connected.irp.f index 8b8e6c16..19a1c050 100644 --- a/src/Dets/filter_connected.irp.f +++ b/src/Dets/filter_connected.irp.f @@ -356,15 +356,20 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat) ASSERT (Nint == N_int) ASSERT (sze > 0) - l=1 - l_repeat=1 - call get_excitation_degree(ref_bitmask,key2,degree,Nint) integer :: degree - ASSERT (degree .ne. 0) + degree = popcnt(xor( ref_bitmask(1,1), key2(1,1))) + & + popcnt(xor( ref_bitmask(1,2), key2(1,2))) + !DEC$ NOUNROLL + do l=2,Nint + degree = degree+ popcnt(xor( ref_bitmask(l,1), key2(l,1))) + & + popcnt(xor( ref_bitmask(l,2), key2(l,2))) + enddo + degree = ishft(degree,-1) + l_repeat=1 + l=1 if(degree == 2)then if (Nint==1) then - !DIR$ LOOP COUNT (1000) do i=1,sze diff --git a/src/Perturbation/pert_sc2.irp.f b/src/Perturbation/pert_sc2.irp.f index 15b81377..1703f8f4 100644 --- a/src/Perturbation/pert_sc2.irp.f +++ b/src/Perturbation/pert_sc2.irp.f @@ -32,14 +32,14 @@ subroutine pt2_epstein_nesbet_SC2_projected(det_pert,c_pert,e_2_pert,H_pert_diag ! H_pert_diag = c_pert END_DOC - integer :: i,j,degree + integer :: i,j,degree,l double precision :: diag_H_mat_elem,accu_e_corr,hij,h0j,h,delta_E double precision :: repeat_all_e_corr,accu_e_corr_tmp,e_2_pert_fonda ASSERT (Nint == N_int) ASSERT (Nint > 0) call i_H_psi_SC2(det_pert,psi_selectors,psi_selectors_coef,Nint,N_det_selectors,psi_selectors_size,N_st,i_H_psi_array,idx_repeat) accu_e_corr = 0.d0 - call i_H_j(ref_bitmask,det_pert,Nint,h0j) + !$IVDEP do i = 1, idx_repeat(0) accu_e_corr = accu_e_corr + E_corr_per_selectors(idx_repeat(i)) enddo @@ -50,8 +50,6 @@ subroutine pt2_epstein_nesbet_SC2_projected(det_pert,c_pert,e_2_pert,H_pert_diag c_pert(1) = i_H_psi_array(1) * delta_E e_2_pert(1) = i_H_psi_array(1) * c_pert(1) - H_pert_diag(1) = c_pert(1) * h0j/coef_hf_selector - e_2_pert_fonda = H_pert_diag(1) do i =2,N_st H_pert_diag(i) = h @@ -67,9 +65,18 @@ subroutine pt2_epstein_nesbet_SC2_projected(det_pert,c_pert,e_2_pert,H_pert_diag endif enddo - call get_excitation_degree(ref_bitmask,det_pert,degree,Nint) - if(degree==2)then + degree = popcnt(xor( ref_bitmask(1,1), det_pert(1,1))) + & + popcnt(xor( ref_bitmask(1,2), det_pert(1,2))) + !DEC$ NOUNROLL + do l=2,Nint + degree = degree+ popcnt(xor( ref_bitmask(l,1), det_pert(l,1))) + & + popcnt(xor( ref_bitmask(l,2), det_pert(l,2))) + enddo + if(degree==4)then ! + call i_H_j(ref_bitmask,det_pert,Nint,h0j) + H_pert_diag(1) = c_pert(1) * h0j/coef_hf_selector + e_2_pert_fonda = H_pert_diag(1) do i = 1, N_st do j = 1, idx_repeat(0) e_2_pert(i) += e_2_pert_fonda * psi_selectors_coef(idx_repeat(j),i) * psi_selectors_coef(idx_repeat(j),i)