Added PT2 to FCI ZMQ

This commit is contained in:
Anthony Scemama 2016-08-04 15:15:14 +02:00
parent c30ceed47d
commit 8b530a6db6
4 changed files with 121 additions and 62 deletions

62
config/sse4_avx2.cfg Normal file
View File

@ -0,0 +1,62 @@
# Common flags
##############
#
# -mkl=[parallel|sequential] : Use the MKL library
# --ninja : Enable the use of ninja. This flag is mandatory!
# --align=32 : Align all provided arrays on a 32-byte boundary
#
[COMMON]
FC : ifort
LAPACK_LIB : -mkl=parallel
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32
# Global options
################
#
# 1 : Activate
# 0 : Deactivate
#
[OPTION]
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
CACHE : 1 ; Enable cache_compile.py
OPENMP : 1 ; Append OpenMP flags
# Optimization flags
####################
#
# -axSSE4.2,AVX,CORE-AVX2 : Generate auto-dispatched code paths for SSE4.2, AVX and AVX2 processors, in addition to the baseline code path
# -O2 : Optimization level; -O3 is not better than -O2 here.
# -ip : Inter-procedural optimizations
# -ftz : Flushes denormal results to zero
#
[OPT]
FCFLAGS : -axSSE4.2,AVX,CORE-AVX2 -O2 -ip -ftz -g -traceback
# Profiling flags
#################
#
[PROFILE]
FC : -p -g
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
# Debugging flags
#################
#
# -traceback : Activate backtrace on runtime
# -fpe0 : All floating point exceptions
# -C : Checks uninitialized variables, array subscripts, etc...
# -g : Extra debugging information
# -xSSE2 : Valgrind needs a very simple x86 executable
#
[DEBUG]
FC : -g -traceback
FCFLAGS : -xSSE2 -C -fpe0
# OpenMP flags
#################
#
[OPENMP]
FC : -openmp
IRPF90_FLAGS : --openmp

View File

@ -38,7 +38,6 @@ program fci_zmq
do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max) do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max)
n_det_before = N_det n_det_before = N_det
! call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st)
call ZMQ_selection(max(1024-N_det, N_det), pt2) call ZMQ_selection(max(1024-N_det, N_det), pt2)
PROVIDE psi_coef PROVIDE psi_coef
@ -90,21 +89,21 @@ program fci_zmq
N_det = min(N_det_max,N_det) N_det = min(N_det_max,N_det)
touch N_det psi_det psi_coef touch N_det psi_det psi_coef
call diagonalize_CI call diagonalize_CI
! if(do_pt2_end)then if(do_pt2_end)then
! print*,'Last iteration only to compute the PT2' print*,'Last iteration only to compute the PT2'
! threshold_selectors = 1.d0 threshold_selectors = 1.d0
! threshold_generators = 0.999d0 threshold_generators = 0.9999d0
! call H_apply_FCI_PT2(pt2, norm_pert, H_pert_diag, N_st) E_CI_before = CI_energy
! call ZMQ_selection(1, pt2)
! print *, 'Final step' print *, 'Final step'
! print *, 'N_det = ', N_det print *, 'N_det = ', N_det
! print *, 'N_states = ', N_states print *, 'N_states = ', N_states
! print *, 'PT2 = ', pt2 print *, 'PT2 = ', pt2
! print *, 'E = ', CI_energy print *, 'E = ', E_CI_before
! print *, 'E+PT2 = ', CI_energy+pt2 print *, 'E+PT2 = ', E_CI_before+pt2
! print *, '-----' print *, '-----'
! call ezfio_set_full_ci_energy_pt2(CI_energy+pt2) call ezfio_set_full_ci_energy_pt2(E_CI_before+pt2)
! endif endif
call save_wavefunction call save_wavefunction
end end

View File

@ -530,7 +530,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
!$OMP END PARALLEL !$OMP END PARALLEL
write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st) write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st)
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_states,converged)
if (converged) then if (converged) then
exit exit
endif endif

View File

@ -215,54 +215,52 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
end end
subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates) subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates)
implicit none implicit none
use bitmasks use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys) integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys)
integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates
double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates) double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates)
double precision, intent(out) :: s2(nstates,nstates) double precision, intent(out) :: s2(nstates,nstates)
double precision :: s2_tmp,accu double precision :: s2_tmp,accu
integer :: i,j,l,jj,ll,kk integer :: i,j,l,jj,ll,kk
integer, allocatable :: idx(:) integer, allocatable :: idx(:)
double precision, allocatable :: tmp(:,:) BEGIN_DOC
BEGIN_DOC ! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states ! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j) END_DOC
END_DOC s2 = 0.d0
s2 = 0.d0 do ll = 1, nstates
do ll = 1, nstates do jj = 1, nstates
do jj = 1, nstates accu = 0.d0
accu = 0.d0 !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE (i,j,kk,idx,s2_tmp) &
!$OMP PRIVATE (i,j,kk,idx,tmp,s2_tmp) & !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates) & !$OMP REDUCTION(+:accu)
!$OMP REDUCTION(+:accu) allocate(idx(0:n))
allocate(idx(0:n)) !$OMP DO SCHEDULE(dynamic)
!$OMP DO SCHEDULE(dynamic) do i = n,1,-1 ! Better OMP scheduling
do i = 1, n call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj) call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) do kk=1,idx(0)
do kk=1,idx(0) j = idx(kk)
j = idx(kk) call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll) enddo
enddo
!$OMP END DO
deallocate(idx)
!$OMP END PARALLEL
s2(ll,jj) += accu
enddo enddo
enddo
!$OMP END DO NOWAIT
deallocate(idx)
!$OMP BARRIER
!$OMP END PARALLEL
s2(ll,jj) += accu
enddo enddo
enddo do i = 1, nstates
do i = 1, nstates do j =i+1,nstates
do j =i+1,nstates accu = 0.5d0 * (s2(i,j) + s2(j,i))
accu = 0.5d0 * (s2(i,j) + s2(j,i)) s2(i,j) = accu
s2(i,j) = accu s2(j,i) = accu
s2(j,i) = accu enddo
enddo enddo
enddo
end end
subroutine diagonalize_s2_betweenstates(keys_tmp,psi_coefs_inout,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues) subroutine diagonalize_s2_betweenstates(keys_tmp,psi_coefs_inout,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues)