Added PT2 to FCI ZMQ

This commit is contained in:
Anthony Scemama 2016-08-04 15:15:14 +02:00
parent c30ceed47d
commit 8b530a6db6
4 changed files with 121 additions and 62 deletions

62
config/sse4_avx2.cfg Normal file
View File

@ -0,0 +1,62 @@
# Common flags
##############
#
# -mkl=[parallel|sequential] : Use the MKL library
# --ninja : Enable the use of ninja. It is mandatory!
# --align=32 : Align all provided arrays on a 32-byte boundary
#
[COMMON]
FC : ifort
LAPACK_LIB : -mkl=parallel
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32
# Global options
################
#
# 1 : Activate
# 0 : Deactivate
#
[OPTION]
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
CACHE : 1 ; Enable cache_compile.py
OPENMP : 1 ; Append OpenMP flags
# Optimization flags
####################
#
# -axSSE4.2,AVX,CORE-AVX2 : Generate auto-dispatched code paths for SSE4.2, AVX and AVX2 processors
# -O2 : O3 not better than O2.
# -ip : Inter-procedural optimizations
# -ftz : Flushes denormal results to zero
#
[OPT]
FCFLAGS : -axSSE4.2,AVX,CORE-AVX2 -O2 -ip -ftz -g -traceback
# Profiling flags
#################
#
[PROFILE]
FC : -p -g
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
# Debugging flags
#################
#
# -traceback : Activate backtrace on runtime
# -fpe0 : All floating point exceptions
# -C : Checks uninitialized variables, array subscripts, etc...
# -g : Extra debugging information
# -xSSE2 : Valgrind needs a very simple x86 executable
#
[DEBUG]
FC : -g -traceback
FCFLAGS : -xSSE2 -C -fpe0
# OpenMP flags
#################
#
[OPENMP]
FC : -openmp
IRPF90_FLAGS : --openmp

View File

@ -38,7 +38,6 @@ program fci_zmq
do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max)
n_det_before = N_det
! call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st)
call ZMQ_selection(max(1024-N_det, N_det), pt2)
PROVIDE psi_coef
@ -90,21 +89,21 @@ program fci_zmq
N_det = min(N_det_max,N_det)
touch N_det psi_det psi_coef
call diagonalize_CI
! if(do_pt2_end)then
! print*,'Last iteration only to compute the PT2'
! threshold_selectors = 1.d0
! threshold_generators = 0.999d0
! call H_apply_FCI_PT2(pt2, norm_pert, H_pert_diag, N_st)
!
! print *, 'Final step'
! print *, 'N_det = ', N_det
! print *, 'N_states = ', N_states
! print *, 'PT2 = ', pt2
! print *, 'E = ', CI_energy
! print *, 'E+PT2 = ', CI_energy+pt2
! print *, '-----'
! call ezfio_set_full_ci_energy_pt2(CI_energy+pt2)
! endif
if(do_pt2_end)then
print*,'Last iteration only to compute the PT2'
threshold_selectors = 1.d0
threshold_generators = 0.9999d0
E_CI_before = CI_energy
call ZMQ_selection(1, pt2)
print *, 'Final step'
print *, 'N_det = ', N_det
print *, 'N_states = ', N_states
print *, 'PT2 = ', pt2
print *, 'E = ', E_CI_before
print *, 'E+PT2 = ', E_CI_before+pt2
print *, '-----'
call ezfio_set_full_ci_energy_pt2(E_CI_before+pt2)
endif
call save_wavefunction
end

View File

@ -530,7 +530,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
!$OMP END PARALLEL
write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st)
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_states,converged)
if (converged) then
exit
endif

View File

@ -215,54 +215,52 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
end
subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates)
implicit none
use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys)
integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates
double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates)
double precision, intent(out) :: s2(nstates,nstates)
double precision :: s2_tmp,accu
integer :: i,j,l,jj,ll,kk
integer, allocatable :: idx(:)
double precision, allocatable :: tmp(:,:)
BEGIN_DOC
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
END_DOC
s2 = 0.d0
do ll = 1, nstates
do jj = 1, nstates
accu = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE (i,j,kk,idx,tmp,s2_tmp) &
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates) &
!$OMP REDUCTION(+:accu)
allocate(idx(0:n))
!$OMP DO SCHEDULE(dynamic)
do i = 1, n
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
do kk=1,idx(0)
j = idx(kk)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
implicit none
use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys)
integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates
double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates)
double precision, intent(out) :: s2(nstates,nstates)
double precision :: s2_tmp,accu
integer :: i,j,l,jj,ll,kk
integer, allocatable :: idx(:)
BEGIN_DOC
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
END_DOC
s2 = 0.d0
do ll = 1, nstates
do jj = 1, nstates
accu = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE (i,j,kk,idx,s2_tmp) &
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
!$OMP REDUCTION(+:accu)
allocate(idx(0:n))
!$OMP DO SCHEDULE(dynamic)
do i = n,1,-1 ! Better OMP scheduling
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
do kk=1,idx(0)
j = idx(kk)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
enddo
enddo
!$OMP END DO
deallocate(idx)
!$OMP END PARALLEL
s2(ll,jj) += accu
enddo
enddo
!$OMP END DO NOWAIT
deallocate(idx)
!$OMP BARRIER
!$OMP END PARALLEL
s2(ll,jj) += accu
enddo
enddo
do i = 1, nstates
do j =i+1,nstates
accu = 0.5d0 * (s2(i,j) + s2(j,i))
s2(i,j) = accu
s2(j,i) = accu
do i = 1, nstates
do j =i+1,nstates
accu = 0.5d0 * (s2(i,j) + s2(j,i))
s2(i,j) = accu
s2(j,i) = accu
enddo
enddo
enddo
end
subroutine diagonalize_s2_betweenstates(keys_tmp,psi_coefs_inout,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues)