mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-03 01:56:05 +01:00
Added PT2 to FCI ZMQ
This commit is contained in:
parent
c30ceed47d
commit
8b530a6db6
62
config/sse4_avx2.cfg
Normal file
62
config/sse4_avx2.cfg
Normal file
@ -0,0 +1,62 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Enable the use of ninja. It is mandatory!
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort
|
||||
LAPACK_LIB : -mkl=parallel
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 1 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : -O3 is not better than -O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FCFLAGS : -axSSE4.2,AVX,CORE-AVX2 -O2 -ip -ftz -g -traceback
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace at runtime
|
||||
# -fpe0 : All floating point exceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -openmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
@ -38,7 +38,6 @@ program fci_zmq
|
||||
|
||||
do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max)
|
||||
n_det_before = N_det
|
||||
! call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st)
|
||||
call ZMQ_selection(max(1024-N_det, N_det), pt2)
|
||||
|
||||
PROVIDE psi_coef
|
||||
@ -90,21 +89,21 @@ program fci_zmq
|
||||
N_det = min(N_det_max,N_det)
|
||||
touch N_det psi_det psi_coef
|
||||
call diagonalize_CI
|
||||
! if(do_pt2_end)then
|
||||
! print*,'Last iteration only to compute the PT2'
|
||||
! threshold_selectors = 1.d0
|
||||
! threshold_generators = 0.999d0
|
||||
! call H_apply_FCI_PT2(pt2, norm_pert, H_pert_diag, N_st)
|
||||
!
|
||||
! print *, 'Final step'
|
||||
! print *, 'N_det = ', N_det
|
||||
! print *, 'N_states = ', N_states
|
||||
! print *, 'PT2 = ', pt2
|
||||
! print *, 'E = ', CI_energy
|
||||
! print *, 'E+PT2 = ', CI_energy+pt2
|
||||
! print *, '-----'
|
||||
! call ezfio_set_full_ci_energy_pt2(CI_energy+pt2)
|
||||
! endif
|
||||
if(do_pt2_end)then
|
||||
print*,'Last iteration only to compute the PT2'
|
||||
threshold_selectors = 1.d0
|
||||
threshold_generators = 0.9999d0
|
||||
E_CI_before = CI_energy
|
||||
call ZMQ_selection(1, pt2)
|
||||
print *, 'Final step'
|
||||
print *, 'N_det = ', N_det
|
||||
print *, 'N_states = ', N_states
|
||||
print *, 'PT2 = ', pt2
|
||||
print *, 'E = ', E_CI_before
|
||||
print *, 'E+PT2 = ', E_CI_before+pt2
|
||||
print *, '-----'
|
||||
call ezfio_set_full_ci_energy_pt2(E_CI_before+pt2)
|
||||
endif
|
||||
call save_wavefunction
|
||||
end
|
||||
|
||||
|
@ -530,7 +530,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
|
||||
!$OMP END PARALLEL
|
||||
|
||||
write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st)
|
||||
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
|
||||
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_states,converged)
|
||||
if (converged) then
|
||||
exit
|
||||
endif
|
||||
|
@ -224,7 +224,6 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
double precision :: s2_tmp,accu
|
||||
integer :: i,j,l,jj,ll,kk
|
||||
integer, allocatable :: idx(:)
|
||||
double precision, allocatable :: tmp(:,:)
|
||||
BEGIN_DOC
|
||||
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
|
||||
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
|
||||
@ -234,12 +233,12 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
do jj = 1, nstates
|
||||
accu = 0.d0
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE (i,j,kk,idx,tmp,s2_tmp) &
|
||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates) &
|
||||
!$OMP PRIVATE (i,j,kk,idx,s2_tmp) &
|
||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
|
||||
!$OMP REDUCTION(+:accu)
|
||||
allocate(idx(0:n))
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do i = 1, n
|
||||
do i = n,1,-1 ! Better OMP scheduling
|
||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
|
||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
||||
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
|
||||
@ -249,9 +248,8 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
!$OMP END DO
|
||||
deallocate(idx)
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
s2(ll,jj) += accu
|
||||
enddo
|
||||
|
Loading…
Reference in New Issue
Block a user