Added PT2 to FCI ZMQ
This commit is contained in:
parent
c30ceed47d
commit
8b530a6db6
|
@ -0,0 +1,62 @@
|
||||||
|
# Common flags
|
||||||
|
##############
|
||||||
|
#
|
||||||
|
# -mkl=[parallel|sequential] : Use the MKL library
|
||||||
|
# --ninja : Enable the use of ninja. It is mandatory!
|
||||||
|
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||||
|
#
|
||||||
|
[COMMON]
|
||||||
|
FC : ifort
|
||||||
|
LAPACK_LIB : -mkl=parallel
|
||||||
|
IRPF90 : irpf90
|
||||||
|
IRPF90_FLAGS : --ninja --align=32
|
||||||
|
|
||||||
|
# Global options
|
||||||
|
################
|
||||||
|
#
|
||||||
|
# 1 : Activate
|
||||||
|
# 0 : Deactivate
|
||||||
|
#
|
||||||
|
[OPTION]
|
||||||
|
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||||
|
CACHE : 1 ; Enable cache_compile.py
|
||||||
|
OPENMP : 1 ; Append OpenMP flags
|
||||||
|
|
||||||
|
# Optimization flags
|
||||||
|
####################
|
||||||
|
#
|
||||||
|
# -xHost : Compile a binary optimized for the current architecture
|
||||||
|
# -O2 : -O3 is not better than -O2.
|
||||||
|
# -ip : Inter-procedural optimizations
|
||||||
|
# -ftz : Flushes denormal results to zero
|
||||||
|
#
|
||||||
|
[OPT]
|
||||||
|
FCFLAGS : -axSSE4.2,AVX,CORE-AVX2 -O2 -ip -ftz -g -traceback
|
||||||
|
|
||||||
|
# Profiling flags
|
||||||
|
#################
|
||||||
|
#
|
||||||
|
[PROFILE]
|
||||||
|
FC : -p -g
|
||||||
|
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
|
||||||
|
|
||||||
|
# Debugging flags
|
||||||
|
#################
|
||||||
|
#
|
||||||
|
# -traceback : Activate backtrace at runtime
|
||||||
|
# -fpe0 : All floating point exceptions
|
||||||
|
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||||
|
# -g : Extra debugging information
|
||||||
|
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||||
|
#
|
||||||
|
[DEBUG]
|
||||||
|
FC : -g -traceback
|
||||||
|
FCFLAGS : -xSSE2 -C -fpe0
|
||||||
|
|
||||||
|
# OpenMP flags
|
||||||
|
#################
|
||||||
|
#
|
||||||
|
[OPENMP]
|
||||||
|
FC : -openmp
|
||||||
|
IRPF90_FLAGS : --openmp
|
||||||
|
|
|
@ -38,7 +38,6 @@ program fci_zmq
|
||||||
|
|
||||||
do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max)
|
do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max)
|
||||||
n_det_before = N_det
|
n_det_before = N_det
|
||||||
! call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st)
|
|
||||||
call ZMQ_selection(max(1024-N_det, N_det), pt2)
|
call ZMQ_selection(max(1024-N_det, N_det), pt2)
|
||||||
|
|
||||||
PROVIDE psi_coef
|
PROVIDE psi_coef
|
||||||
|
@ -90,21 +89,21 @@ program fci_zmq
|
||||||
N_det = min(N_det_max,N_det)
|
N_det = min(N_det_max,N_det)
|
||||||
touch N_det psi_det psi_coef
|
touch N_det psi_det psi_coef
|
||||||
call diagonalize_CI
|
call diagonalize_CI
|
||||||
! if(do_pt2_end)then
|
if(do_pt2_end)then
|
||||||
! print*,'Last iteration only to compute the PT2'
|
print*,'Last iteration only to compute the PT2'
|
||||||
! threshold_selectors = 1.d0
|
threshold_selectors = 1.d0
|
||||||
! threshold_generators = 0.999d0
|
threshold_generators = 0.9999d0
|
||||||
! call H_apply_FCI_PT2(pt2, norm_pert, H_pert_diag, N_st)
|
E_CI_before = CI_energy
|
||||||
!
|
call ZMQ_selection(1, pt2)
|
||||||
! print *, 'Final step'
|
print *, 'Final step'
|
||||||
! print *, 'N_det = ', N_det
|
print *, 'N_det = ', N_det
|
||||||
! print *, 'N_states = ', N_states
|
print *, 'N_states = ', N_states
|
||||||
! print *, 'PT2 = ', pt2
|
print *, 'PT2 = ', pt2
|
||||||
! print *, 'E = ', CI_energy
|
print *, 'E = ', E_CI_before
|
||||||
! print *, 'E+PT2 = ', CI_energy+pt2
|
print *, 'E+PT2 = ', E_CI_before+pt2
|
||||||
! print *, '-----'
|
print *, '-----'
|
||||||
! call ezfio_set_full_ci_energy_pt2(CI_energy+pt2)
|
call ezfio_set_full_ci_energy_pt2(E_CI_before+pt2)
|
||||||
! endif
|
endif
|
||||||
call save_wavefunction
|
call save_wavefunction
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -530,7 +530,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st)
|
write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st)
|
||||||
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
|
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_states,converged)
|
||||||
if (converged) then
|
if (converged) then
|
||||||
exit
|
exit
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -215,54 +215,52 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates)
|
subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates)
|
||||||
implicit none
|
implicit none
|
||||||
use bitmasks
|
use bitmasks
|
||||||
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys)
|
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax_keys)
|
||||||
integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates
|
integer, intent(in) :: n,nmax_coefs,nmax_keys,nstates
|
||||||
double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates)
|
double precision, intent(in) :: psi_coefs_tmp(nmax_coefs,nstates)
|
||||||
double precision, intent(out) :: s2(nstates,nstates)
|
double precision, intent(out) :: s2(nstates,nstates)
|
||||||
double precision :: s2_tmp,accu
|
double precision :: s2_tmp,accu
|
||||||
integer :: i,j,l,jj,ll,kk
|
integer :: i,j,l,jj,ll,kk
|
||||||
integer, allocatable :: idx(:)
|
integer, allocatable :: idx(:)
|
||||||
double precision, allocatable :: tmp(:,:)
|
BEGIN_DOC
|
||||||
BEGIN_DOC
|
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
|
||||||
! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
|
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
|
||||||
! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
|
END_DOC
|
||||||
END_DOC
|
s2 = 0.d0
|
||||||
s2 = 0.d0
|
do ll = 1, nstates
|
||||||
do ll = 1, nstates
|
do jj = 1, nstates
|
||||||
do jj = 1, nstates
|
accu = 0.d0
|
||||||
accu = 0.d0
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PRIVATE (i,j,kk,idx,s2_tmp) &
|
||||||
!$OMP PRIVATE (i,j,kk,idx,tmp,s2_tmp) &
|
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
|
||||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates) &
|
!$OMP REDUCTION(+:accu)
|
||||||
!$OMP REDUCTION(+:accu)
|
allocate(idx(0:n))
|
||||||
allocate(idx(0:n))
|
!$OMP DO SCHEDULE(dynamic)
|
||||||
!$OMP DO SCHEDULE(dynamic)
|
do i = n,1,-1 ! Better OMP scheduling
|
||||||
do i = 1, n
|
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
|
||||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
|
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
||||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
|
||||||
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
|
do kk=1,idx(0)
|
||||||
do kk=1,idx(0)
|
j = idx(kk)
|
||||||
j = idx(kk)
|
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
|
||||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
|
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
|
||||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END DO
|
||||||
|
deallocate(idx)
|
||||||
|
!$OMP END PARALLEL
|
||||||
|
s2(ll,jj) += accu
|
||||||
enddo
|
enddo
|
||||||
enddo
|
|
||||||
!$OMP END DO NOWAIT
|
|
||||||
deallocate(idx)
|
|
||||||
!$OMP BARRIER
|
|
||||||
!$OMP END PARALLEL
|
|
||||||
s2(ll,jj) += accu
|
|
||||||
enddo
|
enddo
|
||||||
enddo
|
do i = 1, nstates
|
||||||
do i = 1, nstates
|
do j =i+1,nstates
|
||||||
do j =i+1,nstates
|
accu = 0.5d0 * (s2(i,j) + s2(j,i))
|
||||||
accu = 0.5d0 * (s2(i,j) + s2(j,i))
|
s2(i,j) = accu
|
||||||
s2(i,j) = accu
|
s2(j,i) = accu
|
||||||
s2(j,i) = accu
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine diagonalize_s2_betweenstates(keys_tmp,psi_coefs_inout,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues)
|
subroutine diagonalize_s2_betweenstates(keys_tmp,psi_coefs_inout,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues)
|
||||||
|
|
Loading…
Reference in New Issue