mirror of https://github.com/LCPQ/quantum_package synced 2024-12-23 04:43:50 +01:00

Merge branch 'master' of github.com:scemama/quantum_package

This commit is contained in:
Anthony Scemama 2017-06-03 00:09:39 +02:00
commit d27d69fbb8
12 changed files with 321 additions and 12 deletions

config/ifort_mpi.cfg (new file)

@@ -0,0 +1,63 @@
# Common flags
##############
#
# -mkl=[parallel|sequential] : Use the MKL library
# --ninja                    : Use the ninja build system (mandatory)
# --align=32                 : Align all provided arrays on a 32-byte boundary
#
[COMMON]
FC           : mpif90
LAPACK_LIB   : -mkl=parallel
IRPF90       : irpf90
IRPF90_FLAGS : --ninja --align=32 -DMPI

# Global options
################
#
# 1 : Activate
# 0 : Deactivate
#
[OPTION]
MODE    : OPT  ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
CACHE   : 1    ; Enable cache_compile.py
OPENMP  : 1    ; Append OpenMP flags

# Optimization flags
####################
#
# -xHost : Compile a binary optimized for the current architecture
# -O2    : -O3 is not better than -O2 here
# -ip    : Inter-procedural optimizations
# -ftz   : Flush denormal results to zero
#
[OPT]
FCFLAGS : -xAVX -O2 -ip -ftz -g -traceback

# Profiling flags
#################
#
[PROFILE]
FC      : -p -g
FCFLAGS : -xSSE4.2 -O2 -ip -ftz

# Debugging flags
#################
#
# -traceback : Print a backtrace at run time on error
# -fpe0      : Trap all floating-point exceptions
# -C         : Check uninitialized variables, array subscripts, etc.
# -g         : Extra debugging information
# -xSSE2     : Valgrind needs a very simple x86 executable
#
[DEBUG]
FC           : -g -traceback
FCFLAGS      : -xSSE2 -C -fpe0
IRPF90_FLAGS : --openmp

# OpenMP flags
#################
#
[OPENMP]
FC           : -openmp
IRPF90_FLAGS : --openmp
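The -DMPI flag passed to IRPF90 above is what switches on the MPI code paths: IRPF90 resolves IRP_IF blocks at preprocessing time. A minimal sketch of the guard pattern used throughout the new files (the barrier call is only an illustration, not part of this commit):

  ! Sketch: code guarded by IRP_IF MPI is only compiled when
  ! IRPF90_FLAGS contains -DMPI, as in the [COMMON] section above.
  IRP_IF MPI
    include 'mpif.h'
    integer :: ierr
    call MPI_Barrier(MPI_COMM_WORLD, ierr)  ! illustrative collective call
  IRP_ELSE
    ! serial build: the block above is removed by the preprocessor
  IRP_ENDIF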


@@ -19,13 +19,14 @@ end
 subroutine run_wf
   use f77_zmq
   implicit none
   integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)           :: zmq_to_qp_run_socket
   double precision           :: energy(N_states)
   character*(64)             :: states(4)
-  integer                    :: rc, i
+  integer                    :: rc, i, ierr
   call provide_everything


@@ -0,0 +1 @@
Full_CI_ZMQ MPI


@@ -0,0 +1,14 @@
===============
Full_CI_ZMQ_MPI
===============

MPI slave for Full_CI with ZMQ.

Needed Modules
==============
.. Do not edit this section. It was auto-generated
.. by the `update_README.py` script.

Documentation
=============
.. Do not edit this section. It was auto-generated
.. by the `update_README.py` script.


@@ -0,0 +1,101 @@
program selection_slave
  implicit none
  BEGIN_DOC
! Helper program to compute the PT2 in distributed mode.
  END_DOC

  read_wf = .False.
  distributed_davidson = .False.
  SOFT_TOUCH read_wf distributed_davidson
  call provide_everything
  call switch_qp_run_to_master
  call run_wf
end

subroutine provide_everything
  PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
  PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count MPI_Initialized
end

subroutine run_wf
  use f77_zmq
  implicit none

  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
  integer(ZMQ_PTR)           :: zmq_to_qp_run_socket
  double precision           :: energy(N_states)
  character*(64)             :: states(4)
  integer                    :: rc, i, ierr

  call provide_everything

  zmq_context = f77_zmq_ctx_new ()
  states(1) = 'selection'
  states(2) = 'davidson'
  states(3) = 'pt2'

  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()

  do
    call wait_for_states(states,zmq_state,3)

    if(trim(zmq_state) == 'Stopped') then
      exit

    else if (trim(zmq_state) == 'selection') then

      ! Selection
      ! ---------

      print *, 'Selection'
      if (is_mpi_master) then
        call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
      endif
      IRP_IF MPI
      call MPI_BCAST(n,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
      IRP_ENDIF

      !$OMP PARALLEL PRIVATE(i)
      i = omp_get_thread_num()
      call run_selection_slave(0,i,energy)
      !$OMP END PARALLEL
      print *, 'Selection done'

    else if (trim(zmq_state) == 'davidson') then

      ! Davidson
      ! --------

      print *, 'Davidson'
      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
      call omp_set_nested(.True.)
      call davidson_slave_tcp(0)
      call omp_set_nested(.False.)
      print *, 'Davidson done'

    else if (trim(zmq_state) == 'pt2') then

      ! PT2
      ! ---

      print *, 'PT2'
      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)

      logical :: lstop
      lstop = .False.
      !$OMP PARALLEL PRIVATE(i)
      i = omp_get_thread_num()
      call run_pt2_slave(0,i,energy,lstop)
      !$OMP END PARALLEL
      print *, 'PT2 done'

    endif

  end do
end
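The IRP_IF MPI section in the selection branch is the master-fetch/broadcast idiom: only the master talks to qp_run over ZMQ, then MPI distributes what it read. A minimal sketch of the intended pattern, under the assumption that the unnamed variable n broadcast above is the determinant count read by the master:

  ! Sketch (assumption: the quantity to broadcast is the number of
  ! determinants the master just received from qp_run).
  if (is_mpi_master) then
    call zmq_get_psi(zmq_to_qp_run_socket, 1, energy, N_states)
  endif
  IRP_IF MPI
    call MPI_BCAST(N_det, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
  IRP_ENDIF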


@@ -21,15 +21,22 @@ subroutine run
   selection_criterion_factor = 0.d0
   TOUCH selection_criterion_min selection_criterion selection_criterion_factor
   call H_apply_mp2_selection(pt2, norm_pert, H_pert_diag, N_st)
+  touch N_det psi_det psi_coef
   psi_det = psi_det_sorted
   psi_coef = psi_coef_sorted
   touch N_det psi_det psi_coef
+  do i=N_det,1,-1
+    if (dabs(psi_coef(i,1)) <= 1.d-8) then
+      N_det -= 1
+    endif
+  enddo
   print*,'N_det = ',N_det
   print*,'-----'
   print *, 'PT2           = ', pt2(1)
   print *, 'E             = ', HF_energy
   print *, 'E_before +PT2 = ', HF_energy+pt2(1)
   N_det = min(N_det,N_det_max)
+  touch N_det psi_det psi_coef
   call save_wavefunction
   call ezfio_set_mp2_energy(HF_energy+pt2(1))
   deallocate(pt2,norm_pert,H_pert_diag)
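Since psi_det_sorted/psi_coef_sorted order determinants by decreasing weight, the countdown loop added above trims the negligible tail of the wave function. An equivalent sketch that stops at the first coefficient above the threshold (the early exit is an assumption about intent, not part of the patch):

  ! Assumes psi_coef is sorted by decreasing magnitude, so every
  ! coefficient below the threshold sits at the end of the array.
  do i = N_det, 1, -1
    if (dabs(psi_coef(i,1)) > 1.d-8) exit  ! first kept determinant
    N_det = N_det - 1                      ! drop the tail entry
  enddo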


@@ -0,0 +1 @@

plugins/MPI/README.rst (new file)

@@ -0,0 +1,14 @@
===
MPI
===

Providers for MPI programs.

Needed Modules
==============
.. Do not edit this section. It was auto-generated
.. by the `update_README.py` script.

Documentation
=============
.. Do not edit this section. It was auto-generated
.. by the `update_README.py` script.

plugins/MPI/bcast.irp.f (new file)

@@ -0,0 +1,46 @@
subroutine mpi_bcast_psi(zmq_to_qp_run_socket, worker_id)
  use f77_zmq
  implicit none
  BEGIN_DOC
! Broadcast the wave function to all MPI processes
  END_DOC
  integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
  integer, intent(in)          :: worker_id
  integer                      :: ierr
  character*(256)              :: msg
  ! Declarations missing from the patch, assumed from sibling routines
  integer                      :: rc, size_energy
  integer*8                    :: rc8
  double precision             :: energy(N_states)

  IRP_IF MPI
  include 'mpif.h'
  size_energy = N_states   ! assumed: one energy per state

  ! Broadcast the sizes first, so that every rank can reallocate
  call MPI_BCast(N_states, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
  call MPI_BCast(N_det, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
  call MPI_BCast(psi_det_size, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
  TOUCH psi_det_size N_det N_states

  ! The count is the number of elements; the datatype describes one element.
  ! psi_det holds N_int*2 integer*8 words per determinant.
  call MPI_BCast(psi_det, N_int*2*N_det, MPI_INTEGER8, 0, MPI_COMM_WORLD, ierr)
  call MPI_BCast(psi_coef, psi_det_size*N_states, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)

  rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size*N_states*8_8,8),0)
  if (rc8 /= psi_det_size*N_states*8_8) then
    print *, 'f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,0)'
    stop 'error'
  endif
  TOUCH psi_det psi_coef

  rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)
  if (rc /= size_energy*8) then
    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)'
    stop 'error'
  endif

  if (N_det_generators_read > 0) then
    N_det_generators = N_det_generators_read
    TOUCH N_det_generators
  endif
  if (N_det_selectors_read > 0) then
    N_det_selectors = N_det_selectors_read
    TOUCH N_det_selectors
  endif
  IRP_ENDIF
end
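A hypothetical call site for this routine, sketching how a slave would use it after the master fetched the wave function (the pattern follows the providers defined in utils.irp.f below; it is not part of the patch):

  ! Hypothetical usage sketch: the master receives psi from qp_run,
  ! then every rank enters the broadcast collectively.
  if (is_MPI_master) then
    call zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, N_states)
  endif
  call mpi_bcast_psi(zmq_to_qp_run_socket, worker_id)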

plugins/MPI/utils.irp.f (new file)

@@ -0,0 +1,68 @@
 BEGIN_PROVIDER [ logical, MPI_Initialized ]
&BEGIN_PROVIDER [ logical, has_mpi ]
  implicit none
  BEGIN_DOC
! This is true when MPI_Init has been called
  END_DOC

  IRP_IF MPI
    include 'mpif.h'    ! missing in the patch, needed for MPI symbols
    integer :: ierr
    call MPI_Init(ierr)
    if (ierr /= 0) then
      print *, ierr
      print *, 'MPI failed to initialize'
      stop -1
    endif
    has_mpi = .True.    ! was never assigned in the patch
  IRP_ELSE
    has_mpi = .False.
  IRP_ENDIF
  MPI_Initialized = .True.
END_PROVIDER


 BEGIN_PROVIDER [ integer, MPI_rank ]
&BEGIN_PROVIDER [ integer, MPI_size ]
&BEGIN_PROVIDER [ logical, is_MPI_master ]
  implicit none
  BEGIN_DOC
! Usual MPI variables
  END_DOC

  PROVIDE MPI_Initialized

  IRP_IF MPI
    include 'mpif.h'
    integer :: ierr
    call mpi_comm_size(MPI_COMM_WORLD, MPI_size, ierr)
    if (ierr /= 0) then
      print *, ierr
      print *, 'Unable to get MPI_size'
      stop -1
    endif
    call mpi_comm_rank(MPI_COMM_WORLD, MPI_rank, ierr)
    if (ierr /= 0) then
      print *, ierr
      print *, 'Unable to get MPI_rank'
      stop -1
    endif
    is_MPI_master = (MPI_rank == 0)
  IRP_ELSE
    MPI_rank = 0
    MPI_size = 1
    is_MPI_master = .True.
  IRP_ENDIF
END_PROVIDER


subroutine qp_mpi_finalize()
  implicit none
  PROVIDE MPI_Initialized
  IRP_IF MPI
    include 'mpif.h'
    integer :: ierr
    call MPI_Finalize(ierr)
    if (ierr /= 0) then
      print *, ierr
      print *, 'Unable to finalize MPI'
      stop -1
    endif
  IRP_ENDIF
end subroutine
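A minimal sketch of how these providers are meant to be consumed; the program name is illustrative, everything else is defined above:

  program mpi_hello
    implicit none
    PROVIDE MPI_Initialized        ! first touch triggers MPI_Init
    if (is_MPI_master) then
      print *, 'MPI size = ', MPI_size
    endif
    call qp_mpi_finalize()         ! calls MPI_Finalize in MPI builds
  end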


@@ -226,18 +226,15 @@ subroutine pt2_moller_plesset ($arguments)
     call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
     delta_e = (Fock_matrix_diag_mo(h1) - Fock_matrix_diag_mo(p1)) + &
               (Fock_matrix_diag_mo(h2) - Fock_matrix_diag_mo(p2))
-    delta_e = 1.d0/delta_e
-!    print*,'h1,p1',h1,p1
-!    print*,'h2,p2',h2,p2
   else if (degree == 1) then
     call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
     delta_e = Fock_matrix_diag_mo(h1) - Fock_matrix_diag_mo(p1)
-    delta_e = 1.d0/delta_e
   else
     delta_e = 0.d0
   endif

-  if (delta_e /= 0.d0) then
+  if (dabs(delta_e) > 1.d-10) then
+    delta_e = 1.d0/delta_e
     call i_H_psi_minilist(det_pert,minilist,idx_minilist,N_minilist,psi_selectors_coef,Nint,N_minilist,psi_selectors_size,N_st,i_H_psi_array)
     h = diag_H_mat_elem_fock(det_ref,det_pert,fock_diag_tmp,Nint)
   else
@@ -246,11 +243,6 @@ subroutine pt2_moller_plesset ($arguments)
   endif
   do i =1,N_st
     H_pert_diag(i) = h
-!    if(dabs(i_H_psi_array(i)).gt.1.d-8)then
-!      print*, i_H_psi_array(i)
-!      call debug_det(det_pert,N_int)
-!      print*, h1,p1,h2,p2,s1,s2
-!    endif
     c_pert(i) = i_H_psi_array(i) *delta_e
     e_2_pert(i) = c_pert(i) * i_H_psi_array(i)
   enddo
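For reference, a sketch in LaTeX of the second-order expressions this hunk evaluates, written out from the code; \epsilon_k denotes a diagonal Fock matrix element and |\alpha\rangle the perturber determinant:

  \Delta E_\alpha = (\epsilon_{h_1}-\epsilon_{p_1})
                  + (\epsilon_{h_2}-\epsilon_{p_2}), \qquad
  c_\alpha = \frac{\langle \alpha | H | \Psi \rangle}{\Delta E_\alpha}, \qquad
  e^{(2)}_\alpha = c_\alpha \, \langle \alpha | H | \Psi \rangle .

The change inverts \Delta E_\alpha once, behind a magnitude guard, instead of inverting it in each branch and testing the inverse against zero afterwards.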


@@ -76,7 +76,8 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   allocate ( buffer_det(N_int,2,new_size), &
       buffer_coef(new_size,N_states),      &
       buffer_e2(new_size,N_states) )
+  buffer_coef = 0.d0
+  buffer_e2   = 0.d0
   do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
     do k=1,N_int
       buffer_det(k,1,i) = H_apply_buffer(iproc)%det(k,1,i)