diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg
new file mode 100644
index 00000000..c0dafc81
--- /dev/null
+++ b/config/ifort_mpi.cfg
@@ -0,0 +1,63 @@
+# Common flags
+##############
+#
+# -mkl=[parallel|sequential] : Use the MKL library
+# --ninja                    : Build with Ninja (mandatory)
+# --align=32                 : Align all provided arrays on a 32-byte boundary
+#
+[COMMON]
+FC           : mpif90
+LAPACK_LIB   : -mkl=parallel
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 -DMPI
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT   ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 1     ; Enable cache_compile.py
+OPENMP  : 1     ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -xHost : Compile a binary optimized for the current architecture
+# -O2    : -O3 is not better than -O2 here
+# -ip    : Inter-procedural optimizations
+# -ftz   : Flush denormal results to zero
+#
+[OPT]
+FCFLAGS : -xAVX -O2 -ip -ftz -g -traceback
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -xSSE4.2 -O2 -ip -ftz
+
+# Debugging flags
+#################
+#
+# -traceback : Print a backtrace on runtime errors
+# -fpe0      : Trap all floating-point exceptions
+# -C         : Check uninitialized variables, array subscripts, etc.
+# -g         : Extra debugging information
+# -xSSE2     : Valgrind needs a very simple x86 executable
+#
+[DEBUG]
+FC           : -g -traceback
+FCFLAGS      : -xSSE2 -C -fpe0
+IRPF90_FLAGS : --openmp
+
+# OpenMP flags
+#############
+#
+[OPENMP]
+FC           : -openmp
+IRPF90_FLAGS : --openmp
+
diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
index b41662f4..f86a7fcd 100644
--- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
@@ -19,13 +19,14 @@ end
 subroutine run_wf
   use f77_zmq
+  implicit none
   integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR)           :: zmq_to_qp_run_socket
   double precision           :: energy(N_states)
   character*(64)             :: states(4)
-  integer                    :: rc, i
+  integer                    :: rc, i, ierr
 
   call provide_everything
diff --git a/plugins/Full_CI_ZMQ_MPI/NEEDED_CHILDREN_MODULES b/plugins/Full_CI_ZMQ_MPI/NEEDED_CHILDREN_MODULES
new file mode 100644
index 00000000..03d04444
--- /dev/null
+++ b/plugins/Full_CI_ZMQ_MPI/NEEDED_CHILDREN_MODULES
@@ -0,0 +1 @@
+Full_CI_ZMQ MPI
diff --git a/plugins/Full_CI_ZMQ_MPI/README.rst b/plugins/Full_CI_ZMQ_MPI/README.rst
new file mode 100644
index 00000000..bf9dfab4
--- /dev/null
+++ b/plugins/Full_CI_ZMQ_MPI/README.rst
@@ -0,0 +1,14 @@
+===============
+Full_CI_ZMQ_MPI
+===============
+
+MPI slave for Full_CI with ZMQ
+
+Needed Modules
+==============
+.. Do not edit this section. It was auto-generated
+.. by the `update_README.py` script.
+Documentation
+=============
+.. Do not edit this section. It was auto-generated
+.. by the `update_README.py` script.
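Note: the slave program added below launches one slave task per OpenMP thread inside a `!$OMP PARALLEL` region. A minimal, self-contained sketch of that pattern follows; `do_slave_work` is a hypothetical stand-in for `run_selection_slave`/`run_pt2_slave`:

```fortran
! Thread-per-slave pattern: each OpenMP thread obtains its own id
! and runs one slave task. Compile with e.g. ifort -openmp.
program slave_loop_sketch
  use omp_lib
  implicit none
  integer :: i
  !$OMP PARALLEL PRIVATE(i)
  i = omp_get_thread_num()     ! unique id of this thread
  call do_slave_work(i)        ! hypothetical per-thread slave routine
  !$OMP END PARALLEL
contains
  subroutine do_slave_work(thread_id)
    integer, intent(in) :: thread_id
    print *, 'slave thread', thread_id
  end subroutine
end program
```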
diff --git a/plugins/Full_CI_ZMQ_MPI/selection_davidson_slave_mpi.irp.f b/plugins/Full_CI_ZMQ_MPI/selection_davidson_slave_mpi.irp.f
new file mode 100644
index 00000000..6e4bf775
--- /dev/null
+++ b/plugins/Full_CI_ZMQ_MPI/selection_davidson_slave_mpi.irp.f
@@ -0,0 +1,101 @@
+program selection_slave
+  implicit none
+  BEGIN_DOC
+! Helper program to compute the PT2 in distributed mode.
+  END_DOC
+
+  read_wf = .False.
+  distributed_davidson = .False.
+  SOFT_TOUCH read_wf distributed_davidson
+  call provide_everything
+  call switch_qp_run_to_master
+  call run_wf
+end
+
+subroutine provide_everything
+  PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
+  PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count MPI_Initialized
+end
+
+subroutine run_wf
+  use f77_zmq
+
+  implicit none
+
+  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)           :: zmq_to_qp_run_socket
+  double precision           :: energy(N_states)
+  character*(64)             :: states(4)
+  integer                    :: rc, i, ierr
+
+  call provide_everything
+
+  zmq_context = f77_zmq_ctx_new()
+  states(1) = 'selection'
+  states(2) = 'davidson'
+  states(3) = 'pt2'
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  do
+
+    call wait_for_states(states,zmq_state,3)
+
+    if(trim(zmq_state) == 'Stopped') then
+
+      exit
+
+    else if (trim(zmq_state) == 'selection') then
+
+      ! Selection
+      ! ---------
+
+      print *, 'Selection'
+      if (is_mpi_master) then
+        call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
+      endif
+      IRP_IF MPI
+      ! Broadcast the wave function and the energy read by the master
+      ! to all the other MPI ranks
+      call mpi_bcast_psi()
+      call MPI_BCAST(energy,N_states,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
+      IRP_ENDIF
+
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call run_selection_slave(0,i,energy)
+      !$OMP END PARALLEL
+      print *, 'Selection done'
+
+    else if (trim(zmq_state) == 'davidson') then
+
+      ! Davidson
+      ! --------
+
+      print *, 'Davidson'
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
+      call omp_set_nested(.True.)
+      call davidson_slave_tcp(0)
+      call omp_set_nested(.False.)
+      print *, 'Davidson done'
+
+    else if (trim(zmq_state) == 'pt2') then
+
+      ! PT2
+      ! ---
+
+      print *, 'PT2'
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
+
+      logical :: lstop
+      lstop = .False.
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call run_pt2_slave(0,i,energy,lstop)
+      !$OMP END PARALLEL
+      print *, 'PT2 done'
+
+    endif
+
+  end do
+end
diff --git a/plugins/MP2/mp2_wf.irp.f b/plugins/MP2/mp2_wf.irp.f
index e7419319..1dda8d69 100644
--- a/plugins/MP2/mp2_wf.irp.f
+++ b/plugins/MP2/mp2_wf.irp.f
@@ -21,15 +21,26 @@ subroutine run
   selection_criterion_factor = 0.d0
   TOUCH selection_criterion_min selection_criterion selection_criterion_factor
   call H_apply_mp2_selection(pt2, norm_pert, H_pert_diag, N_st)
+  touch N_det psi_det psi_coef
   psi_det = psi_det_sorted
   psi_coef = psi_coef_sorted
   touch N_det psi_det psi_coef
+  ! Drop the trailing determinants with negligible coefficients
+  ! (psi_coef is sorted by decreasing magnitude)
+  do i=N_det,1,-1
+    if (dabs(psi_coef(i,1)) <= 1.d-8) then
+      N_det -= 1
+    else
+      exit
+    endif
+  enddo
   print*,'N_det = ',N_det
   print*,'-----'
   print *, 'PT2          = ', pt2(1)
   print *, 'E            = ', HF_energy
   print *, 'E_before+PT2 = ', HF_energy+pt2(1)
   N_det = min(N_det,N_det_max)
+  touch N_det psi_det psi_coef
   call save_wavefunction
   call ezfio_set_mp2_energy(HF_energy+pt2(1))
   deallocate(pt2,norm_pert,H_pert_diag)
diff --git a/plugins/MPI/NEEDED_CHILDREN_MODULES b/plugins/MPI/NEEDED_CHILDREN_MODULES
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/plugins/MPI/NEEDED_CHILDREN_MODULES
@@ -0,0 +1 @@
+
diff --git a/plugins/MPI/README.rst b/plugins/MPI/README.rst
new file mode 100644
index 00000000..7962296b
--- /dev/null
+++ b/plugins/MPI/README.rst
@@ -0,0 +1,14 @@
+===
+MPI
+===
+
+Providers for MPI programs.
+
+Needed Modules
+==============
+.. Do not edit this section. It was auto-generated
+.. by the `update_README.py` script.
+Documentation
+=============
+.. Do not edit this section. It was auto-generated
+.. by the `update_README.py` script.
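Note: the `bcast.irp.f` file that follows relies on collective MPI broadcasts: every rank calls `MPI_BCAST` with the same count, datatype, and root, and after the call all ranks hold the root's copy of the buffer. A minimal, self-contained sketch of that semantics, compiled with an MPI wrapper such as `mpif90` (the array and its size are illustrative):

```fortran
! Broadcast semantics: a collective call made by all ranks;
! afterwards every rank holds rank 0's data.
program bcast_sketch
  implicit none
  include 'mpif.h'
  integer, parameter :: n = 4          ! illustrative buffer size
  double precision   :: coef(n)
  integer            :: ierr, rank

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)

  coef(:) = 0.d0
  if (rank == 0) coef(:) = (/ 1.d0, 2.d0, 3.d0, 4.d0 /)

  ! Same call on every rank: buffer, count, datatype, root, communicator
  call MPI_BCAST(coef, n, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
  if (ierr /= MPI_SUCCESS) stop 'MPI_BCAST failed'

  print *, 'rank', rank, 'coef =', coef
  call MPI_FINALIZE(ierr)
end program
```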
diff --git a/plugins/MPI/bcast.irp.f b/plugins/MPI/bcast.irp.f
new file mode 100644
index 00000000..3236f9f5
--- /dev/null
+++ b/plugins/MPI/bcast.irp.f
@@ -0,0 +1,24 @@
+subroutine mpi_bcast_psi()
+  implicit none
+  BEGIN_DOC
+! Broadcast the wave function of the MPI master to all the other MPI ranks
+  END_DOC
+  integer :: ierr
+
+  IRP_IF MPI
+    ! Broadcast the dimensions first, so that the receiving ranks can
+    ! resize their arrays before receiving the data
+    call MPI_BCast(N_states, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    call MPI_BCast(N_det, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    call MPI_BCast(psi_det_size, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+
+    TOUCH psi_det_size N_det N_states
+
+    ! psi_det holds N_int 64-bit integers per spin, per determinant
+    call MPI_BCast(psi_det, N_int*2*N_det, MPI_INTEGER8, 0, MPI_COMM_WORLD, ierr)
+    call MPI_BCast(psi_coef, psi_det_size*N_states, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+
+    TOUCH psi_det psi_coef
+  IRP_ENDIF
+
+end
diff --git a/plugins/MPI/utils.irp.f b/plugins/MPI/utils.irp.f
new file mode 100644
index 00000000..aa4e0742
--- /dev/null
+++ b/plugins/MPI/utils.irp.f
@@ -0,0 +1,69 @@
+ BEGIN_PROVIDER [ logical, MPI_Initialized ]
+&BEGIN_PROVIDER [ logical, has_mpi ]
+  implicit none
+  BEGIN_DOC
+! This is true when MPI_Init has been called
+  END_DOC
+
+  IRP_IF MPI
+    integer :: ierr
+    call MPI_Init(ierr)
+    if (ierr /= 0) then
+      print *, ierr
+      print *, 'MPI failed to initialize'
+      stop -1
+    endif
+    has_mpi = .True.
+  IRP_ELSE
+    has_mpi = .False.
+  IRP_ENDIF
+  MPI_Initialized = .True.
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer, MPI_rank ]
+&BEGIN_PROVIDER [ integer, MPI_size ]
+&BEGIN_PROVIDER [ logical, is_MPI_master ]
+  implicit none
+  BEGIN_DOC
+! Usual MPI variables
+  END_DOC
+
+  PROVIDE MPI_Initialized
+
+  IRP_IF MPI
+    integer :: ierr
+    call mpi_comm_size(MPI_COMM_WORLD, MPI_size, ierr)
+    if (ierr /= 0) then
+      print *, ierr
+      print *, 'Unable to get MPI_size'
+      stop -1
+    endif
+    call mpi_comm_rank(MPI_COMM_WORLD, MPI_rank, ierr)
+    if (ierr /= 0) then
+      print *, ierr
+      print *, 'Unable to get MPI_rank'
+      stop -1
+    endif
+    is_MPI_master = (MPI_rank == 0)
+  IRP_ELSE
+    MPI_rank = 0
+    MPI_size = 1
+    is_MPI_master = .True.
+  IRP_ENDIF
+
+END_PROVIDER
+
+subroutine qp_mpi_finalize()
+  implicit none
+  PROVIDE MPI_Initialized
+  IRP_IF MPI
+    integer :: ierr
+    call MPI_Finalize(ierr)
+    if (ierr /= 0) then
+      print *, ierr
+      print *, 'Unable to finalize MPI'
+      stop -1
+    endif
+  IRP_ENDIF
+end subroutine
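Note: the `pt2_moller_plesset` hunk below moves the inversion of the denominator behind the magnitude check, so that `1.d0/delta_e` is only evaluated when the denominator is safely non-zero. Restating what the code computes (the ε are the diagonal Fock matrix elements `Fock_matrix_diag_mo`, for the holes h and particles p of the excitation):

```latex
% Second-order contribution of a perturber determinant |k> to state |\Psi>,
% as evaluated in pt2_moller_plesset for a double excitation:
\Delta E_k = (\epsilon_{h_1} - \epsilon_{p_1}) + (\epsilon_{h_2} - \epsilon_{p_2}),
\qquad
c_k = \frac{\langle k | H | \Psi \rangle}{\Delta E_k},
\qquad
e^{(2)}_k = \frac{\left|\langle k | H | \Psi \rangle\right|^2}{\Delta E_k}
```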
diff --git a/plugins/Perturbation/pt2_equations.irp.f b/plugins/Perturbation/pt2_equations.irp.f
index b29e130f..1cd0d440 100644
--- a/plugins/Perturbation/pt2_equations.irp.f
+++ b/plugins/Perturbation/pt2_equations.irp.f
@@ -226,18 +226,15 @@ subroutine pt2_moller_plesset ($arguments)
     call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
     delta_e = (Fock_matrix_diag_mo(h1) - Fock_matrix_diag_mo(p1)) + &
               (Fock_matrix_diag_mo(h2) - Fock_matrix_diag_mo(p2))
-    delta_e = 1.d0/delta_e
-!   print*,'h1,p1',h1,p1
-!   print*,'h2,p2',h2,p2
   else if (degree == 1) then
     call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
     delta_e = Fock_matrix_diag_mo(h1) - Fock_matrix_diag_mo(p1)
-    delta_e = 1.d0/delta_e
   else
     delta_e = 0.d0
   endif
 
-  if (delta_e /= 0.d0) then
+  if (dabs(delta_e) > 1.d-10) then
+    delta_e = 1.d0/delta_e
     call i_H_psi_minilist(det_pert,minilist,idx_minilist,N_minilist,psi_selectors_coef,Nint,N_minilist,psi_selectors_size,N_st,i_H_psi_array)
     h = diag_H_mat_elem_fock(det_ref,det_pert,fock_diag_tmp,Nint)
   else
@@ -246,11 +243,6 @@ subroutine pt2_moller_plesset ($arguments)
   endif
   do i =1,N_st
     H_pert_diag(i) = h
-!   if(dabs(i_H_psi_array(i)).gt.1.d-8)then
-!     print*, i_H_psi_array(i)
-!     call debug_det(det_pert,N_int)
-!     print*, h1,p1,h2,p2,s1,s2
-!   endif
     c_pert(i) = i_H_psi_array(i) * delta_e
     e_2_pert(i) = c_pert(i) * i_H_psi_array(i)
   enddo
diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f
index 442d0d84..26f981dc 100644
--- a/src/Determinants/H_apply.irp.f
+++ b/src/Determinants/H_apply.irp.f
@@ -76,7 +76,8 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   allocate ( buffer_det(N_int,2,new_size), &
              buffer_coef(new_size,N_states), &
              buffer_e2(new_size,N_states) )
-
+  buffer_coef = 0.d0
+  buffer_e2 = 0.d0
   do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
     do k=1,N_int
       buffer_det(k,1,i) = H_apply_buffer(iproc)%det(k,1,i)
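Note: the `H_apply.irp.f` hunk zero-initializes the freshly allocated buffers before the old contents are copied over, because `allocate` leaves memory undefined. A minimal sketch of that resize-and-copy pattern on a simple 1-D buffer (names and sizes are illustrative, not the actual H_apply data structures):

```fortran
! Grow a buffer while preserving its contents: allocate the new array,
! zero it (allocate leaves memory undefined), copy the old data over,
! then swap it in with move_alloc.
program resize_sketch
  implicit none
  double precision, allocatable :: buf(:), tmp(:)
  integer :: old_size, new_size

  old_size = 4
  new_size = 8
  allocate (buf(old_size))
  buf(:) = 1.d0

  allocate (tmp(new_size))
  tmp(:) = 0.d0                                        ! zero-initialize, as in the patch
  tmp(1:min(old_size,new_size)) = buf(1:min(old_size,new_size))
  call move_alloc(tmp, buf)                            ! buf now has size new_size

  print *, size(buf), buf
end program
```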