From 334f165ff17af34f398623282e85f774c9efa602 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 22 Nov 2017 17:07:16 +0100 Subject: [PATCH 1/2] First MPI tests seem OK --- config/gfortran_mpi.cfg | 62 +++++++++++++++++++++++++++++ config/ifort_mpi.cfg | 2 +- configure | 11 ++--- ocaml/qp_run.ml | 18 ++++++--- plugins/MPI/.gitignore | 5 +++ plugins/MPI/NEEDED_CHILDREN_MODULES | 1 + plugins/MPI/README.rst | 12 ++++++ plugins/MPI/mpi.irp.f | 60 ++++++++++++++++++++++++++++ plugins/MPI/mpi_test.irp.f | 12 ++++++ src/Utils/LinearAlgebra.irp.f | 2 +- 10 files changed, 171 insertions(+), 14 deletions(-) create mode 100644 config/gfortran_mpi.cfg create mode 100644 plugins/MPI/.gitignore create mode 100644 plugins/MPI/NEEDED_CHILDREN_MODULES create mode 100644 plugins/MPI/README.rst create mode 100644 plugins/MPI/mpi.irp.f create mode 100644 plugins/MPI/mpi_test.irp.f diff --git a/config/gfortran_mpi.cfg b/config/gfortran_mpi.cfg new file mode 100644 index 00000000..f40123b1 --- /dev/null +++ b/config/gfortran_mpi.cfg @@ -0,0 +1,62 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : mpif90 -ffree-line-length-none -I . +LAPACK_LIB : -lblas -llapack +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DMPI + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 1 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -msse4.2 + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast -msse4.2 + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index c0dafc81..73c3b712 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -16,7 +16,7 @@ IRPF90_FLAGS : --ninja --align=32 -DMPI # # 1 : Activate # 0 : Deactivate -# +# [OPTION] MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below CACHE : 1 ; Enable cache_compile.py diff --git a/configure b/configure index cd76e907..230d3069 100755 --- a/configure +++ b/configure @@ -66,7 +66,6 @@ d_dependency = { "python": [], "ninja": ["g++", "python"], "make": [], - "gpi2": ["g++", "make"], "p_graphviz": ["python"], "bats": [] } @@ -142,11 +141,6 @@ f77zmq = Info( description=' F77-ZeroMQ', default_path=join(QP_ROOT_LIB, "libf77zmq.a") ) -gpi2 = Info( - url='https://github.com/cc-hpc-itwm/GPI-2/archive/v1.3.0.tar.gz', - description=' GPI-2', - default_path=join(QP_ROOT_LIB64, "libGPI2.a") ) - p_graphviz = Info( url='https://github.com/xflr6/graphviz/archive/master.tar.gz', description=' Python library for graphviz', @@ -161,7 +155,7 @@ d_info = dict() for m in ["ocaml", "m4", "curl", "zlib", "patch", "irpf90", "docopt", "resultsFile", "ninja", "emsl", "ezfio", "p_graphviz", - "zeromq", "f77zmq", "bats", "gpi2"]: + "zeromq", "f77zmq", "bats"]: exec ("d_info['{0}']={0}".format(m)) @@ -497,6 +491,9 @@ def create_ninja_and_rc(l_installed): '# Choose the correct network interface', '# export QP_NIC=ib0', '# export QP_NIC=eth0', + '', + '# Choose how to start MPI processes', + '# export QP_MPIRUN="mpirun"', '' ] diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index 5be26539..4761c3f4 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -4,6 +4,7 @@ open Qputils (* Environment variables : QP_PREFIX=gdb : to run gdb (or valgrind, or whatever) + QP_MPIRUN=mpirun: to run mpi slaves QP_TASK_DEBUG=1 : debug task server *) @@ -15,7 +16,7 @@ let print_list () = let () = Random.self_init () -let run slave exe ezfio_file = +let run slave mpi exe ezfio_file = (** Check availability of the ports *) @@ -115,13 +116,18 @@ let run slave exe ezfio_file = match Sys.getenv "QP_PREFIX" with | Some x -> x^" " | None -> "" + and mpirun = + match (mpi, Sys.getenv "QP_MPIRUN") with + | (true, None) -> "mpirun " + | (true, Some x) -> x^" " + | _ -> "" and exe = match (List.find ~f:(fun (x,_) -> x = exe) executables) with | Some (_,x) -> x^" " | None -> assert false in let exit_code = - match (Sys.command (prefix^exe^ezfio_file)) with + match (Sys.command (mpirun^prefix^exe^ezfio_file)) with | 0 -> 0 | i -> (Printf.printf "Program exited with code %d.\n%!" i; i) in @@ -141,7 +147,9 @@ let spec = let open Command.Spec in empty +> flag "slave" no_arg - ~doc:(" Needed for slave tasks") + ~doc:(" Required for slave tasks") + +> flag "mpi" no_arg + ~doc:(" Required for MPI slaves") +> anon ("executable" %: string) +> anon ("ezfio_file" %: string) ;; @@ -159,8 +167,8 @@ Executes a Quantum Package binary file among these:\n\n" ) ) spec - (fun slave exe ezfio_file () -> - run slave exe ezfio_file + (fun slave mpi exe ezfio_file () -> + run slave mpi exe ezfio_file ) |> Command.run ~version: Git.sha1 ~build_info: Git.message diff --git a/plugins/MPI/.gitignore b/plugins/MPI/.gitignore new file mode 100644 index 00000000..7ac9fbf6 --- /dev/null +++ b/plugins/MPI/.gitignore @@ -0,0 +1,5 @@ +IRPF90_temp/ +IRPF90_man/ +irpf90.make +irpf90_entities +tags \ No newline at end of file diff --git a/plugins/MPI/NEEDED_CHILDREN_MODULES b/plugins/MPI/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..19028952 --- /dev/null +++ b/plugins/MPI/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Utils diff --git a/plugins/MPI/README.rst b/plugins/MPI/README.rst new file mode 100644 index 00000000..8826a8ee --- /dev/null +++ b/plugins/MPI/README.rst @@ -0,0 +1,12 @@ +=== +MPI +=== + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. diff --git a/plugins/MPI/mpi.irp.f b/plugins/MPI/mpi.irp.f new file mode 100644 index 00000000..45b1ab2a --- /dev/null +++ b/plugins/MPI/mpi.irp.f @@ -0,0 +1,60 @@ +BEGIN_PROVIDER [ logical, mpi_initialized ] + implicit none + BEGIN_DOC + ! Always true. Initialized MPI + END_DOC + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call mpi_init(ierr) + if (ierr /= 0) then + print *, 'ierr = ', ierr + stop 'Unable to initialize MPI' + endif + IRP_ENDIF + mpi_initialized = .True. +END_PROVIDER + + + BEGIN_PROVIDER [ integer, mpi_rank ] +&BEGIN_PROVIDER [ integer, mpi_size ] + implicit none + BEGIN_DOC + ! Rank of MPI process and number of MPI processes + END_DOC + IRP_IF MPI + include 'mpif.h' + PROVIDE mpi_initialized + integer :: ierr + + call MPI_COMM_RANK (MPI_COMM_WORLD, mpi_rank, ierr) + if (ierr /= 0) then + print *, 'ierr = ', ierr + stop 'Unable to get MPI rank' + endif + + call MPI_COMM_SIZE (MPI_COMM_WORLD, mpi_size, ierr) + if (ierr /= 0) then + print *, 'ierr = ', ierr + stop 'Unable to get MPI size' + endif + + IRP_ELSE + mpi_rank = 0 + mpi_size = 1 + IRP_ENDIF + ASSERT (mpi_rank >= 0) + ASSERT (mpi_rank < mpi_size) + +END_PROVIDER + + +BEGIN_PROVIDER [ logical, mpi_master ] + implicit none + BEGIN_DOC + ! If true, rank is zero + END_DOC + mpi_master = (mpi_rank == 0) + +END_PROVIDER + diff --git a/plugins/MPI/mpi_test.irp.f b/plugins/MPI/mpi_test.irp.f new file mode 100644 index 00000000..3fcb355a --- /dev/null +++ b/plugins/MPI/mpi_test.irp.f @@ -0,0 +1,12 @@ +program MPI + implicit none + BEGIN_DOC +! MPI test program + END_DOC + print *, 'hello world' + print *, 'rank, size, master = ', mpi_rank, mpi_size, mpi_master + + integer :: ierr + call MPI_FINALIZE(ierr) + print *, ierr +end diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 7e81756c..46859f1a 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -280,7 +280,7 @@ subroutine get_inverse(A,LDA,m,C,LDC) integer :: info,lwork integer, allocatable :: ipiv(:) double precision,allocatable :: work(:) - allocate (ipiv(ao_num), work(ao_num*ao_num)) + allocate (ipiv(m), work(m*m)) lwork = size(work) C(1:m,1:m) = A(1:m,1:m) call dgetrf(m,m,C,size(C,1),ipiv,info) From e55a1565e8a0e2b190c124252e6f9c8a4e2df934 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 23 Nov 2017 10:35:13 +0100 Subject: [PATCH 2/2] MPI test is OK --- config/gfortran_mpi.cfg | 2 +- plugins/CAS_SD_ZMQ/selection.irp.f | 2 +- plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES | 2 +- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 71 ++++++++++--------- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 4 +- plugins/Full_CI_ZMQ/selection.irp.f | 29 ++++---- plugins/Full_CI_ZMQ/selection_buffer.irp.f | 4 +- .../selection_davidson_slave.irp.f | 16 ++++- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 6 +- plugins/MPI/NEEDED_CHILDREN_MODULES | 2 +- plugins/MPI/mpi_test.irp.f | 12 ---- src/Bitmask/bitmasks_module.f90 | 3 +- src/Davidson/u0Hu0.irp.f | 4 ++ 14 files changed, 81 insertions(+), 80 deletions(-) delete mode 100644 plugins/MPI/mpi_test.irp.f diff --git a/config/gfortran_mpi.cfg b/config/gfortran_mpi.cfg index f40123b1..6358d745 100644 --- a/config/gfortran_mpi.cfg +++ b/config/gfortran_mpi.cfg @@ -10,7 +10,7 @@ # # [COMMON] -FC : mpif90 -ffree-line-length-none -I . +FC : mpif90 -ffree-line-length-none -I . -g LAPACK_LIB : -lblas -llapack IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 -DMPI diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index b42116bd..6b010950 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -646,7 +646,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d do p2=ib,mo_tot_num if(bannedOrb(p2, s2)) cycle if(banned(p1,p2)) cycle - if(mat(1, p1, p2) == 0d0) cycle + if(sum(dabs(mat(:, p1, p2))) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) logical, external :: is_in_wavefunction diff --git a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES index d9a3a160..6736cc4e 100644 --- a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES +++ b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Perturbation Selectors_full Generators_full ZMQ FourIdx +Perturbation Selectors_full Generators_full ZMQ FourIdx MPI diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 7f9c17c6..06946f21 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -81,40 +81,41 @@ program fci_zmq (CI_energy(1) + pt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) -! print *, '' -! print '(A,I12)', 'Summary at N_det = ', N_det -! print '(A)', '-----------------------------------' -! print *, '' -! call write_double(6,correlation_energy_ratio, 'Correlation ratio') -! print *, '' -! -! N_states_p = min(N_det,N_states) -! print *, '' -! write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' -! write(*,fmt) -! write(fmt,*) '(12X,', N_states_p, '(6X,A5,1X,I6,8X))' -! write(*,fmt) ('State',k, k=1,N_states_p) -! write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' -! write(*,fmt) -! write(fmt,*) '(A12,', N_states_p, '(1X,F12.8,13X))' -! write(*,fmt) '# E ', CI_energy(1:N_states_p) -! if (N_states_p > 1) then -! write(*,fmt) '# Excit. (au)', CI_energy(1:N_states_p)-CI_energy(1) -! write(*,fmt) '# Excit. (eV)', (CI_energy(1:N_states_p)-CI_energy(1))*27.211396641308d0 -! endif -! write(fmt,*) '(A12,', 2*N_states_p, '(1X,F12.8))' -! write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) -! write(*,*) '#' -! write(*,fmt) '# E+PT2 ', (CI_energy(k)+pt2(k),error(k), k=1,N_states_p) -! if (N_states_p > 1) then -! write(*,fmt) '# Excit. (au)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1)), & -! dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) -! write(*,fmt) '# Excit. (eV)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1))*27.211396641308d0, & -! dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) -! endif -! write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' -! write(*,fmt) -! print *, '' + N_states_p = min(N_det,N_states) + + print *, '' + print '(A,I12)', 'Summary at N_det = ', N_det + print '(A)', '-----------------------------------' + print *, '' + call write_double(6,correlation_energy_ratio, 'Correlation ratio') + print *, '' + + print *, '' + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' + write(*,fmt) + write(fmt,*) '(12X,', N_states_p, '(6X,A5,1X,I6,8X))' + write(*,fmt) ('State',k, k=1,N_states_p) + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' + write(*,fmt) + write(fmt,*) '(A12,', N_states_p, '(1X,F12.8,13X))' + write(*,fmt) '# E ', CI_energy(1:N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', CI_energy(1:N_states_p)-CI_energy(1) + write(*,fmt) '# Excit. (eV)', (CI_energy(1:N_states_p)-CI_energy(1))*27.211396641308d0 + endif + write(fmt,*) '(A12,', 2*N_states_p, '(1X,F12.8))' + write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) + write(*,'(A)') '#' + write(*,fmt) '# E+PT2 ', (CI_energy(k)+pt2(k),error(k), k=1,N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1)), & + dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1))*27.211396641308d0, & + dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) + endif + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=========================''))' + write(*,fmt) + print *, '' print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -220,7 +221,7 @@ program fci_zmq endif write(fmt,*) '(A12,', 2*N_states_p, '(1X,F12.8))' write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) - write(*,*) '#' + write(*,'(A)') '#' write(*,fmt) '# E+PT2 ', (CI_energy(k)+pt2(k),error(k), k=1,N_states_p) if (N_states_p > 1) then write(*,fmt) '# Excit. (au)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1)), & diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index c641c50d..1f24da38 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -119,8 +119,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) call update_psi_average_norm_contrib(w) SOFT_TOUCH psi_average_norm_contrib endif - do i=N_det+1,N_states - pt2(i) = 0.d0 + do k=N_det+1,N_states + pt2(k) = 0.d0 enddo end subroutine diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 930eec2c..d6f5739f 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -36,7 +36,7 @@ subroutine run_selection_slave(thread,iproc,energy) end if buf%N = 0 ctask = 1 - pt2 = 0d0 + pt2(:) = 0d0 do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) @@ -65,7 +65,7 @@ subroutine run_selection_slave(thread,iproc,energy) call merge_selection_buffers(buf,buf2) call push_selection_results(zmq_socket_push, pt2, buf, task_id(1), ctask) buf%mini = buf2%mini - pt2 = 0d0 + pt2(:) = 0d0 buf%cur = 0 end if ctask = 0 diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 3e58224a..af898941 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -563,7 +563,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - mat = 0d0 call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) @@ -617,7 +616,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d do p2=ib,mo_tot_num if(bannedOrb(p2, s2)) cycle if(banned(p1,p2)) cycle - if(mat(1, p1, p2) == 0d0) cycle + if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) @@ -783,9 +782,9 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) if(ma == 1) then - mat(:, putj, puti) += coefs * hij + mat(:, putj, puti) += coefs(:) * hij else - mat(:, puti, putj) += coefs * hij + mat(:, puti, putj) += coefs(:) * hij end if end do else @@ -801,7 +800,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(turn2(i), 1) hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij + mat(:, puti, putj) += coefs(:) * hij end do end do end if @@ -821,7 +820,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij + mat(:, puti, putj) += coefs(:) * hij end do end do else if(tip == 3) then @@ -835,7 +834,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2) - mat(:, min(puti, putj), max(puti, putj)) += coefs * hij + mat(:, min(puti, putj), max(puti, putj)) += coefs(:) * hij end do else ! tip == 4 puti = p(1, sp) @@ -846,7 +845,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h1 = h(1, mi) h2 = h(2, mi) hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij + mat(:, puti, putj) += coefs(:) * hij end if end if end if @@ -931,13 +930,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(putj,puti,bant)) then hij = mo_bielec_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix) - tmp_row(:,puti) += hij * coefs + tmp_row(:,puti) += hij * coefs(:) end if putj = p2 if(.not. banned(putj,puti,bant)) then hij = mo_bielec_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix) - tmp_row2(:,puti) += hij * coefs + tmp_row2(:,puti) += hij * coefs(:) end if end do @@ -959,12 +958,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=1,hfix-1 if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (mo_bielec_integral(p1, p2, putj, hfix)-mo_bielec_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - tmp_row(:,putj) += hij * coefs + tmp_row(:,putj) += hij * coefs(:) end do do putj=hfix+1,mo_tot_num if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (mo_bielec_integral(p1, p2, hfix, putj)-mo_bielec_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - tmp_row(:,putj) += hij * coefs + tmp_row(:,putj) += hij * coefs(:) end do mat(:, :puti-1, puti) += tmp_row(:,:puti-1) @@ -982,13 +981,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 if(.not. banned(puti,putj,1)) then hij = mo_bielec_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1) - tmp_row(:,puti) += hij * coefs + tmp_row(:,puti) += hij * coefs(:) end if putj = p1 if(.not. banned(puti,putj,1)) then hij = mo_bielec_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2) - tmp_row2(:,puti) += hij * coefs + tmp_row2(:,puti) += hij * coefs(:) end if end do mat(:,:p2-1,p2) += tmp_row(:,:p2-1) @@ -1017,7 +1016,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) += coefs * hij + mat(:, p1, p2) += coefs(:) * hij end do end do end diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index 3deccb12..f3759781 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -8,8 +8,8 @@ subroutine create_selection_buffer(N, siz, res) allocate(res%det(N_int, 2, siz), res%val(siz)) - res%val = 0d0 - res%det = 0_8 + res%val(:) = 0d0 + res%det(:,:,:) = 0_8 res%N = N res%mini = 0d0 res%cur = 0 diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index f86a7fcd..50d8bbcc 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -51,7 +51,10 @@ subroutine run_wf ! --------- print *, 'Selection' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + if (mpi_master) then + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + endif + call mpi_bcast_psi(energy,N_states) !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() @@ -65,7 +68,11 @@ subroutine run_wf ! -------- print *, 'Davidson' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + print *, 'PT2' + if (mpi_master) then + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + endif + call mpi_bcast_psi(energy,N_states) call omp_set_nested(.True.) call davidson_slave_tcp(0) call omp_set_nested(.False.) @@ -77,7 +84,10 @@ subroutine run_wf ! --- print *, 'PT2' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + if (mpi_master) then + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + endif + call mpi_bcast_psi(energy,N_states) logical :: lstop lstop = .False. diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 700ee0e7..29ccd214 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -100,7 +100,6 @@ subroutine selection_collector(b, N, pt2) double precision, pointer :: val(:) integer(bit_kind), pointer :: det(:,:,:) integer, allocatable :: task_id(:) - real :: time, time0 type(selection_buffer) :: b2 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -109,11 +108,11 @@ subroutine selection_collector(b, N, pt2) allocate(task_id(N_det_generators)) more = 1 pt2(:) = 0d0 - call CPU_TIME(time0) + pt2_mwen(:) = 0.d0 do while (more == 1) call pull_selection_results(zmq_socket_pull, pt2_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) - pt2 += pt2_mwen + pt2(:) += pt2_mwen(:) do i=1, b2%cur call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) if (b2%val(i) > b%mini) exit @@ -125,7 +124,6 @@ subroutine selection_collector(b, N, pt2) endif call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do - call CPU_TIME(time) end do diff --git a/plugins/MPI/NEEDED_CHILDREN_MODULES b/plugins/MPI/NEEDED_CHILDREN_MODULES index 19028952..495f2ad0 100644 --- a/plugins/MPI/NEEDED_CHILDREN_MODULES +++ b/plugins/MPI/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Utils +Determinants Utils diff --git a/plugins/MPI/mpi_test.irp.f b/plugins/MPI/mpi_test.irp.f deleted file mode 100644 index 3fcb355a..00000000 --- a/plugins/MPI/mpi_test.irp.f +++ /dev/null @@ -1,12 +0,0 @@ -program MPI - implicit none - BEGIN_DOC -! MPI test program - END_DOC - print *, 'hello world' - print *, 'rank, size, master = ', mpi_rank, mpi_size, mpi_master - - integer :: ierr - call MPI_FINALIZE(ierr) - print *, ierr -end diff --git a/src/Bitmask/bitmasks_module.f90 b/src/Bitmask/bitmasks_module.f90 index c5a9093f..52c917f8 100644 --- a/src/Bitmask/bitmasks_module.f90 +++ b/src/Bitmask/bitmasks_module.f90 @@ -1,7 +1,8 @@ module bitmasks integer, parameter :: bit_kind_shift = 6 ! 5: 32 bits, 6: 64 bits integer, parameter :: bit_kind_size = 64 - integer, parameter :: bit_kind = 64/8 + integer, parameter :: bit_kind = 8 + integer, parameter :: mpi_bit_kind = 8 integer, parameter :: d_hole1 = 1 integer, parameter :: d_part1 = 2 integer, parameter :: d_hole2 = 3 diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index c67b1440..e4b1de50 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -4,6 +4,10 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] ! Energy of the current wave function END_DOC call u_0_H_u_0(psi_energy,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size) + integer :: i + do i=N_det+1,N_states + psi_energy(i) = 0.d0 + enddo END_PROVIDER