From 1f9687153410ede39332b0cfc13aa7458ab08232 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Apr 2017 18:26:57 +0200 Subject: [PATCH] Fixed memory bugs --- plugins/CAS_SD_ZMQ/selection.irp.f | 6 +--- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 17 +++++++---- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 18 +++++++----- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 1 - plugins/Full_CI_ZMQ/selection.irp.f | 4 ++- plugins/Full_CI_ZMQ/selection_buffer.irp.f | 3 ++ plugins/Full_CI_ZMQ/zmq_selection.irp.f | 28 ++++++++++++++----- .../pot_ao_pseudo_ints.irp.f | 5 ++-- 8 files changed, 53 insertions(+), 29 deletions(-) diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index d9b8c67d..04f464dc 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -671,10 +671,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) logical, external :: is_in_wavefunction -! if (is_in_wavefunction(det,N_int)) then -! stop 'is_in_wf' -! cycle -! endif if (do_ddci) then logical, external :: is_a_two_holes_two_particles @@ -1234,7 +1230,6 @@ subroutine ZMQ_selection(N_in, pt2) provide nproc call new_parallel_job(zmq_to_qp_run_socket,"selection") call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) - call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(N, N*2, b) endif @@ -1248,6 +1243,7 @@ subroutine ZMQ_selection(N_in, pt2) write(task,*) i_generator_start, i_generator_max, 1, N call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do + call zmq_set_running(zmq_to_qp_run_socket) !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 19cc5988..42379d8b 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -117,6 +117,8 @@ subroutine ZMQ_pt2(pt2,relative_error) endif end do + print *, 'OK' + deallocate(pt2_detail, comb, computed, tbc) end subroutine @@ -196,11 +198,15 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & pt2_mwen(N_states, N_det_generators) ) - actually_computed(:) = computed(:) + do i=1,N_det_generators + actually_computed(i) = computed(i) + enddo parts_to_get(:) = 1 if(fragment_first > 0) then - parts_to_get(1:fragment_first) = fragment_count + do i=1,fragment_first + parts_to_get(i) = fragment_count + enddo endif do i=1,tbc(0) @@ -223,7 +229,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su pullLoop : do while (more == 1) call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) do i=1,Nindex - pt2_detail(:, index(i)) += pt2_mwen(:,i) + pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i) parts_to_get(index(i)) -= 1 if(parts_to_get(index(i)) < 0) then print *, i, index(i), parts_to_get(index(i)), Nindex @@ -273,12 +279,11 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su if (dabs(eqt/avg) < relative_error) then pt2(1) = avg ! exit pullLoop + else + print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) endif - print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) end if end do pullLoop - print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 452b446b..5a246319 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -25,7 +25,7 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: index integer :: Nindex - allocate(pt2_detail(N_states, N_det)) + allocate(pt2_detail(N_states, N_det_generators)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -101,7 +101,7 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2_detail(N_states, N_det) + double precision, intent(in) :: pt2_detail(N_states, N_det_generators) integer, intent(in) :: ntask, N, index, task_id(*) integer :: rc @@ -133,7 +133,7 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2_detail(N_states, N_det) + double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) integer, intent(out) :: index integer, intent(out) :: N, ntask, task_id(*) integer :: rc, rn, i @@ -150,18 +150,22 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_recv( zmq_socket_pull, task_id, ntask*4, 0) if(rc /= 4*ntask) stop "pull" ! Activate is zmq_socket_pull is a REP rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + + do i=N+1,N_det_generators + pt2_detail(1:N_states,i) = 0.d0 + enddo end subroutine -BEGIN_PROVIDER [ double precision, pt2_workload, (N_det) ] +BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ] integer :: i - do i=1,N_det - pt2_workload(:) = dfloat(N_det - i + 1)**2 + do i=1,N_det_generators + pt2_workload(i) = dfloat(N_det_generators - i + 1)**2 end do pt2_workload = pt2_workload / sum(pt2_workload) END_PROVIDER diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 59b2ba1f..85b52c30 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -26,7 +26,6 @@ subroutine run_selection_slave(thread,iproc,energy) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) if(worker_id == -1) then print *, "WORKER -1" - !call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) return diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 1ecf1ea4..45cdb0db 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -543,7 +543,9 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d delta_E = E0(istate) - Hii val = mat(istate, p1, p2) + mat(istate, p1, p2) tmp = dsqrt(delta_E * delta_E + val * val) - delta_E = dabs(delta_E) + if (delta_E < 0.d0) then + tmp = -tmp + endif e_pert = 0.5d0 * ( tmp - delta_E) pt2(istate) = pt2(istate) + e_pert max_e_pert = min(e_pert,max_e_pert) diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index 4ca3830e..8a47cb9d 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -62,6 +62,9 @@ subroutine sort_selection_buffer(b) detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) vals(i) = b%val(iorder(i)) end do + do i=nmwen+1, size(vals) + vals(i) = 0.d0 + enddo deallocate(b%det, b%val) b%det => detmp b%val => vals diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 8aaddc19..ebb9d630 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -10,26 +10,39 @@ subroutine ZMQ_selection(N_in, pt2) integer :: i, N integer, external :: omp_get_thread_num double precision, intent(out) :: pt2(N_states) + integer, parameter :: maxtasks=10000 PROVIDE fragment_count + N = max(N_in,1) if (.True.) then PROVIDE pt2_e0_denominator - N = max(N_in,1) provide nproc call new_parallel_job(zmq_to_qp_run_socket,"selection") call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) - call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(N, N*2, b) endif - character(len=:), allocatable :: task - task = repeat(' ',20*N_det_generators) + ! Ugly, but variable-length strings don't work as expected with gfortran < 4.8 :-( + character*(20*maxtasks) :: task + task = ' ' + + integer :: k + k=0 do i= 1, N_det_generators - write(task(20*(i-1)+1:20*i),'(I9,X,I9,''|'')') i, N + k = k+1 + write(task(20*(k-1)+1:20*k),'(I9,X,I9,''|'')') i, N + k = k+20 + if (k>20*maxtasks) then + k=0 + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + endif end do - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + if (k > 0) then + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + endif + call zmq_set_running(zmq_to_qp_run_socket) !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() @@ -48,6 +61,7 @@ subroutine ZMQ_selection(N_in, pt2) endif call save_wavefunction endif + end subroutine @@ -83,7 +97,7 @@ subroutine selection_collector(b, pt2) real :: time, time0 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators)) done = 0 more = 1 pt2(:) = 0d0 diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index 7cfd6f9f..22cceab9 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -51,7 +51,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu print*, 'Providing the nuclear electron pseudo integrals (local)' call wall_time(wall_1) - wall_0 = wall_1 call cpu_time(cpu_1) @@ -67,6 +66,8 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu !$OMP wall_1) !$ thread_num = omp_get_thread_num() + + wall_0 = wall_1 !$OMP DO SCHEDULE (guided) do j = 1, ao_num @@ -149,7 +150,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu print*, 'Providing the nuclear electron pseudo integrals (non-local)' call wall_time(wall_1) - wall_0 = wall_1 call cpu_time(cpu_1) thread_num = 0 @@ -165,6 +165,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu !$ thread_num = omp_get_thread_num() + wall_0 = wall_1 !$OMP DO SCHEDULE (guided) ! do j = 1, ao_num