From 27dc0c06aea0ae63ed55621106df40353201e3a2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 10 Mar 2022 00:55:23 +0100 Subject: [PATCH] Optimize PT2 --- src/cipsi/run_pt2_slave.irp.f | 18 ++++---- src/cipsi/selection_buffer.irp.f | 71 +++++++++++++++++++------------- 2 files changed, 51 insertions(+), 38 deletions(-) diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index 9e046877..30fc7ce0 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -253,22 +253,22 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call sort_selection_buffer(b) call wall_time(time1) - if (time1-time0 > 15.d0) then +! if (time1-time0 > 15.d0) then call omp_set_lock(global_selection_buffer_lock) global_selection_buffer%mini = b%mini call merge_selection_buffers(b,global_selection_buffer) b%cur=0 call omp_unset_lock(global_selection_buffer_lock) call wall_time(time0) - if ( iproc == 1 .or. i_generator < 100 .or. done) then - call omp_set_lock(global_selection_buffer_lock) - call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) - call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) - global_selection_buffer%cur = 0 - call omp_unset_lock(global_selection_buffer_lock) - endif +! endif + + call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) + if ( iproc == 1 .or. i_generator < 100 .or. done) then + call omp_set_lock(global_selection_buffer_lock) + call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) + global_selection_buffer%cur = 0 + call omp_unset_lock(global_selection_buffer_lock) else - call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending) endif diff --git a/src/cipsi/selection_buffer.irp.f b/src/cipsi/selection_buffer.irp.f index 79899139..1f743e0e 100644 --- a/src/cipsi/selection_buffer.irp.f +++ b/src/cipsi/selection_buffer.irp.f @@ -92,38 +92,51 @@ subroutine merge_selection_buffers(b1, b2) allocate(val(sze), detmp(N_int, 2, sze)) i1=1 i2=1 - do i=1,nmwen - if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then - exit - else if (i1 > b1%cur) then - val(i) = b2%val(i2) - detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) - detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) - i2=i2+1 - else if (i2 > b2%cur) then - val(i) = b1%val(i1) - detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) - detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) - i1=i1+1 - else - if (b1%val(i1) <= b2%val(i2)) then - val(i) = b1%val(i1) - detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) - detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) - i1=i1+1 + + select case (N_int) +BEGIN_TEMPLATE + case $case + do i=1,nmwen + if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then + exit + else if (i1 > b1%cur) then + val(i) = b2%val(i2) + detmp(1:$N_int,1,i) = b2%det(1:$N_int,1,i2) + detmp(1:$N_int,2,i) = b2%det(1:$N_int,2,i2) + i2=i2+1 + else if (i2 > b2%cur) then + val(i) = b1%val(i1) + detmp(1:$N_int,1,i) = b1%det(1:$N_int,1,i1) + detmp(1:$N_int,2,i) = b1%det(1:$N_int,2,i1) + i1=i1+1 else - val(i) = b2%val(i2) - detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) - detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) - i2=i2+1 + if (b1%val(i1) <= b2%val(i2)) then + val(i) = b1%val(i1) + detmp(1:$N_int,1,i) = b1%det(1:$N_int,1,i1) + detmp(1:$N_int,2,i) = b1%det(1:$N_int,2,i1) + i1=i1+1 + else + val(i) = b2%val(i2) + detmp(1:$N_int,1,i) = b2%det(1:$N_int,1,i2) + detmp(1:$N_int,2,i) = b2%det(1:$N_int,2,i2) + i2=i2+1 + endif endif - endif - enddo + enddo + do i=nmwen+1,b2%N + val(i) = 0.d0 +! detmp(1:$N_int,1,i) = 0_bit_kind +! detmp(1:$N_int,2,i) = 0_bit_kind + enddo +SUBST [ case, N_int ] +(1); 1;; +(2); 2;; +(3); 3;; +(4); 4;; +default; N_int;; +END_TEMPLATE + end select deallocate(b2%det, b2%val) - do i=nmwen+1,b2%N - val(i) = 0.d0 - detmp(1:N_int,1:2,i) = 0_bit_kind - enddo b2%det => detmp b2%val => val b2%mini = min(b2%mini,b2%val(b2%N))