From 02b48aabce9b16081a00c9061e3f959e0b8a58cd Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 30 Sep 2016 15:29:06 +0200 Subject: [PATCH 01/13] shared memory davidson --- src/Davidson/davidson_parallel.irp.f | 366 +++++++++++++++++++++++++++ src/Davidson/u0Hu0.irp.f | 130 +++++----- 2 files changed, 439 insertions(+), 57 deletions(-) create mode 100644 src/Davidson/davidson_parallel.irp.f diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f new file mode 100644 index 00000000..d97e46b4 --- /dev/null +++ b/src/Davidson/davidson_parallel.irp.f @@ -0,0 +1,366 @@ + +!brought to you by garniroy inc. + +use bitmasks + +subroutine davidson_process(block, N, idx, vt, st) + + implicit none + + + integer , intent(in) :: block + integer , intent(inout) :: N + integer , intent(inout) :: idx(N_det) + double precision , intent(inout) :: vt(N_states, N_det) + double precision , intent(inout) :: st(N_states, N_det) + + integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi + integer(bit_kind) :: sorted_i(N_int) + double precision :: s2, hij + + vt = 0d0 + st = 0d0 + + N = N_det + do i=1,N + idx(i) = i + end do + + sh = block + + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut_(sh,1),shortcut_(sh+1,1)-1 + org_i = sort_idx_(i,1) + if(sh==sh2) then + endi = i-1 + else + endi = shortcut_(sh2+1,1)-1 + end if + do ni=1,N_int + sorted_i(ni) = sorted_(ni,i,1) + enddo + + do j=shortcut_(sh2,1),endi + org_j = sort_idx_(j,1) + ext = exa + do ni=1,N_int + ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) + end do + if(ext <= 4) then + call i_h_j (psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,hij) + call get_s2(psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,s2) + do istate=1,N_states + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) + st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + enddo + endif + enddo + enddo + enddo +end subroutine + + + BEGIN_PROVIDER [ integer, shortcut_, (0:N_det+1, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, N_det, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, N_det, 2) ] +&BEGIN_PROVIDER [ integer, sort_idx_, (N_det, 2) ] + implicit none + call sort_dets_ab_v(psi_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), n_det, N_int) + call sort_dets_ba_v(psi_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), n_det, N_int) +END_PROVIDER + + + +subroutine davidson_collect(block, N, idx, vt, st , v0, s0) + implicit none + + + integer , intent(in) :: block + integer , intent(in) :: N + integer , intent(in) :: idx(N) + double precision , intent(in) :: vt(N_states, N) + double precision , intent(in) :: st(N_states, N) + double precision , intent(inout) :: v0(N_det, N_states) + double precision , intent(inout) :: s0(N_det, N_states) + + integer :: i + + do i=1,N + v0(idx(i), :) += vt(:, i) + s0(idx(i), :) += st(:, i) + end do +end subroutine + + +subroutine davidson_init(zmq_to_qp_run_socket) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket + + call new_parallel_job(zmq_to_qp_run_socket,'davidson') +end subroutine + + + +subroutine davidson_add_task(zmq_to_qp_run_socket, block) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket + integer ,intent(in) :: block + character*(512) :: task + + + write(task,*) block + call add_task_to_taskserver(zmq_to_qp_run_socket, task) +end subroutine + + + +subroutine davidson_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call davidson_run_slave(1,i) +end + + +subroutine davidson_slave_tcp(i) + implicit none + integer, intent(in) :: i + + call davidson_run_slave(0,i) +end + + + +subroutine davidson_run_slave(thread,iproc) + use f77_zmq + implicit none + + integer, intent(in) :: thread, iproc + + integer :: worker_id, task_id, block + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if + + call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) +end subroutine + + + +subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),intent(in) :: zmq_socket_push + integer,intent(in) :: worker_id + integer :: task_id + character*(512) :: task + + + integer :: block + integer :: N + integer , allocatable :: idx(:) + double precision , allocatable :: vt(:,:) + double precision , allocatable :: st(:,:) + + + allocate(idx(N_det)) + allocate(vt(N_states, N_det)) + allocate(st(N_states, N_det)) + + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + if(task_id == 0) exit + read (task,*) block + + call davidson_process(block,N, idx, vt, st) + + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + call davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) + end do +end subroutine + + + +subroutine davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id + + integer ,intent(in) :: block + integer ,intent(in) :: N + integer ,intent(in) :: idx(N) + double precision ,intent(in) :: vt(N_states, N) + double precision ,intent(in) :: st(N_states, N) + integer :: rc + + rc = f77_zmq_send( zmq_socket_push, block, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "davidson_push_results failed to push block" + + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "davidson_push_results failed to push N" + + rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE) + if(rc /= 4*N) stop "davidson_push_results failed to push idx" + + rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states* N, ZMQ_SNDMORE) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to push vt" + + rc = f77_zmq_send( zmq_socket_push, st, 8*N_states* N, ZMQ_SNDMORE) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to push st" + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to push task_id" +end subroutine + + + +subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull + integer ,intent(out) :: task_id + integer ,intent(out) :: block + integer ,intent(out) :: N + integer ,intent(out) :: idx(N_det) + double precision ,intent(out) :: vt(N_states, N_det) + double precision ,intent(out) :: st(N_states, N_det) + + integer :: rc + + rc = f77_zmq_recv( zmq_socket_pull, block, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to pull block" + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to pull N" + + rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0) + if(rc /= 4*N) stop "davidson_push_results failed to pull idx" + + rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states* N, 0) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull vt" + + rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states* N, 0) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull st" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "davidson_pull_results failed to pull task_id" +end subroutine + + + +subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + + double precision ,intent(inout) :: v0(N_det, N_states) + double precision ,intent(inout) :: s0(N_det, N_states) + + integer :: more, task_id + + + integer :: block + integer :: N + integer , allocatable :: idx(:) + double precision , allocatable :: vt(:,:) + double precision , allocatable :: st(:,:) + + + allocate(idx(N_det)) + allocate(vt(N_states, N_det)) + allocate(st(N_states, N_det)) + + more = 1 + + do while (more == 1) + call davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) + call davidson_collect(block, N, idx, vt, st , v0, s0) + + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + end do +end subroutine + + +subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_collector + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: i + integer, external :: omp_get_thread_num + + double precision , intent(inout) :: v0(N_det, N_states) + double precision , intent(inout) :: s0(N_det, N_states) + + call zmq_set_running(zmq_to_qp_run_socket) + + zmq_collector = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + i = omp_get_thread_num() + + + PROVIDE nproc + + !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) + call end_zmq_to_qp_run_socket(zmq_collector) + call end_zmq_pull_socket(zmq_socket_pull) + else + call davidson_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'davidson') +end subroutine + + + + + diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index daef3ee5..39e81f28 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -43,7 +43,8 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, intent(in) :: H_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij - double precision, allocatable :: vt(:,:), ut(:,:) + double precision, allocatable :: vt(:,:) +! double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -55,9 +56,10 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - N_st_8 = align_double(N_st) + if(N_st /= N_states) stop "H_u_0_nstates N_st /= N_states" + N_st_8 = N_states ! align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -163,7 +165,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) v_0(i,istate) += H_jj(i) * u_0(i,istate) enddo enddo - deallocate (shortcut, sort_idx, sorted, version, ut) + !deallocate (shortcut, sort_idx, sorted, version, ut) end BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] @@ -175,11 +177,14 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] END_PROVIDER - +BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ] + ut = 0d0 +END_PROVIDER subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) use bitmasks + use f77_zmq implicit none BEGIN_DOC ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> @@ -196,7 +201,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, intent(in) :: H_jj(n), S2_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 - double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) + double precision, allocatable :: vt(:,:), st(:,:) + !double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -208,9 +214,12 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - - N_st_8 = align_double(N_st) + !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + + integer(ZMQ_PTR) :: handler + + if(N_st /= N_states .or. sze_8 /= N_det) stop "SPEP" + N_st_8 = N_st !! align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -218,20 +227,27 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate(ut(N_st_8,n)) + !allocate(ut(N_st_8,n)) v_0 = 0.d0 s_0 = 0.d0 - + provide ut do i=1,n do istate=1,N_st - ut(istate,i) = u_0(i,istate) + ut(istate,i) = u_0(i,istate) enddo enddo - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) + call davidson_init(handler) + + do sh=shortcut(0,1),1,-1 + call davidson_add_task(handler, sh) + enddo + + call davidson_run(handler, v_0, s_0) + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) @@ -239,49 +255,49 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0,1) - do sh2=sh,shortcut(0,1) - exa = 0 - do ni=1,Nint - exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh2,1),endi - org_j = sort_idx(j,1) - ext = exa - do ni=1,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - end do - if(ext <= 4) then - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) - enddo - endif - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT +! ! !$OMP DO SCHEDULE(dynamic) +! ! do sh=1,shortcut(0,1) +! ! do sh2=sh,shortcut(0,1) +! ! exa = 0 +! ! do ni=1,Nint +! ! exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) +! ! end do +! ! if(exa > 2) then +! ! cycle +! ! end if +! ! +! ! do i=shortcut(sh,1),shortcut(sh+1,1)-1 +! ! org_i = sort_idx(i,1) +! ! if(sh==sh2) then +! ! endi = i-1 +! ! else +! ! endi = shortcut(sh2+1,1)-1 +! ! end if +! ! do ni=1,Nint +! ! sorted_i(ni) = sorted(ni,i,1) +! ! enddo +! ! +! ! do j=shortcut(sh2,1),endi +! ! org_j = sort_idx(j,1) +! ! ext = exa +! ! do ni=1,Nint +! ! ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) +! ! end do +! ! if(ext <= 4) then +! ! call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) +! ! call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) +! ! do istate=1,n_st +! ! vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) +! ! vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) +! ! st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) +! ! st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) +! ! enddo +! ! endif +! ! enddo +! ! enddo +! ! enddo +! ! enddo +! ! !$OMP END DO NOWAIT !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) @@ -326,6 +342,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) enddo enddo - deallocate (shortcut, sort_idx, sorted, version, ut) + deallocate (shortcut, sort_idx, sorted, version) end From c431da38305eafb69a121dc3d4b15b3874464f8f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 30 Sep 2016 18:33:46 +0200 Subject: [PATCH 02/13] hobo_server - unstable --- src/Davidson/davidson_parallel.irp.f | 130 ++++++++++++++++++++++++--- src/Davidson/u0Hu0.irp.f | 7 +- 2 files changed, 118 insertions(+), 19 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index d97e46b4..37bac8d2 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -2,6 +2,7 @@ !brought to you by garniroy inc. use bitmasks +use f77_zmq subroutine davidson_process(block, N, idx, vt, st) @@ -58,10 +59,10 @@ subroutine davidson_process(block, N, idx, vt, st) call i_h_j (psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,hij) call get_s2(psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,s2) do istate=1,N_states - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + vt (istate,org_i) = vt (istate,org_i) + hij*dav_ut(istate,org_j) + vt (istate,org_j) = vt (istate,org_j) + hij*dav_ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*dav_ut(istate,org_j) + st (istate,org_j) = st (istate,org_j) + s2*dav_ut(istate,org_i) enddo endif enddo @@ -70,15 +71,6 @@ subroutine davidson_process(block, N, idx, vt, st) end subroutine - BEGIN_PROVIDER [ integer, shortcut_, (0:N_det+1, 2) ] -&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, N_det, 2) ] -&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, N_det, 2) ] -&BEGIN_PROVIDER [ integer, sort_idx_, (N_det, 2) ] - implicit none - call sort_dets_ab_v(psi_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), n_det, N_int) - call sort_dets_ba_v(psi_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), n_det, N_int) -END_PROVIDER - subroutine davidson_collect(block, N, idx, vt, st , v0, s0) @@ -203,6 +195,8 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) allocate(vt(N_states, N_det)) allocate(st(N_states, N_det)) + call hobo_get() + do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) if(task_id == 0) exit @@ -353,6 +347,9 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) call end_zmq_to_qp_run_socket(zmq_collector) call end_zmq_pull_socket(zmq_socket_pull) + call hobo_server_end() + else if(i==1) then + call hobo_server() else call davidson_slave_inproc(i) endif @@ -362,5 +359,112 @@ end subroutine +subroutine hobo_server() + use f77_zmq + implicit none + integer(ZMQ_PTR) context + integer(ZMQ_PTR) responder + character*(64) address + character(len=:), allocatable :: buffer + integer rc + allocate (character(len=20) :: buffer) + address = 'tcp://*:11223' + + context = f77_zmq_ctx_new() + responder = f77_zmq_socket(context, ZMQ_REP) + rc = f77_zmq_bind(responder,address) + + do + rc = f77_zmq_recv(responder, buffer, 5, 0) + if (buffer(1:rc) /= 'end') then + rc = f77_zmq_send (responder, N_det, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, psi_det, 16*N_int*N_det, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, ut, 8*N_det*N_states, 0) + else + rc = f77_zmq_send (responder, "end", 3, 0) + exit + endif + enddo + rc = f77_zmq_close(responder) + rc = f77_zmq_ctx_destroy(context) +end subroutine + + +subroutine hobo_server_end() + implicit none + use f77_zmq + + integer(ZMQ_PTR) context + integer(ZMQ_PTR) requester + character*(64) address + integer rc + character*(64) buf + + address = trim(qp_run_address)//':11223' + context = f77_zmq_ctx_new() + requester = f77_zmq_socket(context, ZMQ_REQ) + rc = f77_zmq_connect(requester,address) + + rc = f77_zmq_send(requester, "end", 3, 0) + rc = f77_zmq_recv(requester, buf, 3, 0) + rc = f77_zmq_close(requester) + rc = f77_zmq_ctx_destroy(context) +end subroutine + + +subroutine hobo_get() + implicit none + use f77_zmq + + integer(ZMQ_PTR) context + integer(ZMQ_PTR) requester + character*(64) address + character*(20) buffer +! integer(8), intent(inout) :: det(N_int,2,*) +! double precision, intent(inout) :: u_0(*) +! integer,intent(out) :: nd + integer rc + + address = trim(qp_run_address)//':11223' + + context = f77_zmq_ctx_new() + requester = f77_zmq_socket(context, ZMQ_REQ) + rc = f77_zmq_connect(requester,address) + + rc = f77_zmq_send(requester, "Hello", 5, 0) + rc = f77_zmq_recv(requester, dav_size, 4, 0) + TOUCH dav_size + rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) + rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) + rc = f77_zmq_close(requester) + rc = f77_zmq_ctx_destroy(context) +end subroutine + + + +BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ] +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, dav_ut, (N_states, dav_size) ] +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ] +END_PROVIDER + + +BEGIN_PROVIDER [ integer, dav_size ] +END_PROVIDER + + + BEGIN_PROVIDER [ integer, shortcut_, (0:dav_size+1, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, dav_size, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, dav_size, 2) ] +&BEGIN_PROVIDER [ integer, sort_idx_, (dav_size, 2) ] + implicit none + call sort_dets_ab_v(dav_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), dav_size, N_int) + call sort_dets_ba_v(dav_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), dav_size, N_int) +END_PROVIDER \ No newline at end of file diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 39e81f28..f23c03bc 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -177,11 +177,6 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ] - ut = 0d0 -END_PROVIDER - - subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) use bitmasks use f77_zmq @@ -231,7 +226,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) v_0 = 0.d0 s_0 = 0.d0 - provide ut + do i=1,n do istate=1,N_st ut(istate,i) = u_0(i,istate) From 272482e8fa860fc6fd5d454aab6de6b2b7cd6c38 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Oct 2016 10:42:47 +0200 Subject: [PATCH 03/13] miniserver when needed - untested --- src/Davidson/davidson_parallel.irp.f | 27 +++++++++++++++------------ src/Davidson/u0Hu0.irp.f | 8 +++++++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 37bac8d2..0b4a84d8 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -22,7 +22,7 @@ subroutine davidson_process(block, N, idx, vt, st) vt = 0d0 st = 0d0 - N = N_det + N = dav_size ! N_det do i=1,N idx(i) = i end do @@ -56,8 +56,8 @@ subroutine davidson_process(block, N, idx, vt, st) ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) end do if(ext <= 4) then - call i_h_j (psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,hij) - call get_s2(psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,s2) + call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) ! psi_det + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) do istate=1,N_states vt (istate,org_i) = vt (istate,org_i) + hij*dav_ut(istate,org_j) vt (istate,org_j) = vt (istate,org_j) + hij*dav_ut(istate,org_i) @@ -100,6 +100,7 @@ subroutine davidson_init(zmq_to_qp_run_socket) integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket + touch dav_size dav_det dav_ut call new_parallel_job(zmq_to_qp_run_socket,'davidson') end subroutine @@ -195,7 +196,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) allocate(vt(N_states, N_det)) allocate(st(N_states, N_det)) - call hobo_get() + !call hobo_get() do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) @@ -347,9 +348,9 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) call end_zmq_to_qp_run_socket(zmq_collector) call end_zmq_pull_socket(zmq_socket_pull) - call hobo_server_end() + call davidson_miniserver_end() else if(i==1) then - call hobo_server() + call davidson_miniserver_run() else call davidson_slave_inproc(i) endif @@ -359,7 +360,7 @@ end subroutine -subroutine hobo_server() +subroutine davidson_miniserver_run() use f77_zmq implicit none integer(ZMQ_PTR) context @@ -378,9 +379,9 @@ subroutine hobo_server() do rc = f77_zmq_recv(responder, buffer, 5, 0) if (buffer(1:rc) /= 'end') then - rc = f77_zmq_send (responder, N_det, 4, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, psi_det, 16*N_int*N_det, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, ut, 8*N_det*N_states, 0) + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_det, 16*N_int*N_det, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*N_det*N_states, 0) else rc = f77_zmq_send (responder, "end", 3, 0) exit @@ -392,7 +393,7 @@ subroutine hobo_server() end subroutine -subroutine hobo_server_end() +subroutine davidson_miniserver_end() implicit none use f77_zmq @@ -414,7 +415,7 @@ subroutine hobo_server_end() end subroutine -subroutine hobo_get() +subroutine davidson_miniserver_get() implicit none use f77_zmq @@ -440,6 +441,8 @@ subroutine hobo_get() rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) rc = f77_zmq_close(requester) rc = f77_zmq_ctx_destroy(context) + + touch dav_det dav_ut end subroutine diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index f23c03bc..b4819067 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -227,6 +227,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) v_0 = 0.d0 s_0 = 0.d0 + if(n /= N_det) stop "n /= N_det" + do i=1,n do istate=1,N_st ut(istate,i) = u_0(i,istate) @@ -234,7 +236,11 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) - + + dav_size = n + dav_det = psi_det + dav_ut = ut + call davidson_init(handler) do sh=shortcut(0,1),1,-1 From 521c37add9ffd6f402b27c3cf8020395879c1bf6 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Oct 2016 14:02:26 +0200 Subject: [PATCH 04/13] davidson_slave sparse output --- src/Davidson/davidson_parallel.irp.f | 85 +++++++++++++++++----------- src/Davidson/u0Hu0.irp.f | 9 ++- 2 files changed, 57 insertions(+), 37 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 0b4a84d8..9b5130bc 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -11,18 +11,19 @@ subroutine davidson_process(block, N, idx, vt, st) integer , intent(in) :: block integer , intent(inout) :: N - integer , intent(inout) :: idx(N_det) - double precision , intent(inout) :: vt(N_states, N_det) - double precision , intent(inout) :: st(N_states, N_det) + integer , intent(inout) :: idx(dav_size) + double precision , intent(inout) :: vt(N_states, dav_size) + double precision , intent(inout) :: st(N_states, dav_size) integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi integer(bit_kind) :: sorted_i(N_int) double precision :: s2, hij +! print *, "processing block", block, "/", shortcut_(0,1) vt = 0d0 st = 0d0 - N = dav_size ! N_det + N = dav_size do i=1,N idx(i) = i end do @@ -68,6 +69,19 @@ subroutine davidson_process(block, N, idx, vt, st) enddo enddo enddo + + N = 0 + do i=1, dav_size + if(vt(1, i) /= 0d0 .or. st(1, i) /= 0d0) then + N = N+1 + do istate=1,N_states + vt (istate,N) = vt (istate,i) + st (istate,N) = st (istate,i) + idx(N) = i + enddo + end if + end do +! print *, "done processing block", block, "/", shortcut_(0,1) end subroutine @@ -82,8 +96,8 @@ subroutine davidson_collect(block, N, idx, vt, st , v0, s0) integer , intent(in) :: idx(N) double precision , intent(in) :: vt(N_states, N) double precision , intent(in) :: st(N_states, N) - double precision , intent(inout) :: v0(N_det, N_states) - double precision , intent(inout) :: s0(N_det, N_states) + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) integer :: i @@ -101,7 +115,7 @@ subroutine davidson_init(zmq_to_qp_run_socket) integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket touch dav_size dav_det dav_ut - call new_parallel_job(zmq_to_qp_run_socket,'davidson') + call new_parallel_job(zmq_to_qp_run_socket,"davidson") end subroutine @@ -166,7 +180,8 @@ subroutine davidson_run_slave(thread,iproc) end if call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) - +! print *, "done slavin'" + !call sleep(1) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) @@ -192,11 +207,10 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) double precision , allocatable :: st(:,:) - allocate(idx(N_det)) - allocate(vt(N_states, N_det)) - allocate(st(N_states, N_det)) + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) - !call hobo_get() do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) @@ -205,6 +219,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) call davidson_process(block,N, idx, vt, st) + ! reverse ? call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) end do @@ -255,9 +270,9 @@ subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id integer ,intent(out) :: task_id integer ,intent(out) :: block integer ,intent(out) :: N - integer ,intent(out) :: idx(N_det) - double precision ,intent(out) :: vt(N_states, N_det) - double precision ,intent(out) :: st(N_states, N_det) + integer ,intent(out) :: idx(dav_size) + double precision ,intent(out) :: vt(N_states, dav_size) + double precision ,intent(out) :: st(N_states, dav_size) integer :: rc @@ -266,7 +281,7 @@ subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "davidson_push_results failed to pull N" - + rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0) if(rc /= 4*N) stop "davidson_push_results failed to pull idx" @@ -289,8 +304,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision ,intent(inout) :: v0(N_det, N_states) - double precision ,intent(inout) :: s0(N_det, N_states) + double precision ,intent(inout) :: v0(dav_size, N_states) + double precision ,intent(inout) :: s0(dav_size, N_states) integer :: more, task_id @@ -300,20 +315,27 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer , allocatable :: idx(:) double precision , allocatable :: vt(:,:) double precision , allocatable :: st(:,:) + integer :: deleted + logical, allocatable :: done(:) + allocate(done(shortcut_(0,1))) + deleted = 0 + done = .false. - - allocate(idx(N_det)) - allocate(vt(N_states, N_det)) - allocate(st(N_states, N_det)) + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) more = 1 do while (more == 1) call davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) call davidson_collect(block, N, idx, vt, st , v0, s0) - +! done(block) = .true. call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + deleted += 1 +! print *, "DELETED", deleted, done end do +! print *, "done collector" end subroutine @@ -330,8 +352,8 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) integer :: i integer, external :: omp_get_thread_num - double precision , intent(inout) :: v0(N_det, N_states) - double precision , intent(inout) :: s0(N_det, N_states) + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) call zmq_set_running(zmq_to_qp_run_socket) @@ -342,7 +364,7 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) PROVIDE nproc - !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1) + !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(3) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! NPROC !!!!! i = omp_get_thread_num() if (i==0) then call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) @@ -380,8 +402,8 @@ subroutine davidson_miniserver_run() rc = f77_zmq_recv(responder, buffer, 5, 0) if (buffer(1:rc) /= 'end') then rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_det, 16*N_int*N_det, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_ut, 8*N_det*N_states, 0) + rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states, 0) else rc = f77_zmq_send (responder, "end", 3, 0) exit @@ -412,6 +434,7 @@ subroutine davidson_miniserver_end() rc = f77_zmq_recv(requester, buf, 3, 0) rc = f77_zmq_close(requester) rc = f77_zmq_ctx_destroy(context) +! print *, "closed miniserver" end subroutine @@ -439,6 +462,8 @@ subroutine davidson_miniserver_get() TOUCH dav_size rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) + TOUCH dav_det dav_ut + rc = f77_zmq_close(requester) rc = f77_zmq_ctx_destroy(context) @@ -455,10 +480,6 @@ BEGIN_PROVIDER [ double precision, dav_ut, (N_states, dav_size) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ] -END_PROVIDER - - BEGIN_PROVIDER [ integer, dav_size ] END_PROVIDER diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index b4819067..418ada51 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -44,7 +44,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij double precision, allocatable :: vt(:,:) -! double precision, allocatable :: ut(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -197,7 +197,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 double precision, allocatable :: vt(:,:), st(:,:) - !double precision, allocatable :: ut(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -222,7 +222,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - !allocate(ut(N_st_8,n)) + allocate(ut(N_st_8,n)) v_0 = 0.d0 s_0 = 0.d0 @@ -240,9 +240,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) dav_size = n dav_det = psi_det dav_ut = ut - + call davidson_init(handler) - do sh=shortcut(0,1),1,-1 call davidson_add_task(handler, sh) enddo From 74ffa71dc663bdc47e037346b8f88c1cb7f6f9b2 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Oct 2016 14:30:13 +0200 Subject: [PATCH 05/13] fci_zmq with N_states > 1 --- plugins/Full_CI_ZMQ/selection_double.irp.f | 38 ++++++++++++---------- plugins/Full_CI_ZMQ/selection_single.irp.f | 26 ++++++++------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index 3e602c21..a98252b0 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -31,7 +31,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - !call assert(psi_det_generators(1,1,i_generator) == psi_det_sorted(1,1,i_generator), "sorted selex") do s1=1,2 do s2=s1,2 sp = s1 @@ -43,7 +42,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p h1 = hole_list(i1,s1) h2 = hole_list(i2,s2) call apply_holes(psi_det_generators(1,1,i_generator), s1,h1,s2,h2, mask, ok, N_int) - !call assert(ok, irp_here) logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) @@ -88,14 +86,13 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision, intent(inout) :: pt2(N_states) type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j + integer :: s1, s2, p1, p2, ib, j, istate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii + double precision :: e_pert, delta_E, val, Hii, max_e_pert double precision, external :: diag_H_mat_elem_fock logical, external :: detEq - - if(N_states > 1) stop "fill_buffer_double N_states > 1" + if(sp == 3) then s1 = 1 @@ -106,7 +103,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d end if call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) - !call assert(ok, "sosoqs") + do p1=1,mo_tot_num if(bannedOrb(p1, s1)) cycle ib = 1 @@ -116,19 +113,24 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(banned(p1,p2)) cycle if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - !call assert(ok, "ododod") + val = mat(1, p1, p2) Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - - delta_E = E0(1) - Hii - if (delta_E < 0.d0) then - e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - else - e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - endif - pt2(1) += e_pert - if(dabs(e_pert) > buf%mini) then + max_e_pert = 0d0 + + do istate=1,N_states + delta_E = E0(istate) - Hii + if (delta_E < 0.d0) then + e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + else + e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + endif + pt2(istate) += e_pert + if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert + end do + + if(dabs(max_e_pert) > buf%mini) then ! do j=1,buf%cur-1 ! if(detEq(buf%det(1,1,j), det, N_int)) then ! print *, "tops" @@ -136,7 +138,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! stop ! end if ! end do - call add_to_selection_buffer(buf, det, e_pert) + call add_to_selection_buffer(buf, det, max_e_pert) end if end do end do diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f index 77d985af..cdeee318 100644 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ b/plugins/Full_CI_ZMQ/selection_single.irp.f @@ -72,12 +72,11 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, double precision, intent(inout) :: pt2(N_states) type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib + integer :: s1, s2, p1, p2, ib, istate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii + double precision :: e_pert, delta_E, val, Hii, max_e_pert double precision, external :: diag_H_mat_elem_fock - if(N_states > 1) stop "fill_buffer_single N_states > 1" call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) @@ -88,15 +87,20 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, val = vect(1, p1) Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) + max_e_pert = 0d0 - delta_E = E0(1) - Hii - if (delta_E < 0.d0) then - e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - else - e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - endif - pt2(1) += e_pert - if(dabs(e_pert) > buf%mini) call add_to_selection_buffer(buf, det, e_pert) + do istate=1,N_states + delta_E = E0(istate) - Hii + if (delta_E < 0.d0) then + e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + else + e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + endif + pt2(istate) += e_pert + if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert + end do + + if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) end do end subroutine From eaaf864f2852626d969dd7f3208605c92689c328 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Oct 2016 14:48:43 +0200 Subject: [PATCH 06/13] incorrect nproc in davidson --- src/Davidson/davidson_parallel.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 9b5130bc..2c1d7edc 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -364,7 +364,7 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) PROVIDE nproc - !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(3) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! NPROC !!!!! + !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+2) i = omp_get_thread_num() if (i==0) then call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) From 77f34c67adef29cb69996f02ad32bed3e5c5f43d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Oct 2016 09:52:41 +0200 Subject: [PATCH 07/13] some cleaning --- plugins/Full_CI_ZMQ/selection.irp.f | 14 ------- plugins/Full_CI_ZMQ/selection_double.irp.f | 43 +--------------------- plugins/Full_CI_ZMQ/selection_single.irp.f | 14 ------- src/Davidson/davidson_parallel.irp.f | 2 - 4 files changed, 1 insertion(+), 72 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index ff32d56b..11b078f3 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -1,19 +1,5 @@ use bitmasks -! BEGIN_PROVIDER [ double precision, integral8, (mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) ] -! use bitmasks -! implicit none -! -! integer :: h1, h2 -! -! integral8 = 0d0 -! do h1=1, mo_tot_num -! do h2=1, mo_tot_num -! call get_mo_bielec_integrals_ij(h1, h2 ,mo_tot_num,integral8(1,1,h1,h2),mo_integrals_map) -! end do -! end do -! END_PROVIDER - double precision function integral8(i,j,k,l) implicit none diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index a98252b0..a6f95329 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -131,13 +131,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d end do if(dabs(max_e_pert) > buf%mini) then -! do j=1,buf%cur-1 -! if(detEq(buf%det(1,1,j), det, N_int)) then -! print *, "tops" -! print *, i_generator, s1, s2, h1, h2,p1,p2 -! stop -! end if -! end do call add_to_selection_buffer(buf, det, max_e_pert) end if end do @@ -156,10 +149,8 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) integer :: i, j, k, l, h(0:2,2), p(0:4,2), nt integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - logical :: bandon mat = 0d0 - bandon = .false. do i=1,N_int negMask(i,1) = not(mask(i,1)) @@ -187,14 +178,11 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - !call assert(nt >= 2, irp_here//"qsd") if(i < i_gen) then if(nt == 4) call past_d2(banned, p, sp) if(nt == 3) call past_d1(bannedOrb, p) - !call assert(nt /= 2, "should have been discarded") else if(i == i_gen) then - bandon = .true. if(sp == 3) then banned(:,:,2) = transpose(banned(:,:,1)) else @@ -214,7 +202,6 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) end if end if end do - call assert(bandon, "BANDON") end subroutine @@ -243,13 +230,12 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) bant = 1 tip = p(0,1) * p(0,2) - !call assert(p(0,1) + p(0,2) == 4, irp_here//"df") + ma = sp if(p(0,1) > p(0,2)) ma = 1 if(p(0,1) < p(0,2)) ma = 2 mi = mod(ma, 2) + 1 - !print *, "d2 SPtip", SP, tip if(sp == 3) then if(ma == 2) bant = 2 @@ -266,7 +252,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = h(2, ma) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) if(ma == 1) then mat(:, putj, puti) += coefs * hij else @@ -274,7 +259,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end if end do else - !call assert(tip == 4, "df") do i = 1,2 do j = 1,2 puti = p(i, 1) @@ -287,7 +271,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = h(1,2) hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, 1, 2, puti, putj) mat(:, puti, putj) += coefs * hij end do end do @@ -308,7 +291,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) mat(:, puti, putj) += coefs * hij end do end do @@ -316,7 +298,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h1 = h(1, mi) h2 = h(1, ma) p1 = p(1, mi) - !call assert(ma == sp, "dldl") do i=1,3 puti = p(turn3(1,i), ma) putj = p(turn3(2,i), ma) @@ -324,11 +305,9 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) mat(:, min(puti, putj), max(puti, putj)) += coefs * hij end do else ! tip == 4 - !call assert(tip == 4, "qsdf") puti = p(1, sp) putj = p(2, sp) if(.not. banned(puti,putj,1)) then @@ -337,7 +316,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h1 = h(1, mi) h2 = h(2, mi) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask,ma,ma, puti, putj) mat(:, puti, putj) += coefs * hij end if end if @@ -359,7 +337,6 @@ subroutine debug_hij(hij, gen, mask, s1, s2, p1, p2) integer :: exc(0:2,2,2) call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - !call assert(ok, "nokey") call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) if(hij /= hij_ref) then print *, hij, hij_ref @@ -368,8 +345,6 @@ subroutine debug_hij(hij, gen, mask, s1, s2, p1, p2) call debug_det(mask, N_int) stop end if - - ! print *, "fourar", hij, hij_ref,s1,s2 end function @@ -411,11 +386,9 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) mi = turn2(ma) bant = 1 - !print *, "d1 SP", sp, p(0,1)*p(0,2) if(sp == 3) then !move MA - !call assert(p(0,1)*p(0,2) == 2, "ddmmm") if(ma == 2) bant = 2 puti = p(1,mi) hfix = h(1,ma) @@ -426,13 +399,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=1, hfix-1 if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row(1:N_states,putj) += hij * coefs(1:N_states) end do do putj=hfix+1, mo_tot_num if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row(1:N_states,putj) += hij * coefs(1:N_states) end do @@ -456,11 +427,9 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) tmp_row(:,puti) += hij * coefs end if - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) putj = p2 if(.not. banned(putj,puti,bant)) then hij = integral8(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row2(:,puti) += hij * coefs end if end do @@ -483,13 +452,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=1,hfix-1 if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) tmp_row(:,putj) += hij * coefs end do do putj=hfix+1,mo_tot_num if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) tmp_row(:,putj) += hij * coefs end do @@ -497,7 +464,6 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) mat(:, puti, puti:) += tmp_row(:,puti:) end do else - !call assert(sp == ma, "sp == ma") hfix = h(1,mi) pfix = p(1,mi) p1 = p(1,ma) @@ -509,14 +475,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 if(.not. banned(puti,putj,1)) then hij = integral8(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1) - !call debug_hij(hij, gen, mask, ma, ma, putj, puti) tmp_row(:,puti) += hij * coefs end if putj = p1 if(.not. banned(puti,putj,1)) then hij = integral8(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2) - !call debug_hij(hij, gen, mask, ma, ma, putj, puti) tmp_row2(:,puti) += hij * coefs end if end do @@ -585,13 +549,11 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(banned(p1, p2, bant)) cycle ! rentable? if(p1 == h1 .or. p2 == h2) then call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - !call assert(ok, "zsdq") call i_h_j(gen, det, N_int, hij) mat(:, p1, p2) += coefs(:) * hij else hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, 1, 2, p1, p2) mat(:, p1, p2) += coefs(:) * hij end if end do @@ -611,7 +573,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) else hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2) mat(:, puti, putj) += coefs(:) * hij - !call debug_hij(hij, gen, mask, sp, sp, puti, putj) end if end do end do @@ -699,8 +660,6 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) call bitstring_to_list(myMask(1,1), list(1), na, N_int) call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) - !call assert(na + nb == 2, "oyo") - !call assert(na == 1 .or. list(1) < list(2), "sqdsmmmm") banned(list(1), list(2)) = .true. end do genl end subroutine diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f index cdeee318..4b9f3f36 100644 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ b/plugins/Full_CI_ZMQ/selection_single.irp.f @@ -43,7 +43,6 @@ subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf do i=1, N_holes(sp) h1 = hole_list(i,sp) call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) - !call assert(ok, irp_here) bannedOrb = .true. do j=1,N_particles(sp) bannedOrb(particle_list(j, sp)) = .false. @@ -183,7 +182,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) p2 = p(turn3_2(2,i), sp) hij = integral8(p1, p2, h1, h2) - integral8(p2, p1, h1, h2) hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end do else if(h(0,sp) == 1) then @@ -197,7 +195,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) pmob = p(turn2(j), sp) hij = integral8(pfix, pmob, hfix, hmob) hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end do else @@ -210,7 +207,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) h2 = h(2,sfix) hij = (integral8(p1,p2,h1,h2) - integral8(p2,p1,h1,h2)) hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end if end if @@ -252,19 +248,16 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) if(lbanned(i)) cycle hij = (integral8(p1, p2, i, hole) - integral8(p2, p1, i, hole)) hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do do i=hole+1,mo_tot_num if(lbanned(i)) cycle hij = (integral8(p1, p2, hole, i) - integral8(p2, p1, hole, i)) hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do call apply_particle(mask, sp, p2, det, ok, N_int) - !call assert(ok, "OKE223") call i_h_j(gen, det, N_int, hij) vect(:, p2) += hij * coefs else @@ -273,17 +266,13 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) if(lbanned(i)) cycle hij = integral8(p1, p2, i, hole) hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do end if call apply_particle(mask, sp, p1, det, ok, N_int) - !call assert(ok, "OKQQE2") call i_h_j(gen, det, N_int, hij) vect(:, p1) += hij * coefs - - !print *, "endouille" end subroutine @@ -307,7 +296,6 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) do i=1,mo_tot_num if(lbanned(i)) cycle call apply_particle(mask, sp, i, det, ok, N_int) - !call assert(ok, "qsdo") call i_h_j(gen, det, N_int, hij) vect(:, i) += hij * coefs end do @@ -379,8 +367,6 @@ subroutine debug_hij_mo(hij, gen, mask, s1, p1) logical, external :: detEq call apply_particle(mask, s1, p1, det, ok, N_int) - !call assert(ok, "nokey_mo") - !call assert(.not. detEq(det, gen, N_int), "Hii ...") call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) if(hij /= hij_ref) then print *, hij, hij_ref diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 2c1d7edc..00c6f9d9 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -19,7 +19,6 @@ subroutine davidson_process(block, N, idx, vt, st) integer(bit_kind) :: sorted_i(N_int) double precision :: s2, hij -! print *, "processing block", block, "/", shortcut_(0,1) vt = 0d0 st = 0d0 @@ -81,7 +80,6 @@ subroutine davidson_process(block, N, idx, vt, st) enddo end if end do -! print *, "done processing block", block, "/", shortcut_(0,1) end subroutine From ebc1707235490deb29cbe3a879a9474ff53f7591 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Oct 2016 10:05:15 +0200 Subject: [PATCH 08/13] forgot to add davidson_slave --- src/Davidson/davidson_slave.irp.f | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 src/Davidson/davidson_slave.irp.f diff --git a/src/Davidson/davidson_slave.irp.f b/src/Davidson/davidson_slave.irp.f new file mode 100644 index 00000000..15830b1d --- /dev/null +++ b/src/Davidson/davidson_slave.irp.f @@ -0,0 +1,40 @@ +program davidson_slave + use f77_zmq + implicit none + + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: state + +! call provide_everything + call switch_qp_run_to_master + + zmq_context = f77_zmq_ctx_new () + zmq_state = 'davidson' + state = 'Waiting' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + call wait_for_state(zmq_state,state) + if(trim(state) /= "davidson") exit + !print *, 'Getting wave function' + !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + call davidson_miniserver_get() + + integer :: rc, i + + print *, 'Davidson slave running' + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call davidson_slave_tcp(i) + !$OMP END PARALLEL + end do +end + +! subroutine provide_everything +! PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states zmq_context +! end subroutine From b49085733ae555d7eaa6e3e1c2221b60cb4d024d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Oct 2016 11:30:49 +0200 Subject: [PATCH 09/13] bug with fci_zmq with N_states > 1 --- plugins/Full_CI_ZMQ/selection_double.irp.f | 2 +- plugins/Full_CI_ZMQ/selection_single.irp.f | 3 ++- src/Davidson/davidson_parallel.irp.f | 3 --- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index a6f95329..18aeecfe 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -114,13 +114,13 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - val = mat(1, p1, p2) Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) max_e_pert = 0d0 do istate=1,N_states delta_E = E0(istate) - Hii + val = mat(istate, p1, p2) if (delta_E < 0.d0) then e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) else diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f index 4b9f3f36..8ec4df68 100644 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ b/plugins/Full_CI_ZMQ/selection_single.irp.f @@ -83,12 +83,13 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, if(bannedOrb(p1)) cycle if(vect(1, p1) == 0d0) cycle call apply_particle(mask, sp, p1, det, ok, N_int) - val = vect(1, p1) + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) max_e_pert = 0d0 do istate=1,N_states + val = vect(istate, p1) delta_E = E0(istate) - Hii if (delta_E < 0.d0) then e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 00c6f9d9..93a387bc 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -444,9 +444,6 @@ subroutine davidson_miniserver_get() integer(ZMQ_PTR) requester character*(64) address character*(20) buffer -! integer(8), intent(inout) :: det(N_int,2,*) -! double precision, intent(inout) :: u_0(*) -! integer,intent(out) :: nd integer rc address = trim(qp_run_address)//':11223' From 71c84f78f11131e5894a4c376cc844a3bce95f4a Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Oct 2016 15:08:13 +0200 Subject: [PATCH 10/13] cleaning + microlisted splash_pq --- plugins/Full_CI_ZMQ/selection.irp.f | 27 ----- plugins/Full_CI_ZMQ/selection_double.irp.f | 135 ++++++++++++--------- plugins/Full_CI_ZMQ/selection_single.irp.f | 27 ----- 3 files changed, 80 insertions(+), 109 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 11b078f3..a0209cc5 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -42,14 +42,12 @@ subroutine get_mask_phase(det, phasemask) integer :: s, ni, i logical :: change -! phasemask = 0_8 phasemask = 0_1 do s=1,2 change = .false. do ni=1,N_int do i=0,bit_kind_size-1 if(BTEST(det(ni, s), i)) change = .not. change -! if(change) phasemask(ni, s) = ibset(phasemask(ni, s), i) if(change) phasemask((ni-1)*bit_kind_size + i + 1, s) = 1_1 end do end do @@ -86,21 +84,6 @@ subroutine select_connected(i_generator,E0,pt2,b) end subroutine -subroutine spot_occupied(mask, bannedOrb) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int) - logical, intent(inout) :: bannedOrb(mo_tot_num) - integer :: i, ne, list(mo_tot_num) - - call bitstring_to_list(mask, list, ne, N_int) - do i=1, ne - bannedOrb(list(i)) = .true. - end do -end subroutine - - double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) use bitmasks implicit none @@ -111,16 +94,6 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) integer(1) :: np double precision, parameter :: res(0:1) = (/1d0, -1d0/) -! call assert(s1 /= s2 .or. (h1 <= h2 .and. p1 <= p2), irp_here) -! np = 0 -! change = btest(phasemask(1+ishft(h1, -6), s1), iand(h1-1, 63)) -! change = xor(change, btest(phasemask(1+ishft(p1, -6), s1), iand(p1-1, 63))) -! if(xor(change, p1 < h1)) np = 1 -! -! change = btest(phasemask(1+ishft(h2, -6), s2), iand(h2-1, 63)) -! change = xor(change, btest(phasemask(1+ishft(p2, -6), s2), iand(p2-1, 63))) -! if(xor(change, p2 < h2)) np = np + 1 - np = phasemask(h1,s1) + phasemask(p1,s1) + phasemask(h2,s2) + phasemask(p2,s2) if(p1 < h1) np = np + 1_1 if(p2 < h2) np = np + 1_1 diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index 18aeecfe..f5c4abd8 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -12,10 +12,14 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p type(selection_buffer), intent(inout) :: buf double precision :: mat(N_states, mo_tot_num, mo_tot_num) - integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) logical :: fullMatch, ok + integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) + integer :: preinteresting(0:N_det_selectors), interesting(0:N_det_selectors) + integer(bit_kind) :: minilist(N_int, 2, N_det_selectors) + do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) @@ -30,25 +34,72 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do i=1,N_int + negMask(i,1) = not(psi_det_generators(i,1,i_generator)) + negMask(i,2) = not(psi_det_generators(i,2,i_generator)) + end do + + preinteresting(0) = 0 + do i=1,N_det_selectors + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + if(nt <= 4) then + preinteresting(0) += 1 + preinteresting(preinteresting(0)) = i + end if + end do + + do s1=1,2 do s2=s1,2 sp = s1 if(s1 /= s2) sp = 3 do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) + + + do i=1,N_int + negMask(i,1) = not(pmask(i,1)) + negMask(i,2) = not(pmask(i,2)) + end do + + interesting(0) = 0 + do ii=1,preinteresting(0) + i = preinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 4) then + interesting(0) += 1 + interesting(interesting(0)) = i + minilist(:,:,interesting(0)) = psi_det_sorted(:,:,i) + end if + end do ib = 1 if(s1 == s2) ib = i1+1 do i2=N_holes(s2),ib,-1 ! Generate low excitations first - h1 = hole_list(i1,s1) + h2 = hole_list(i2,s2) - call apply_holes(psi_det_generators(1,1,i_generator), s1,h1,s2,h2, mask, ok, N_int) + call apply_hole(pmask, s2,h2, mask, ok, N_int) +! call apply_holes(psi_det_generators(1,1,i_generator), s1,h1,s2,h2, mask, ok, N_int) logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) banned = .false. - bannedOrb(h1, s1) = .true. - bannedOrb(h2, s2) = .true. + call spot_isinwf(mask, psi_det_sorted, i_generator, N_det, banned, fullMatch) + if(fullMatch) cycle bannedOrb(1:mo_tot_num, 1:2) = .true. do s3=1,2 @@ -56,15 +107,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p bannedOrb(particle_list(i,s3), s3) = .false. enddo enddo - - - call spot_isinwf(mask, psi_det_sorted, i_generator, N_det, banned, fullMatch) - if(fullMatch) cycle - if(sp /= 2) call spot_occupied(mask(1,1), bannedOrb(1,1)) - if(sp /= 1) call spot_occupied(mask(1,2), bannedOrb(1,2)) - + mat = 0d0 - call splash_pq(mask, sp, psi_det_sorted, i_generator, N_det_selectors, bannedOrb, banned, mat) +! call splash_pq(mask, sp, psi_det_sorted, i_generator, N_det_selectors, bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) enddo enddo @@ -138,18 +184,22 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d end subroutine -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none - + + integer, intent(in) :: interesting(0:N_sel) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) integer, intent(in) :: sp, i_gen, N_sel logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - integer :: i, j, k, l, h(0:2,2), p(0:4,2), nt + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - +! logical :: bandon +! +! bandon = .false. mat = 0d0 do i=1,N_int @@ -157,7 +207,9 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) negMask(i,2) = not(mask(i,2)) end do - do i=1, N_sel + do i=1, N_sel ! interesting(0) + !i = interesting(ii) + nt = 0 do j=1,N_int mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) @@ -166,7 +218,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) end do if(nt > 4) cycle - + do j=1,N_int perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) @@ -178,11 +230,12 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - if(i < i_gen) then + if(interesting(i) < i_gen) then if(nt == 4) call past_d2(banned, p, sp) if(nt == 3) call past_d1(bannedOrb, p) else - if(i == i_gen) then + if(interesting(i) == i_gen) then +! bandon = .true. if(sp == 3) then banned(:,:,2) = transpose(banned(:,:,1)) else @@ -194,11 +247,11 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) end if end if if(nt == 4) then - call get_d2(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else - call get_d0(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) end if end if end do @@ -323,31 +376,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end subroutine -subroutine debug_hij(hij, gen, mask, s1, s2, p1, p2) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int,2), mask(N_int,2) - double precision, intent(in) :: hij - integer, intent(in) :: s1, s2, p1, p2 - integer(bit_kind) :: det(N_int,2) - double precision :: hij_ref, phase_ref - logical :: ok - integer :: degree - integer :: exc(0:2,2,2) - - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) - if(hij /= hij_ref) then - print *, hij, hij_ref - print *, s1, s2, p1, p2 - call debug_det(gen, N_int) - call debug_det(mask, N_int) - stop - end if -end function - - subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) use bitmasks implicit none @@ -537,7 +565,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 - !print *, "d0 SP", sp if(sp == 3) then ! AB h1 = p(1,1) @@ -550,12 +577,11 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(p1 == h1 .or. p2 == h2) then call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) += coefs(:) * hij else hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - mat(:, p1, p2) += coefs(:) * hij end if + mat(:, p1, p2) += coefs(:) * hij end do end do else ! AA BB @@ -569,11 +595,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - mat(:, puti, putj) += coefs(:) * hij else hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2) - mat(:, puti, putj) += coefs(:) * hij end if + mat(:, puti, putj) += coefs(:) * hij end do end do end if diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f index 8ec4df68..f107db11 100644 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ b/plugins/Full_CI_ZMQ/selection_single.irp.f @@ -49,7 +49,6 @@ subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf end do call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) if(fullMatch) cycle - call spot_occupied(mask(1,sp), bannedOrb) vect = 0d0 call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) @@ -353,29 +352,3 @@ end subroutine -subroutine debug_hij_mo(hij, gen, mask, s1, p1) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int,2), mask(N_int,2) - double precision, intent(in) :: hij - integer, intent(in) :: s1, p1 - integer(bit_kind) :: det(N_int,2) - double precision :: hij_ref, phase_ref - logical :: ok - integer :: degree - integer :: exc(0:2,2,2) - logical, external :: detEq - - call apply_particle(mask, s1, p1, det, ok, N_int) - call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) - if(hij /= hij_ref) then - print *, hij, hij_ref - print *, s1, p1 - call debug_det(gen, N_int) - call debug_det(mask, N_int) - call debug_det(det, N_int) - stop - end if -end function - From f46b9ebe87c0041bc53496b4d7710c33ea623c4d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Oct 2016 15:29:53 +0200 Subject: [PATCH 11/13] optimized davidson_process --- config/gfortran_avx.cfg | 2 +- src/Davidson/davidson_parallel.irp.f | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/config/gfortran_avx.cfg b/config/gfortran_avx.cfg index 6672bca1..80bbbec9 100644 --- a/config/gfortran_avx.cfg +++ b/config/gfortran_avx.cfg @@ -10,7 +10,7 @@ # # [COMMON] -FC : gfortran -ffree-line-length-none -I . -mavx +FC : gfortran -ffree-line-length-none -I . -mavx -g LAPACK_LIB : -llapack -lblas IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 93a387bc..729afaeb 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -18,9 +18,11 @@ subroutine davidson_process(block, N, idx, vt, st) integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi integer(bit_kind) :: sorted_i(N_int) double precision :: s2, hij + logical :: wrotten(dav_size) + wrotten = .false. - vt = 0d0 - st = 0d0 +! vt = 0d0 +! st = 0d0 N = dav_size do i=1,N @@ -58,11 +60,21 @@ subroutine davidson_process(block, N, idx, vt, st) if(ext <= 4) then call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) ! psi_det call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + if(.not. wrotten(org_i)) then + wrotten(org_i) = .true. + vt (:,org_i) = 0d0 + st (:,org_i) = 0d0 + end if + if(.not. wrotten(org_j)) then + wrotten(org_j) = .true. + vt (:,org_j) = 0d0 + st (:,org_j) = 0d0 + end if do istate=1,N_states - vt (istate,org_i) = vt (istate,org_i) + hij*dav_ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*dav_ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*dav_ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*dav_ut(istate,org_i) + vt (istate,org_i) += hij*dav_ut(istate,org_j) + st (istate,org_i) += s2*dav_ut(istate,org_j) + vt (istate,org_j) += hij*dav_ut(istate,org_i) + st (istate,org_j) += s2*dav_ut(istate,org_i) enddo endif enddo @@ -71,7 +83,7 @@ subroutine davidson_process(block, N, idx, vt, st) N = 0 do i=1, dav_size - if(vt(1, i) /= 0d0 .or. st(1, i) /= 0d0) then + if(wrotten(i)) then N = N+1 do istate=1,N_states vt (istate,N) = vt (istate,i) From 32e578c261e46314fc8f3358c1355cb646a4c292 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 5 Oct 2016 10:10:28 +0200 Subject: [PATCH 12/13] further microlisting splash_pq and spot_isinwf --- plugins/Full_CI_ZMQ/selection_double.irp.f | 109 ++++++++++++++------- src/Davidson/davidson_parallel.irp.f | 11 +-- src/Davidson/u0Hu0.irp.f | 46 +-------- 3 files changed, 75 insertions(+), 91 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index f5c4abd8..977622fd 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -17,8 +17,11 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: fullMatch, ok integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) - integer :: preinteresting(0:N_det_selectors), interesting(0:N_det_selectors) - integer(bit_kind) :: minilist(N_int, 2, N_det_selectors) + integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) + integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + + allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) + allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) @@ -35,13 +38,15 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + preinteresting(0) = 0 + prefullinteresting(0) = 0 + do i=1,N_int negMask(i,1) = not(psi_det_generators(i,1,i_generator)) negMask(i,2) = not(psi_det_generators(i,2,i_generator)) end do - preinteresting(0) = 0 - do i=1,N_det_selectors + do i=1,N_det nt = 0 do j=1,N_int mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) @@ -50,55 +55,85 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end do if(nt <= 4) then - preinteresting(0) += 1 - preinteresting(preinteresting(0)) = i + if(i <= N_det_selectors) then + preinteresting(0) += 1 + preinteresting(preinteresting(0)) = i + else if(nt <= 2) then + prefullinteresting(0) += 1 + prefullinteresting(prefullinteresting(0)) = i + end if end if end do - + do s1=1,2 - do s2=s1,2 - sp = s1 - if(s1 /= s2) sp = 3 - do i1=N_holes(s1),1,-1 ! Generate low excitations first - h1 = hole_list(i1,s1) - call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) - - - do i=1,N_int - negMask(i,1) = not(pmask(i,1)) - negMask(i,2) = not(pmask(i,2)) + do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) + + do i=1,N_int + negMask(i,1) = not(pmask(i,1)) + negMask(i,2) = not(pmask(i,2)) + end do + + interesting(0) = 0 + fullinteresting(0) = 0 + + do ii=1,preinteresting(0) + i = preinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do - interesting(0) = 0 - do ii=1,preinteresting(0) - i = preinteresting(ii) - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt <= 4) then - interesting(0) += 1 - interesting(interesting(0)) = i - minilist(:,:,interesting(0)) = psi_det_sorted(:,:,i) + if(nt <= 4) then + interesting(0) += 1 + interesting(interesting(0)) = i + minilist(:,:,interesting(0)) = psi_det_sorted(:,:,i) + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) end if + end if + end do + + do ii=1,prefullinteresting(0) + i = prefullinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do + + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) + end if + end do + + do s2=s1,2 + sp = s1 + if(s1 /= s2) sp = 3 + ib = 1 if(s1 == s2) ib = i1+1 do i2=N_holes(s2),ib,-1 ! Generate low excitations first h2 = hole_list(i2,s2) call apply_hole(pmask, s2,h2, mask, ok, N_int) -! call apply_holes(psi_det_generators(1,1,i_generator), s1,h1,s2,h2, mask, ok, N_int) logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) banned = .false. - call spot_isinwf(mask, psi_det_sorted, i_generator, N_det, banned, fullMatch) + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + if(fullMatch) cycle bannedOrb(1:mo_tot_num, 1:2) = .true. @@ -109,7 +144,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p enddo mat = 0d0 -! call splash_pq(mask, sp, psi_det_sorted, i_generator, N_det_selectors, bannedOrb, banned, mat, interesting) call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) enddo @@ -647,10 +681,11 @@ end subroutine -subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none + integer, intent(in) :: interesting(0:N) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) integer, intent(in) :: i_gen, N logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) @@ -673,7 +708,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - if(i < i_gen) then + if(interesting(i) < i_gen) then fullMatch = .true. return end if diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 729afaeb..168cdd08 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -19,16 +19,9 @@ subroutine davidson_process(block, N, idx, vt, st) integer(bit_kind) :: sorted_i(N_int) double precision :: s2, hij logical :: wrotten(dav_size) + + wrotten = .false. - -! vt = 0d0 -! st = 0d0 - - N = dav_size - do i=1,N - idx(i) = i - end do - sh = block do sh2=1,sh diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 418ada51..b91513c7 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -213,7 +213,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer(ZMQ_PTR) :: handler - if(N_st /= N_states .or. sze_8 /= N_det) stop "SPEP" + if(N_st /= N_states .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates" N_st_8 = N_st !! align_double(N_st) ASSERT (Nint > 0) @@ -255,50 +255,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) Vt = 0.d0 St = 0.d0 -! ! !$OMP DO SCHEDULE(dynamic) -! ! do sh=1,shortcut(0,1) -! ! do sh2=sh,shortcut(0,1) -! ! exa = 0 -! ! do ni=1,Nint -! ! exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) -! ! end do -! ! if(exa > 2) then -! ! cycle -! ! end if -! ! -! ! do i=shortcut(sh,1),shortcut(sh+1,1)-1 -! ! org_i = sort_idx(i,1) -! ! if(sh==sh2) then -! ! endi = i-1 -! ! else -! ! endi = shortcut(sh2+1,1)-1 -! ! end if -! ! do ni=1,Nint -! ! sorted_i(ni) = sorted(ni,i,1) -! ! enddo -! ! -! ! do j=shortcut(sh2,1),endi -! ! org_j = sort_idx(j,1) -! ! ext = exa -! ! do ni=1,Nint -! ! ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) -! ! end do -! ! if(ext <= 4) then -! ! call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) -! ! call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) -! ! do istate=1,n_st -! ! vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) -! ! vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) -! ! st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) -! ! st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) -! ! enddo -! ! endif -! ! enddo -! ! enddo -! ! enddo -! ! enddo -! ! !$OMP END DO NOWAIT - !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 From 7cc21bc38d80f31a432912e54dfa8171ac9aefd1 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 5 Oct 2016 11:27:35 +0200 Subject: [PATCH 13/13] need to touch dav_size before filling dav_det for ifort --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- src/Davidson/u0Hu0.irp.f | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 47e79b7c..42337258 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -133,7 +133,7 @@ subroutine ZMQ_selection(N, pt2) integer :: i_generator, i_generator_start, i_generator_max, step ! step = int(max(1.,10*elec_num/mo_tot_num) - step = int(10000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) do i= N_det_generators, 1, -step i_generator_start = max(i-step+1,1) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index b91513c7..8e2ef3f6 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -238,6 +238,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) dav_size = n + touch dav_size dav_det = psi_det dav_ut = ut