From ade11beb256395020c386345f2b4d495f01ccfda Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 15 Mar 2018 17:47:32 +0100 Subject: [PATCH 01/29] corrected bitmask in Full_CI_ZMQ - seemingly working shiftedbk --- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 1 + plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 22 +++++--- plugins/Full_CI_ZMQ/selection_buffer.irp.f | 4 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- plugins/dress_zmq/dressing.irp.f | 2 +- plugins/shiftedbk/shifted_bk.irp.f | 58 +++++++++++++------- 8 files changed, 57 insertions(+), 36 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index cd96a906..7cf27d0e 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -19,6 +19,7 @@ subroutine run double precision :: E_CI_before, relative_error, absolute_error, eqt allocate (pt2(N_states)) + call diagonalize_CI() pt2 = 0.d0 E_CI_before = pt2_E0_denominator(1) + nuclear_repulsion diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 3d95d6b0..96c4db69 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -342,7 +342,7 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ endif end if end do pullLoop - + E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 03b62937..88c8aacb 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -41,7 +41,7 @@ subroutine run_pt2_slave(thread,iproc,energy) buf%N = 0 n_tasks = 0 - call create_selection_buffer(1, 2, buf) + call create_selection_buffer(0, 0, buf) done = .False. do while (.not.done) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index ce706d2d..c0778a1b 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -253,6 +253,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) deallocate(lbanned) end + subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) use bitmasks use selection_types @@ -292,10 +293,17 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do k=1,N_int - hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) - hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) + !if(buf%N > 0) then + ! hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) + ! hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) + ! particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) + ! particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) + !else + hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) + !end if enddo @@ -598,7 +606,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d logical, external :: detEq - if(sp == 3) then s1 = 1 s2 = 2 @@ -616,15 +623,13 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d do p2=ib,mo_tot_num if(bannedOrb(p2, s2)) cycle if(banned(p1,p2)) cycle + if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) min_e_pert = 0d0 -! double precision :: hij -! call i_h_j(psi_det_generators(1,1,i_generator), det, N_int, hij) - do istate=1,N_states delta_E = E0(istate) - Hii val = mat(istate, p1, p2) + mat(istate, p1, p2) @@ -635,7 +640,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d e_pert = 0.5d0 * (tmp - delta_E) pt2(istate) = pt2(istate) + e_pert min_e_pert = min(e_pert,min_e_pert) -! ci(istate) = e_pert / hij end do if(min_e_pert <= buf%mini) then diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index 50c17f91..17410b7b 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -39,7 +39,7 @@ subroutine add_to_selection_buffer(b, det, val) double precision, intent(in) :: val integer :: i - if(val <= b%mini) then + if(b%N > 0 .and. val <= b%mini) then b%cur += 1 b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) b%val(b%cur) = val @@ -119,7 +119,7 @@ subroutine sort_selection_buffer(b) integer(bit_kind), pointer :: detmp(:,:,:) integer :: i, nmwen logical, external :: detEq - if (b%cur == 0) return + if (b%N == 0 .or. b%cur == 0) return nmwen = min(b%N, b%cur) allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 480fd415..06b5d538 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -286,7 +286,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time) - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 5) .or. total_computed == N_det_generators) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then ! Termination print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' if (zmq_abort(zmq_to_qp_run_socket) == -1) then diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index f835ffbf..0c15ee0b 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -86,7 +86,7 @@ BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 1.d-3 + relative_error = 1.d-4 call write_double(6,relative_error,"Convergence of the stochastic algorithm") call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index a2826aae..d2b5bdeb 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -4,14 +4,19 @@ program mrcc_sto BEGIN_DOC ! TODO END_DOC + call diagonalize_CI() call dress_zmq() end BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] &BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] +&BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] +&BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] implicit none current_generator_(:) = 0 + a_h_i = 0d0 + a_s2_i = 0d0 END_PROVIDER @@ -31,40 +36,48 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision :: hii, hij, sij, delta_e + double precision :: haa, hij, sij double precision, external :: diag_H_mat_elem_fock integer :: i,j,k,l,m, l_sd - double precision, save :: tot = 0d0 - double precision :: de(N_states), val, tmp + double precision :: hdress, sdress + double precision :: de, a_h_psi(Nstates), c_alpha + a_h_psi = 0d0 + if(current_generator_(iproc) /= i_gen) then current_generator_(iproc) = i_gen call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) end if - hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - do i=1,N_states - de(i) = (E0_denominator(i) - hii) - end do - - do i=1,N_states - val = 0D0 - do l_sd=1,n_minilist - call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) - val += hij + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + + do l_sd=1,n_minilist + call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) + a_h_i(l_sd, iproc) = hij + a_s2_i(l_sd, iproc) = sij + do i=1,Nstates + a_h_psi(i) += hij * psi_coef(minilist(l_sd), i) + end do + end do + + + do i=1,Nstates + de = E0_denominator(i) - haa + if(DABS(de) < 1D-2) cycle + + c_alpha = a_h_psi(i) / de + + do l_sd=1,n_minilist + hdress = c_alpha * a_h_i(l_sd, iproc) + sdress = c_alpha * a_s2_i(l_sd, iproc) + delta_ij_loc(i, minilist(l_sd), 1) += hdress + delta_ij_loc(i, minilist(l_sd), 2) += sdress end do - val = 2d0 * val - tmp = dsqrt(de(i)**2 + val**2) - if(de(i) < 0d0) tmp = -tmp - delta_ij_loc(i, minilist(l_sd), 1) += 0.5d0 * (tmp - de(i)) ! * psi_coef(minilist(l_sd), i) end do end subroutine - - - BEGIN_PROVIDER [ logical, initialize_E0_denominator ] implicit none BEGIN_DOC @@ -72,7 +85,8 @@ BEGIN_PROVIDER [ logical, initialize_E0_denominator ] END_DOC initialize_E0_denominator = .True. END_PROVIDER - + + BEGIN_PROVIDER [ double precision, E0_denominator, (N_states) ] implicit none BEGIN_DOC @@ -88,3 +102,5 @@ BEGIN_PROVIDER [ double precision, E0_denominator, (N_states) ] E0_denominator = -huge(1.d0) endif END_PROVIDER + + From 9ca7b74362cfdaf2f36d4c7f032004ce6efd233f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 20 Mar 2018 11:54:37 +0100 Subject: [PATCH 02/29] removed set_generators_bitmasks_as_holes_and_particles from dress_zmq --- plugins/Full_CI_ZMQ/selection.irp.f | 17 ++++---------- plugins/dress_zmq/alpha_factory.irp.f | 27 ++++++++-------------- plugins/dress_zmq/dress_zmq_routines.irp.f | 2 +- plugins/shiftedbk/shifted_bk.irp.f | 5 ++-- 4 files changed, 19 insertions(+), 32 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index c0778a1b..2463b762 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -290,20 +290,13 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d monoAdo = .true. monoBdo = .true. - + do k=1,N_int - !if(buf%N > 0) then - ! hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) - ! hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) - ! particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) - ! particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) - !else - hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) - hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) - !end if + hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) enddo diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 39284da1..190a94ad 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -74,26 +74,19 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index monoAdo = .true. monoBdo = .true. + do k=1,N_int - !hole (k,1) = iand(psi_det_generators(k,1,i_generator), generators_bitmask(k,1,s_hole,bitmask_index)) - !hole (k,2) = iand(psi_det_generators(k,2,i_generator), generators_bitmask(k,2,s_hole,bitmask_index)) - !particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), generators_bitmask(k,1,s_part,bitmask_index)) - !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), generators_bitmask(k,2,s_part,bitmask_index)) - hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) - hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) - + hole (k,1) = iand(psi_det_generators(k,1,i_generator), generators_bitmask(k,1,s_hole,bitmask_index)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), generators_bitmask(k,2,s_hole,bitmask_index)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), generators_bitmask(k,1,s_part,bitmask_index)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), generators_bitmask(k,2,s_part,bitmask_index)) + !hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) + !hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) + !particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) + !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) + enddo - - !if(i_generator == 34) then - ! call debug_det(psi_det_generators(1,1,34), N_int) - ! call debug_det(generators_bitmask(1,1,s_part,bitmask_index), N_int) - ! call debug_det(particle, N_int) - ! print *, "dddddddddddd" - ! stop - !end if integer :: N_holes(2), N_particles(2) integer :: hole_list(N_int*bit_kind_size,2) diff --git a/plugins/dress_zmq/dress_zmq_routines.irp.f b/plugins/dress_zmq/dress_zmq_routines.irp.f index 4dc75236..bde2c6d8 100644 --- a/plugins/dress_zmq/dress_zmq_routines.irp.f +++ b/plugins/dress_zmq/dress_zmq_routines.irp.f @@ -5,7 +5,7 @@ subroutine dress_zmq() read_wf = .True. SOFT_TOUCH read_wf - call set_generators_bitmasks_as_holes_and_particles + if (.True.) then integer :: i,j do j=1,N_states diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index d2b5bdeb..4c0408d8 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -1,9 +1,10 @@ -program mrcc_sto +program shifted_bk implicit none BEGIN_DOC ! TODO END_DOC + call diagonalize_CI() call dress_zmq() end @@ -64,7 +65,7 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili do i=1,Nstates de = E0_denominator(i) - haa - if(DABS(de) < 1D-2) cycle + if(DABS(de) < 1D-5) cycle c_alpha = a_h_psi(i) / de From a3d7954fafc7d9f9313b66ca2a8bb554a3d90367 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 20 Mar 2018 12:08:09 +0100 Subject: [PATCH 03/29] removed dress_zmq dependency to MRCC_Utils --- plugins/dress_zmq/EZFIO.cfg | 17 +++++++++++++++++ plugins/shiftedbk/NEEDED_CHILDREN_MODULES | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 plugins/dress_zmq/EZFIO.cfg diff --git a/plugins/dress_zmq/EZFIO.cfg b/plugins/dress_zmq/EZFIO.cfg new file mode 100644 index 00000000..52d41568 --- /dev/null +++ b/plugins/dress_zmq/EZFIO.cfg @@ -0,0 +1,17 @@ +[energy] +type: double precision +doc: Calculated energy +interface: ezfio + +[thresh_dressed_ci] +type: Threshold +doc: Threshold on the convergence of the dressed CI energy +interface: ezfio,provider,ocaml +default: 1.e-5 + +[n_it_max_dressed_ci] +type: Strictly_positive_int +doc: Maximum number of dressed CI iterations +interface: ezfio,provider,ocaml +default: 10 + diff --git a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES index bebf68a2..c3290687 100644 --- a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES +++ b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -dress_zmq DavidsonDressed MRCC_Utils +dress_zmq DavidsonDressed From 8b7cb82cf85695bdb6d97dd344ea1719d0d6cddc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 20 Mar 2018 13:37:05 +0100 Subject: [PATCH 04/29] bias when pt2_stoch does full computation --- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 4 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 41 ++++++++++++-------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index 7cf27d0e..c4cb3453 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -25,8 +25,8 @@ subroutine run E_CI_before = pt2_E0_denominator(1) + nuclear_repulsion threshold_selectors = 1.d0 threshold_generators = 1d0 - relative_error = 1.d-9 - absolute_error = 1.d-9 + relative_error = 1.d-5 + absolute_error = 1.d-5 call ZMQ_pt2(E_CI_before, pt2, relative_error, absolute_error, eqt) print *, 'Final step' print *, 'N_det = ', N_det diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 96c4db69..f236efc9 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -269,7 +269,6 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ parts_to_get(index(i)) -= 1 if(parts_to_get(index(i)) < 0) then print *, i, index(i), parts_to_get(index(i)) - print *, "PARTS ??" print *, parts_to_get stop "PARTS ??" end if @@ -295,7 +294,12 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ end do integer, external :: zmq_abort - + double precision :: E0, avg, prop + + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) + firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + call get_first_tooth(actually_computed, tooth) + if (firstTBDcomb > Ncomb) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) @@ -305,12 +309,8 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ endif exit pullLoop endif - - double precision :: E0, avg, prop - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - if(Nabove(1) < 5d0) cycle - call get_first_tooth(actually_computed, tooth) + + !if(Nabove(1) < 5d0) cycle E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) if (tooth <= comb_teeth) then @@ -323,7 +323,7 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ eqt = 0.d0 endif call wall_time(time) - if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error) ) then + if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error) .and. Nabove(tooth) >= 30) then ! Termination pt2(pt2_stoch_istate) = avg error(pt2_stoch_istate) = eqt @@ -336,19 +336,26 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ endif else if (Nabove(tooth) > Nabove_old) then + print *, loop print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' Nabove_old = Nabove(tooth) endif endif end if end do pullLoop - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) + if(tooth == comb_teeth+1) then + pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) + error(pt2_stoch_istate) = 0d0 + else + E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) + prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) + E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop + pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) + error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + end if + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call sort_selection_buffer(b) end subroutine @@ -393,7 +400,7 @@ subroutine get_first_tooth(computed, first_teeth) integer, intent(out) :: first_teeth integer :: i, first_det - first_det = 1 + first_det = N_det_generators+1+1 first_teeth = 1 do i=first_det_of_comb, N_det_generators if(.not.(computed(i))) then @@ -402,7 +409,7 @@ subroutine get_first_tooth(computed, first_teeth) end if end do - do i=comb_teeth, 1, -1 + do i=comb_teeth+1, 1, -1 if(first_det_of_teeth(i) < first_det) then first_teeth = i exit From c786e9fe5846d059df8600a24b24cfff5b9ed262 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 21 Mar 2018 12:06:26 +0100 Subject: [PATCH 05/29] shifted_bk_slave --- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 5 +- plugins/dress_zmq/dress_slave.irp.f | 10 +- plugins/dress_zmq/run_dress_slave.irp.f | 1 - plugins/shiftedbk/shifted_bk.irp.f | 102 ++------------------ plugins/shiftedbk/shifted_bk_routines.irp.f | 97 +++++++++++++++++++ plugins/shiftedbk/shifted_bk_slave.irp.f | 16 +++ 6 files changed, 126 insertions(+), 105 deletions(-) create mode 100644 plugins/shiftedbk/shifted_bk_routines.irp.f create mode 100644 plugins/shiftedbk/shifted_bk_slave.irp.f diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index c4cb3453..99aaab6b 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -25,8 +25,9 @@ subroutine run E_CI_before = pt2_E0_denominator(1) + nuclear_repulsion threshold_selectors = 1.d0 threshold_generators = 1d0 - relative_error = 1.d-5 - absolute_error = 1.d-5 + relative_error = 1.d-9 + absolute_error = 1.d-9 + call ZMQ_pt2(E_CI_before, pt2, relative_error, absolute_error, eqt) print *, 'Final step' print *, 'N_det = ', N_det diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index c7633b91..57fce783 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -24,7 +24,7 @@ subroutine run_wf double precision :: energy(N_states_diag) character*(64) :: states(1) integer :: rc, i - + call provide_everything zmq_context = f77_zmq_ctx_new () @@ -33,14 +33,12 @@ subroutine run_wf zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() do - call wait_for_states(states,zmq_state,1) - - if(trim(zmq_state) == 'Stopped') then + if(zmq_state(:7) == 'Stopped') then exit - else if (trim(zmq_state) == 'dress') then + else if (zmq_state(:5) == 'dress') then ! Selection ! --------- @@ -55,7 +53,7 @@ subroutine run_wf !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() - call dress_slave_tcp(i, energy) + call dress_slave_tcp(i+1, energy) !$OMP END PARALLEL print *, 'dress done' diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index b0896c00..1dc3176e 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -50,7 +50,6 @@ subroutine run_dress_slave(thread,iproc,energy) end do do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - if(task_id /= 0) then read (task,*) subset, i_generator delta_ij_loc = 0d0 diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index 4c0408d8..270eec17 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -4,104 +4,14 @@ program shifted_bk BEGIN_DOC ! TODO END_DOC + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + call diagonalize_CI() call dress_zmq() end - - BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] -&BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] -&BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] -&BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] - implicit none - current_generator_(:) = 0 - a_h_i = 0d0 - a_s2_i = 0d0 - END_PROVIDER - - - -subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) - use bitmasks - implicit none - BEGIN_DOC - !delta_ij_loc(:,:,1) : dressing column for H - !delta_ij_loc(:,:,2) : dressing column for S2 - !i_gen : generator index in psi_det_generators - !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) - !n_minilist : size of minilist - !alpha : alpha determinant - END_DOC - integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen - integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) - integer,intent(in) :: minilist(n_minilist) - double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision :: haa, hij, sij - double precision, external :: diag_H_mat_elem_fock - integer :: i,j,k,l,m, l_sd - double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates), c_alpha - - - a_h_psi = 0d0 - - if(current_generator_(iproc) /= i_gen) then - current_generator_(iproc) = i_gen - call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) - end if - - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - - do l_sd=1,n_minilist - call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) - a_h_i(l_sd, iproc) = hij - a_s2_i(l_sd, iproc) = sij - do i=1,Nstates - a_h_psi(i) += hij * psi_coef(minilist(l_sd), i) - end do - end do - - - do i=1,Nstates - de = E0_denominator(i) - haa - if(DABS(de) < 1D-5) cycle - - c_alpha = a_h_psi(i) / de - - do l_sd=1,n_minilist - hdress = c_alpha * a_h_i(l_sd, iproc) - sdress = c_alpha * a_s2_i(l_sd, iproc) - delta_ij_loc(i, minilist(l_sd), 1) += hdress - delta_ij_loc(i, minilist(l_sd), 2) += sdress - end do - end do -end subroutine - - -BEGIN_PROVIDER [ logical, initialize_E0_denominator ] - implicit none - BEGIN_DOC - ! If true, initialize pt2_E0_denominator - END_DOC - initialize_E0_denominator = .True. -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, E0_denominator, (N_states) ] - implicit none - BEGIN_DOC - ! E0 in the denominator of the PT2 - END_DOC - if (initialize_E0_denominator) then - E0_denominator(1:N_states) = psi_energy(1:N_states) - ! call ezfio_get_full_ci_zmq_energy(pt2_E0_denominator(1)) - ! pt2_E0_denominator(1) -= nuclear_repulsion - ! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion - ! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) - else - E0_denominator = -huge(1.d0) - endif -END_PROVIDER - - diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f new file mode 100644 index 00000000..99b0fa79 --- /dev/null +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -0,0 +1,97 @@ + + + BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] +&BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] +&BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] +&BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] + implicit none + current_generator_(:) = 0 + a_h_i = 0d0 + a_s2_i = 0d0 + END_PROVIDER + + + +subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) + use bitmasks + implicit none + BEGIN_DOC + !delta_ij_loc(:,:,1) : dressing column for H + !delta_ij_loc(:,:,2) : dressing column for S2 + !i_gen : generator index in psi_det_generators + !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) + !n_minilist : size of minilist + !alpha : alpha determinant + END_DOC + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen + integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) + integer,intent(in) :: minilist(n_minilist) + double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) + double precision :: haa, hij, sij + double precision, external :: diag_H_mat_elem_fock + integer :: i,j,k,l,m, l_sd + double precision :: hdress, sdress + double precision :: de, a_h_psi(Nstates), c_alpha + + + a_h_psi = 0d0 + + if(current_generator_(iproc) /= i_gen) then + current_generator_(iproc) = i_gen + call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) + end if + + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + + do l_sd=1,n_minilist + call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) + a_h_i(l_sd, iproc) = hij + a_s2_i(l_sd, iproc) = sij + do i=1,Nstates + a_h_psi(i) += hij * psi_coef(minilist(l_sd), i) + end do + end do + + + do i=1,Nstates + de = E0_denominator(i) - haa + if(DABS(de) < 1D-5) cycle + + c_alpha = a_h_psi(i) / de + + do l_sd=1,n_minilist + hdress = c_alpha * a_h_i(l_sd, iproc) + sdress = c_alpha * a_s2_i(l_sd, iproc) + delta_ij_loc(i, minilist(l_sd), 1) += hdress + delta_ij_loc(i, minilist(l_sd), 2) += sdress + end do + end do +end subroutine + + +BEGIN_PROVIDER [ logical, initialize_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize pt2_E0_denominator + END_DOC + initialize_E0_denominator = .True. +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, E0_denominator, (N_states) ] + implicit none + BEGIN_DOC + ! E0 in the denominator of the PT2 + END_DOC + if (initialize_E0_denominator) then + E0_denominator(1:N_states) = psi_energy(1:N_states) + ! call ezfio_get_full_ci_zmq_energy(pt2_E0_denominator(1)) + ! pt2_E0_denominator(1) -= nuclear_repulsion + ! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion + ! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) + else + E0_denominator = -huge(1.d0) + endif +END_PROVIDER + + diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f new file mode 100644 index 00000000..d7812b97 --- /dev/null +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -0,0 +1,16 @@ + +program shifted_bk + implicit none + BEGIN_DOC +! TODO + END_DOC + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + call diagonalize_CI() + call dress_slave() +end + From 2bf17db149467f22950ea3ecc1b58a9b4390db9d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 22 Mar 2018 14:07:20 +0100 Subject: [PATCH 06/29] unfinished shifted_bk stochastic selection - no undressing --- plugins/dress_zmq/dress_general.irp.f | 15 ++++-- plugins/dress_zmq/dressing.irp.f | 28 ++++++++--- plugins/shiftedbk/shifted_bk_routines.irp.f | 52 +++++++++++++++++++-- 3 files changed, 79 insertions(+), 16 deletions(-) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index 0bf7e715..e31f1742 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -12,11 +12,10 @@ subroutine run_dressing(N_st,energy) integer :: iteration integer :: n_it_dress_max - double precision :: thresh_dress + double precision :: thresh_dress, dummy thresh_dress = thresh_dressed_ci n_it_dress_max = n_it_max_dressed_ci - if(n_it_dress_max == 1) then do j=1,N_states do i=1,N_det @@ -32,14 +31,19 @@ subroutine run_dressing(N_st,energy) delta_E = 1.d0 iteration = 0 do while (delta_E > thresh_dress) + N_det_delta_ij = N_det + touch N_det_delta_ij iteration += 1 print *, '===============================================' print *, 'Iteration', iteration, '/', n_it_dress_max print *, '===============================================' print *, '' E_old = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) + !print *, "DELTA IJ", delta_ij(1,1,1) + if(.true.) dummy = delta_ij_tmp(1,1,1) + if(.true.) call delta_ij_done() do i=1,N_st - call write_double(6,ci_energy_dressed(i),"Energy") + if(.true.) call write_double(6,ci_energy_dressed(i),"Energy") enddo call diagonalize_ci_dressed E_new = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) @@ -55,8 +59,9 @@ subroutine run_dressing(N_st,energy) exit endif enddo - call write_double(6,ci_energy_dressed(1),"Final energy") + if(.true.) call write_double(6,ci_energy_dressed(1),"Final energy") endif - energy(1:N_st) = ci_energy_dressed(1:N_st) + + if(.true.) energy(1:N_st) = 0d0 ! ci_energy_dressed(1:N_st) end diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 0c15ee0b..ce89415d 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -63,8 +63,18 @@ BEGIN_PROVIDER [ double precision, dress_norm_acc, (0:N_det, N_states) ] END_PROVIDER +BEGIN_PROVIDER [ integer , N_det_delta_ij ] + implicit none + !N_det_delta_ij = 0!N_det +END_PROVIDER -BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] +BEGIN_PROVIDER [ double precision, delta_ij, (N_states, N_det, 2) ] + implicit none + if(.true.) delta_ij(:,:N_det_delta_ij, :) = delta_ij_tmp(:,:,:) + delta_ij(:,N_det_delta_ij+1:,:) = 0d0 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] use bitmasks implicit none @@ -72,11 +82,15 @@ BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) double precision :: E_CI_before(N_states), relative_error -! double precision, save :: errr = 0d0 - allocate(dress(N_states), del(N_states, N_det), del_s2(N_states, N_det)) + ! prevents re-providing if delta_ij_tmp is + ! just being copied + if(N_det_delta_ij /= N_det) return + + if(.true.) then + allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) - delta_ij = 0d0 + delta_ij_tmp = 0d0 E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion threshold_selectors = 1.d0 @@ -90,11 +104,11 @@ BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] call write_double(6,relative_error,"Convergence of the stochastic algorithm") call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) - delta_ij(:,:,1) = del(:,:) - delta_ij(:,:,2) = del_s2(:,:) + delta_ij_tmp(:,:,1) = del(:,:) + delta_ij_tmp(:,:,2) = del_s2(:,:) deallocate(dress, del, del_s2) - + end if END_PROVIDER diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 99b0fa79..98faf2ec 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,17 +1,56 @@ - +use selection_types BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] &BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] +&BEGIN_PROVIDER [ type(selection_buffer), sb, (Nproc) ] +&BEGIN_PROVIDER [ double precision, N_det_increase_factor ] implicit none + integer :: i + integer :: n_det_add + + N_det_increase_factor = 1d0 + current_generator_(:) = 0 + do i=1,Nproc + n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) + call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) + end do a_h_i = 0d0 a_s2_i = 0d0 END_PROVIDER +subroutine delta_ij_done() + implicit none + integer :: i, n_det_add + + call sort_selection_buffer(sb(1)) + + do i=2,Nproc + call sort_selection_buffer(sb(i)) + call merge_selection_buffers(sb(i), sb(1)) + end do + + call sort_selection_buffer(sb(1)) + + call fill_H_apply_buffer_no_selection(sb(1)%cur,sb(1)%det,N_int,0) + call copy_H_apply_buffer_to_wf() + if (s2_eig.or.(N_states > 1) ) then + call make_s2_eigenfunction + endif + !call save_wavefunction + n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) + do i=1,Nproc + call delete_selection_buffer(sb(i)) + call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) + end do + !delta_ij = 0d0 +end subroutine + + subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) use bitmasks implicit none @@ -31,8 +70,8 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili double precision, external :: diag_H_mat_elem_fock integer :: i,j,k,l,m, l_sd double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates), c_alpha - + double precision :: de, a_h_psi(Nstates), c_alpha, contrib + a_h_psi = 0d0 @@ -52,13 +91,15 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili end do end do + contrib = 0d0 do i=1,Nstates de = E0_denominator(i) - haa if(DABS(de) < 1D-5) cycle c_alpha = a_h_psi(i) / de - + contrib = min(contrib, c_alpha * a_h_psi(i)) + do l_sd=1,n_minilist hdress = c_alpha * a_h_i(l_sd, iproc) sdress = c_alpha * a_s2_i(l_sd, iproc) @@ -66,6 +107,9 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili delta_ij_loc(i, minilist(l_sd), 2) += sdress end do end do + + call add_to_selection_buffer(sb(iproc), alpha, contrib) + end subroutine From 611137fad0c2449d2123065c84898c2b5e0f092e Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 23 Mar 2018 11:06:52 +0100 Subject: [PATCH 07/29] undressing without s2_eig --- plugins/shiftedbk/shifted_bk_routines.irp.f | 113 +++++++++++++++++--- 1 file changed, 96 insertions(+), 17 deletions(-) diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 98faf2ec..3bffcc0a 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -24,9 +24,13 @@ use selection_types subroutine delta_ij_done() + use bitmasks implicit none integer :: i, n_det_add + if(N_det /= N_det_delta_ij) stop "N_det /= N_det_delta_ij" + + call sort_selection_buffer(sb(1)) do i=2,Nproc @@ -36,10 +40,16 @@ subroutine delta_ij_done() call sort_selection_buffer(sb(1)) + call undress_with_alpha(sb(1)%det, sb(1)%cur) + call fill_H_apply_buffer_no_selection(sb(1)%cur,sb(1)%det,N_int,0) call copy_H_apply_buffer_to_wf() + if(N_det == N_det_delta_ij) stop "N_det == N_det_delta_ij" if (s2_eig.or.(N_states > 1) ) then - call make_s2_eigenfunction + print *, "***" + print *, "*** WARNING - SHIFTED_BK currently does not support s2_eig ***" + print *, "***" + !call make_s2_eigenfunction endif !call save_wavefunction n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) @@ -47,13 +57,56 @@ subroutine delta_ij_done() call delete_selection_buffer(sb(i)) call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) end do - !delta_ij = 0d0 end subroutine -subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) - use bitmasks - implicit none +subroutine undress_with_alpha(alpha, n_alpha) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: alpha(N_int,2,n_alpha) + integer, intent(in) :: n_alpha + integer, allocatable :: minilist(:) + integer(bit_kind), allocatable :: det_minilist(:,:,:) + double precision, allocatable :: delta_ij_loc(:,:,:,:) + integer :: i, j, k, ex, n_minilist, iproc + double precision :: haa, contrib + integer, external :: omp_get_thread_num + allocate(minilist(N_det), det_minilist(N_int, 2, N_det), delta_ij_loc(N_states, N_det, 2, Nproc)) + delta_ij_loc = 0d0 + print *, "UNDRESSING..." + + !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC,1) PRIVATE(i, j, iproc, n_minilist, ex) & + !$OMP PRIVATE(det_minilist, minilist, haa, contrib) + do i=1, n_alpha + iproc = omp_get_thread_num()+1 + if(mod(i,10000) == 0) print *, "UNDRESSING", i, "/", n_alpha, iproc + n_minilist = 0 + do j=1, N_det + call get_excitation_degree(alpha(1,1,i), psi_det(1,1,j), ex, N_int) + if(ex <= 2) then + n_minilist += 1 + det_minilist(:,:,n_minilist) = psi_det(:,:,j) + minilist(n_minilist) = j + end if + end do + if(n_minilist > 0) then + call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) + call dress_with_alpha_(N_states, N_det, N_int, delta_ij_loc(1,1,1,iproc), & + minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, iproc) + end if + end do + !$OMP END PARALLEL DO + + do i=Nproc,1,-1 + delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,i) + end do +end subroutine + + +subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) + use bitmasks + implicit none BEGIN_DOC !delta_ij_loc(:,:,1) : dressing column for H !delta_ij_loc(:,:,2) : dressing column for S2 @@ -62,25 +115,20 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili !n_minilist : size of minilist !alpha : alpha determinant END_DOC - integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision :: haa, hij, sij - double precision, external :: diag_H_mat_elem_fock + double precision, intent(out) :: contrib + double precision, intent(in) :: haa + double precision :: hij, sij integer :: i,j,k,l,m, l_sd double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates), c_alpha, contrib - - + double precision :: de, a_h_psi(Nstates), c_alpha + + contrib = 0d0 a_h_psi = 0d0 - if(current_generator_(iproc) /= i_gen) then - current_generator_(iproc) = i_gen - call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) - end if - - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) do l_sd=1,n_minilist call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) @@ -107,7 +155,38 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili delta_ij_loc(i, minilist(l_sd), 2) += sdress end do end do +end subroutine + +subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) + use bitmasks + implicit none + BEGIN_DOC + !delta_ij_loc(:,:,1) : dressing column for H + !delta_ij_loc(:,:,2) : dressing column for S2 + !i_gen : generator index in psi_det_generators + !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) + !n_minilist : size of minilist + !alpha : alpha determinant + END_DOC + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen + integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) + integer,intent(in) :: minilist(n_minilist) + double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) + double precision, external :: diag_H_mat_elem_fock + double precision :: haa, contrib + + + + if(current_generator_(iproc) /= i_gen) then + current_generator_(iproc) = i_gen + call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) + end if + + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) + call add_to_selection_buffer(sb(iproc), alpha, contrib) end subroutine From a865a842d20e403e212c5b955501b661b1dd0796 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 26 Mar 2018 13:08:26 +0200 Subject: [PATCH 08/29] undressing with s2_eig --- plugins/dress_zmq/NEEDED_CHILDREN_MODULES | 2 +- plugins/dress_zmq/alpha_factory.irp.f | 3 +- plugins/dress_zmq/dressing.irp.f | 2 +- plugins/shiftedbk/NEEDED_CHILDREN_MODULES | 2 +- plugins/shiftedbk/shifted_bk_routines.irp.f | 88 +++++++++++++-------- 5 files changed, 57 insertions(+), 40 deletions(-) diff --git a/plugins/dress_zmq/NEEDED_CHILDREN_MODULES b/plugins/dress_zmq/NEEDED_CHILDREN_MODULES index 55f8f738..96b2cfdc 100644 --- a/plugins/dress_zmq/NEEDED_CHILDREN_MODULES +++ b/plugins/dress_zmq/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Selectors_full Generators_full ZMQ +ZMQ diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 190a94ad..ccbf177a 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -84,8 +84,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index !hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) !hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) !particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) - !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) - + !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) enddo integer :: N_holes(2), N_particles(2) diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index ce89415d..9f4ede26 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 1.d-4 + relative_error = 1.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) diff --git a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES index c3290687..4f09bfc8 100644 --- a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES +++ b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -dress_zmq DavidsonDressed +dress_zmq DavidsonDressed Selectors_full Generators_full diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 3bffcc0a..7e32568c 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -9,12 +9,12 @@ use selection_types implicit none integer :: i integer :: n_det_add - + N_det_increase_factor = 1d0 current_generator_(:) = 0 + n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) do i=1,Nproc - n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) end do a_h_i = 0d0 @@ -26,11 +26,13 @@ use selection_types subroutine delta_ij_done() use bitmasks implicit none - integer :: i, n_det_add + integer :: i, n_det_add, old_det_gen + integer(bit_kind), allocatable :: old_generators(:,:,:) - if(N_det /= N_det_delta_ij) stop "N_det /= N_det_delta_ij" - - + allocate(old_generators(N_int, 2, N_det_generators)) + old_generators(:,:,:) = psi_det_generators(:,:,:N_det_generators) + old_det_gen = N_det_generators + call sort_selection_buffer(sb(1)) do i=2,Nproc @@ -39,28 +41,19 @@ subroutine delta_ij_done() end do call sort_selection_buffer(sb(1)) - - call undress_with_alpha(sb(1)%det, sb(1)%cur) call fill_H_apply_buffer_no_selection(sb(1)%cur,sb(1)%det,N_int,0) call copy_H_apply_buffer_to_wf() - if(N_det == N_det_delta_ij) stop "N_det == N_det_delta_ij" + if (s2_eig.or.(N_states > 1) ) then - print *, "***" - print *, "*** WARNING - SHIFTED_BK currently does not support s2_eig ***" - print *, "***" - !call make_s2_eigenfunction + call make_s2_eigenfunction endif - !call save_wavefunction - n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) - do i=1,Nproc - call delete_selection_buffer(sb(i)) - call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) - end do + call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) + call save_wavefunction end subroutine -subroutine undress_with_alpha(alpha, n_alpha) +subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) use bitmasks implicit none @@ -69,20 +62,46 @@ subroutine undress_with_alpha(alpha, n_alpha) integer, allocatable :: minilist(:) integer(bit_kind), allocatable :: det_minilist(:,:,:) double precision, allocatable :: delta_ij_loc(:,:,:,:) - integer :: i, j, k, ex, n_minilist, iproc - double precision :: haa, contrib + integer :: exc(0:2,2,2), h1, h2, p1, p2, s1, s2 + integer :: i, j, k, ex, n_minilist, iproc, degree + double precision :: haa, contrib, phase + logical :: ok integer, external :: omp_get_thread_num - allocate(minilist(N_det), det_minilist(N_int, 2, N_det), delta_ij_loc(N_states, N_det, 2, Nproc)) + + integer,intent(in) :: old_det_gen + integer(bit_kind), intent(in) :: old_generators(N_int, 2, old_det_gen) + + allocate(minilist(N_det_delta_ij), det_minilist(N_int, 2, N_det_delta_ij), delta_ij_loc(N_states, N_det_delta_ij, 2, Nproc)) + delta_ij_loc = 0d0 - print *, "UNDRESSING..." - + !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC,1) PRIVATE(i, j, iproc, n_minilist, ex) & - !$OMP PRIVATE(det_minilist, minilist, haa, contrib) - do i=1, n_alpha + !$OMP PRIVATE(det_minilist, minilist, haa, contrib) & + !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) + do i=n_alpha,1,-1 iproc = omp_get_thread_num()+1 if(mod(i,10000) == 0) print *, "UNDRESSING", i, "/", n_alpha, iproc n_minilist = 0 - do j=1, N_det + ok = .false. + + do j=1, old_det_gen + call get_excitation_degree(alpha(1,1,i), old_generators(1,1,j), ex, N_int) + if(ex <= 2) then + call get_excitation(old_generators(1,1,j), alpha(1,1,i), exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + ok = (mo_class(h1)(1:1) == 'A' .or. mo_class(h1)(1:1) == 'I') .and. & + (mo_class(p1)(1:1) == 'A' .or. mo_class(p1)(1:1) == 'V') + if(ok .and. degree == 2) then + ok = (mo_class(h2)(1:1) == 'A' .or. mo_class(h2)(1:1) == 'I') .and. & + (mo_class(p2)(1:1) == 'A' .or. mo_class(p2)(1:1) == 'V') + end if + if(ok) exit + end if + end do + + if(.not. ok) cycle + + do j=1, N_det_delta_ij call get_excitation_degree(alpha(1,1,i), psi_det(1,1,j), ex, N_int) if(ex <= 2) then n_minilist += 1 @@ -90,16 +109,15 @@ subroutine undress_with_alpha(alpha, n_alpha) minilist(n_minilist) = j end if end do - if(n_minilist > 0) then - call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) - call dress_with_alpha_(N_states, N_det, N_int, delta_ij_loc(1,1,1,iproc), & - minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, iproc) - end if + call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) + call dress_with_alpha_(N_states, N_det_delta_ij, N_int, delta_ij_loc(1,1,1,iproc), & + minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, iproc) end do !$OMP END PARALLEL DO - do i=Nproc,1,-1 + do i=1,Nproc delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,i) + !print *, "DELTA_IJ_LOC", delta_ij_loc(:,2:5,2,i) end do end subroutine @@ -118,7 +136,7 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) - double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) + double precision, intent(inout) :: delta_ij_loc(Nstates,Ndet,2) double precision, intent(out) :: contrib double precision, intent(in) :: haa double precision :: hij, sij From 4394c04728e929d4c98bc7b82d0ca8814ebe2e60 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 27 Mar 2018 16:22:14 +0200 Subject: [PATCH 09/29] reduced bandwidth and checkpoint updates --- plugins/dress_zmq/dress_slave.irp.f | 3 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 170 +++++++++++++------ plugins/dress_zmq/dressing.irp.f | 1 + plugins/dress_zmq/run_dress_slave.irp.f | 40 ++++- plugins/shiftedbk/shifted_bk_routines.irp.f | 9 +- 5 files changed, 154 insertions(+), 69 deletions(-) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 57fce783..feea575e 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -43,14 +43,13 @@ subroutine run_wf ! Selection ! --------- - print *, 'dress' call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - + !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() call dress_slave_tcp(i+1, energy) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 06b5d538..3ccdc9f7 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -134,6 +134,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, implicit none + integer, parameter :: delta_loc_N = 2 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(in) :: istate @@ -144,7 +145,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: delta_loc(:,:,:,:), delta_det(:,:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -154,7 +155,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: more integer :: i, j, k, i_state, N - integer :: task_id, ind + integer :: task_id, ind, inds(delta_loc_N) double precision, save :: time0 = -1.d0 double precision :: time, timeLast, old_tooth double precision, external :: omp_get_wtime @@ -162,12 +163,17 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) integer :: total_computed - + integer :: delta_loc_cur + double precision :: fac(delta_loc_N) , wei(delta_loc_N) + logical :: ok + + delta_loc_cur = 1 + delta = 0d0 delta_s2 = 0d0 allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2)) + allocate(delta_loc(N_states, N_det, 2, delta_loc_N)) dress_detail = 0d0 delta_det = 0d0 cp = 0d0 @@ -196,58 +202,102 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, cur_cp = 0 old_cur_cp = 0 logical :: loop + integer :: felem, felem_loc loop = .true. - + felem = N_det+1 pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) - dress_mwen(:) = 0d0 - - !!!!! A VERIFIER !!!!! - do i_state=1,N_states - do i=1, N_det - dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(i, i_state) - end do - end do - - dress_detail(:, ind) += dress_mwen(:) - do j=1,N_cp !! optimizable - if(cps(ind, j) > 0d0) then - if(tooth_of_det(ind) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" - double precision :: fac - integer :: toothMwen - logical :: fracted - fac = cps(ind, j) / cps_N(j) * dress_weight_inv(ind) * comb_step - cp(1:N_states,1:N_det,j,1) += delta_loc(1:N_states,1:N_det,1) * fac - cp(1:N_states,1:N_det,j,2) += delta_loc(1:N_states,1:N_det,2) * fac - end if - end do - toothMwen = tooth_of_det(ind) - fracted = (toothMwen /= 0) - if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - - if(fracted) then - delta_det(1:N_states,1:N_det,toothMwen-1, 1) = delta_det(1:N_states,1:N_det,toothMwen-1, 1) + delta_loc(1:N_states,1:N_det,1) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen-1, 2) = delta_det(1:N_states,1:N_det,toothMwen-1, 2) + delta_loc(1:N_states,1:N_det,2) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) * (fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) * (fractage(toothMwen)) - else - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) - end if - - parts_to_get(ind) -= 1 - if(parts_to_get(ind) == 0) then - actually_computed(ind) = .true. - total_computed += 1 - end if - - + call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), task_id, felem_loc) + felem = min(felem_loc, felem) + dress_mwen(:) = 0d0 + integer, external :: zmq_delete_tasks + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then stop 'Unable to delete tasks' endif if(more == 0) loop = .false. + + do i_state=1,N_states + do i=1, N_det + dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state) + end do + end do + + dress_detail(:, ind) += dress_mwen(:) + wei(delta_loc_cur) = dress_weight_inv(ind) + inds(delta_loc_cur) = ind + + if(delta_loc_cur == delta_loc_N .or. .not. loop) then + do j=1,N_cp !! optimizable + fac = 0d0 + ok = .false. + + do i=1,delta_loc_cur + !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step + fac(i) = cps(inds(i), j) * wei(i) * comb_step + if(fac(i) /= 0d0) ok = .true. + end do + + if(ok) then + do i=felem,N_det + cp(:,i,j,1) += delta_loc(:,i,1,1) * fac(1) & + + delta_loc(:,i,1,2) * fac(2) + !+ delta_loc(:,i,1,3) * fac(3) & + !+ delta_loc(:,i,1,4) * fac(4) & + !+ delta_loc(:,i,1,5) * fac(5) & + !+ delta_loc(:,i,1,6) * fac(6) & + !+ delta_loc(:,i,1,7) * fac(7) & + !+ delta_loc(:,i,1,8) * fac(8) + + cp(:,i,j,1) += delta_loc(:,i,2,1) * fac(1) & + + delta_loc(:,i,2,2) * fac(2) + !+ delta_loc(:,i,2,3) * fac(3) & + !+ delta_loc(:,i,2,4) * fac(4) & + !+ delta_loc(:,i,2,5) * fac(5) & + !+ delta_loc(:,i,2,6) * fac(6) & + !+ delta_loc(:,i,2,7) * fac(7) & + !+ delta_loc(:,i,2,8) * fac(8) + end do + !cp(1:N_states,indi:N_det,j,1) += delta_loc(1:N_states,indi:N_det,1) * fac + !cp(1:N_states,indi:N_det,j,2) += delta_loc(1:N_states,indi:N_det,2) * fac + end if + end do + + do i=1,delta_loc_cur + logical :: fracted + integer :: toothMwen + ind = inds(i) + + toothMwen = tooth_of_det(ind) + fracted = (toothMwen /= 0) + if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) + + if(fracted) then + delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(felem:N_states,1:N_det,1,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(felem:N_states,1:N_det,2,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(felem:N_states,1:N_det,1,i) * (fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(felem:N_states,1:N_det,2,i) * (fractage(toothMwen)) + else + delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) + delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) + end if + + parts_to_get(ind) -= 1 + if(parts_to_get(ind) == 0) then + actually_computed(ind) = .true. + total_computed += 1 + end if + end do + felem = N_det+1 + delta_loc_cur = 1 + else + delta_loc_cur += 1 + cycle + end if + + + time = omp_get_wtime() if((time - timeLast > 2d0) .or. (.not. loop)) then @@ -303,18 +353,22 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, endif end if end do pullLoop + + delta (1:N_states,1:N_det) = 0d0 + delta_s2(1:N_states,1:N_det) = 0d0 if(total_computed == N_det_generators) then - delta (1:N_states,1:N_det) = 0d0 - delta_s2(1:N_states,1:N_det) = 0d0 do i=comb_teeth+1,0,-1 delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) end do else - - delta (1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,1) - delta_s2(1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,2) + do i=1,cur_cp + delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1) + delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2) + end do + delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp) + delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp) do i=cp_first_tooth(cur_cp)-1,0,-1 delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) @@ -363,7 +417,7 @@ end function ! gen_per_cp : number of generators per checkpoint END_DOC comb_teeth = 64 - N_cps_max = 64 + N_cps_max = 32 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -455,13 +509,19 @@ END_PROVIDER end if end do end do - cps(:, N_cp) = 0d0 cp_first_tooth(N_cp) = comb_teeth+1 iorder = -1 do i=1,N_cp-1 call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) end do + + do i=1,N_det_generators + do j=N_cp,2,-1 + cps(i,j) -= cps(i,j-1) + end do + end do + cps(:, N_cp) = 0d0 END_PROVIDER diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 9f4ede26..3a55f7b7 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -101,6 +101,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! errr = 1d-4 ! end if relative_error = 1.d-5 + call write_double(6,relative_error,"Convergence of the stochastic algorithm") call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 1dc3176e..add0091e 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -74,15 +74,30 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: delta_loc(N_states, N_det, 2) integer, intent(in) :: ind, task_id - integer :: rc, i - + integer :: rc, i, j, felem + + felem = 1 + + dloop : do i=1, N_det + do j=1,N_states + if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then + felem = i + exit dloop + end if + end do + end do dloop rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc, 8*N_states*N_det*2, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det*2) stop "push" + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "push" @@ -97,11 +112,12 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) +subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id, felem) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + integer, intent(out) :: felem integer, intent(out) :: ind integer, intent(out) :: task_id integer :: rc, i @@ -110,8 +126,16 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc, N_states*8*N_det*2, 0) - if(rc /= 8*N_states*N_det*2) stop "pull" + rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) + if(rc /= 4) stop "pull" + + delta_loc(:,:felem,:) = 0d0 + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) + if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) + if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "pull" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 7e32568c..2aeff1e0 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -75,7 +75,7 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) delta_ij_loc = 0d0 - !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC,1) PRIVATE(i, j, iproc, n_minilist, ex) & + !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) & !$OMP PRIVATE(det_minilist, minilist, haa, contrib) & !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) do i=n_alpha,1,-1 @@ -115,10 +115,11 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) end do !$OMP END PARALLEL DO - do i=1,Nproc - delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,i) - !print *, "DELTA_IJ_LOC", delta_ij_loc(:,2:5,2,i) + do i=2,Nproc + delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i) end do + + delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,1) end subroutine From de1b5d6874c54b3b66f70e453a7d86ff8bff9df4 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 28 Mar 2018 16:23:30 +0200 Subject: [PATCH 10/29] further small improvements --- plugins/dress_zmq/dress_stoch_routines.irp.f | 112 +++++++++++++------ plugins/dress_zmq/dressing.irp.f | 2 +- 2 files changed, 76 insertions(+), 38 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 3ccdc9f7..4941b6fa 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -98,7 +98,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) print *, irp_here, ': Failed in zmq_set_running' endif - !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) & !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then @@ -163,7 +163,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) integer :: total_computed - integer :: delta_loc_cur + integer :: delta_loc_cur, is double precision :: fac(delta_loc_N) , wei(delta_loc_N) logical :: ok @@ -219,7 +219,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do i_state=1,N_states - do i=1, N_det + do i=felem_loc, N_det dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state) end do end do @@ -232,35 +232,36 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do j=1,N_cp !! optimizable fac = 0d0 ok = .false. - do i=1,delta_loc_cur !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step fac(i) = cps(inds(i), j) * wei(i) * comb_step - if(fac(i) /= 0d0) ok = .true. + if(fac(i) /= 0d0) then + ok = .true. + end if end do - + if(ok) then - do i=felem,N_det - cp(:,i,j,1) += delta_loc(:,i,1,1) * fac(1) & - + delta_loc(:,i,1,2) * fac(2) - !+ delta_loc(:,i,1,3) * fac(3) & - !+ delta_loc(:,i,1,4) * fac(4) & + do i=felem,N_det_generators + do is=1,N_states + cp(is,i,j,1) += delta_loc(is,i,1,1) * fac(1) & + + delta_loc(is,i,1,2) * fac(2) + !+ delta_loc(is,i,1,3) * fac(3) & + !+ delta_loc(is,i,1,4) * fac(4) & !+ delta_loc(:,i,1,5) * fac(5) & !+ delta_loc(:,i,1,6) * fac(6) & !+ delta_loc(:,i,1,7) * fac(7) & !+ delta_loc(:,i,1,8) * fac(8) - cp(:,i,j,1) += delta_loc(:,i,2,1) * fac(1) & - + delta_loc(:,i,2,2) * fac(2) - !+ delta_loc(:,i,2,3) * fac(3) & - !+ delta_loc(:,i,2,4) * fac(4) & + cp(is,i,j,2) += delta_loc(is,i,2,1) * fac(1) & + + delta_loc(is,i,2,2) * fac(2) + !+ delta_loc(is,i,2,3) * fac(3) & + !+ delta_loc(is,i,2,4) * fac(4) & !+ delta_loc(:,i,2,5) * fac(5) & !+ delta_loc(:,i,2,6) * fac(6) & !+ delta_loc(:,i,2,7) * fac(7) & !+ delta_loc(:,i,2,8) * fac(8) end do - !cp(1:N_states,indi:N_det,j,1) += delta_loc(1:N_states,indi:N_det,1) * fac - !cp(1:N_states,indi:N_det,j,2) += delta_loc(1:N_states,indi:N_det,2) * fac + end do end if end do @@ -274,10 +275,10 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) if(fracted) then - delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(felem:N_states,1:N_det,1,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(felem:N_states,1:N_det,2,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(felem:N_states,1:N_det,1,i) * (fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(felem:N_states,1:N_det,2,i) * (fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(1:N_states,felem:N_det,1,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(1:N_states,felem:N_det,2,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) * (fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) * (fractage(toothMwen)) else delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) @@ -289,7 +290,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, total_computed += 1 end if end do - felem = N_det+1 + felem = N_det_generators+1 delta_loc_cur = 1 else delta_loc_cur += 1 @@ -431,18 +432,22 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] implicit none - logical, allocatable :: computed(:) + logical, allocatable :: computed(:), comp_filler(:) integer :: i, j, last_full, dets(comb_teeth) integer :: k, l, cur_cp, under_det(comb_teeth+1) integer, allocatable :: iorder(:), first_cp(:) + integer, allocatable :: filler(:) + integer :: nfiller, lfiller, cfiller + allocate(filler(n_det_generators)) allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) allocate(computed(N_det_generators)) + allocate(comp_filler(N_det_generators)) first_cp = 1 cps = 0d0 cur_cp = 1 done_cp_at = 0 - + comp_filler = .false. computed = .false. N_dress_jobs = first_det_of_comb - 1 @@ -453,6 +458,8 @@ END_PROVIDER l=first_det_of_comb call RANDOM_NUMBER(comb) + lfiller = 1 + nfiller = 1 do i=1,N_det_generators comb(i) = comb(i) * comb_step !DIR$ FORCEINLINE @@ -469,15 +476,45 @@ END_PROVIDER if (N_dress_jobs == N_det_generators) exit end if - do while (computed(l)) - l=l+1 - enddo - k=N_dress_jobs+1 - dress_jobs(k) = l - computed(l) = .True. - N_dress_jobs = k + + do l=1,N_det_generators + if((.not. computed(l)) .and. (.not. comp_filler(l))) exit + end do + + if(l > N_det_generators) exit + + cfiller = tooth_of_det(l) + if(cfiller > lfiller) then + do j=1,nfiller-1 + if(.not. computed(filler(j))) then + k=N_dress_jobs+1 + dress_jobs(k) = filler(j) + N_dress_jobs = k + end if + computed(filler(j)) = .true. + end do + nfiller = 2 + filler(1) = l + lfiller = cfiller + else + filler(nfiller) = l + nfiller += 1 + end if + comp_filler(l) = .True. enddo + + + do j=1,nfiller-1 + if(.not. computed(filler(j)))then + k=N_dress_jobs+1 + dress_jobs(k) = filler(j) + N_dress_jobs = k + end if + computed(filler(j)) = .true. + end do + N_cp = cur_cp + if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max stop "error in jobs creation" @@ -510,18 +547,19 @@ END_PROVIDER end do end do cp_first_tooth(N_cp) = comb_teeth+1 - - iorder = -1 - do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) - end do do i=1,N_det_generators do j=N_cp,2,-1 cps(i,j) -= cps(i,j-1) end do end do - cps(:, N_cp) = 0d0 + + iorder = -1 + do i=1,N_cp-1 + call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) + end do + + cps(:, N_cp) = 0d0 END_PROVIDER diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 3a55f7b7..0a640d49 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 1.d-5 + relative_error = 0d0! 1.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") From 4aa4c6c96ef174293d14fdd4c4c704e06bb35994 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 30 Mar 2018 18:16:00 +0200 Subject: [PATCH 11/29] custom buffers in dress_zmq --- plugins/dress_zmq/dress_stoch_routines.irp.f | 12 +++- plugins/dress_zmq/run_dress_slave.irp.f | 74 +++++++++++++++++++- plugins/shiftedbk/shifted_bk_routines.irp.f | 29 ++++++++ 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 4941b6fa..6262920a 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -163,10 +163,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) integer :: total_computed - integer :: delta_loc_cur, is + integer :: delta_loc_cur, is, N_buf(3) double precision :: fac(delta_loc_N) , wei(delta_loc_N) logical :: ok + integer, allocatable :: int_buf(:) + double precision, allocatable :: double_buf(:) + integer(bit_kind), allocatable :: det_buf(:,:,:) + allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer),det_buf(N_int,2,N_dress_det_buffer)) delta_loc_cur = 1 delta = 0d0 @@ -206,7 +210,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, loop = .true. felem = N_det+1 pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), task_id, felem_loc) + call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) + call dress_pulled(int_buf, double_buf, det_buf, N_buf) felem = min(felem_loc, felem) dress_mwen(:) = 0d0 @@ -449,7 +454,8 @@ END_PROVIDER done_cp_at = 0 comp_filler = .false. computed = .false. - + cps_N = 1d0 + N_dress_jobs = first_det_of_comb - 1 do i=1, N_dress_jobs dress_jobs(i) = i diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index add0091e..375829df 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -1,3 +1,5 @@ +use bitmasks + BEGIN_PROVIDER [ integer, fragment_count ] implicit none BEGIN_DOC @@ -54,7 +56,7 @@ subroutine run_dress_slave(thread,iproc,energy) read (task,*) subset, i_generator delta_ij_loc = 0d0 call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - + call generator_done(i_generator) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, task_id) else @@ -67,6 +69,17 @@ subroutine run_dress_slave(thread,iproc,energy) end subroutine + BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] +&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] +&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] + implicit none + + dress_int_buffer = 0 + dress_double_buffer = 0d0 + dress_det_buffer = 0_bit_kind +END_PROVIDER + + subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) use f77_zmq implicit none @@ -89,7 +102,7 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - + rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" @@ -99,6 +112,29 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) if(rc /= 8*N_states*(N_det+1-felem)) stop "push" + + + + rc = f77_zmq_send( zmq_socket_push, N_dress_int_buffer, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, dress_int_buffer, 4*N_dress_int_buffer, ZMQ_SNDMORE) + if(rc /= 4*N_dress_int_buffer) stop "push" + + rc = f77_zmq_send( zmq_socket_push, N_dress_double_buffer, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, dress_double_buffer, 8*N_dress_double_buffer, ZMQ_SNDMORE) + if(rc /= 8*N_dress_double_buffer) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, N_dress_det_buffer, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, dress_det_buffer, 2*N_int*bit_kind*N_dress_det_buffer, ZMQ_SNDMORE) + if(rc /= 2*N_int*bit_kind*N_dress_det_buffer) stop "push" + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "push" @@ -112,15 +148,19 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id, felem) +subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + double precision, intent(out) :: double_buf(*) + integer, intent(out) :: int_buf(*) + integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) integer, intent(out) :: felem integer, intent(out) :: ind integer, intent(out) :: task_id integer :: rc, i + integer, intent(out) :: N_buf(3) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) @@ -137,6 +177,34 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id, felem) rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, N_buf(1), 4, 0) + if(rc /= 4) stop "pull" + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + + + rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) + if(rc /= 4*N_buf(1)) stop "pull1" + + + rc = f77_zmq_recv( zmq_socket_pull, N_buf(2), 4, 0) + if(rc /= 4) stop "pull" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + + rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) + if(rc /= 8*N_buf(2)) stop "pull2" + + + + + rc = f77_zmq_recv( zmq_socket_pull, N_buf(3), 4, 0) + if(rc /= 4) stop "pull" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + + rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "pull" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 2aeff1e0..9574584b 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,5 +1,16 @@ use selection_types + + BEGIN_PROVIDER [ integer, N_dress_int_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] + implicit none + N_dress_int_buffer = 1 + N_dress_double_buffer = 1 + N_dress_det_buffer = 1 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] &BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] @@ -21,6 +32,24 @@ use selection_types a_s2_i = 0d0 END_PROVIDER +subroutine generator_done(i_gen) + implicit none + integer, intent(in) :: i_gen + + !dress_int_buffer = ... +end subroutine + + +subroutine dress_pulled(int_buf, double_buf, det_buf, N_buf) + use bitmasks + implicit none + + integer, intent(in) :: N_buf(3) + integer, intent(in) :: int_buf(*) + double precision, intent(in) :: double_buf(*) + integer(bit_kind), intent(in) :: det_buf(N_int,2,*) + +end subroutine subroutine delta_ij_done() From 2e3c54e278e949fd430fa71698e5e9104d6a5adf Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Apr 2018 11:32:27 +0200 Subject: [PATCH 12/29] fixed custom buffers --- plugins/dress_zmq/dress_general.irp.f | 2 +- plugins/dress_zmq/dress_slave.irp.f | 6 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- plugins/dress_zmq/dress_zmq_routines.irp.f | 2 + plugins/dress_zmq/dressing.irp.f | 4 +- plugins/dress_zmq/run_dress_slave.irp.f | 123 ++++++++++--------- plugins/shiftedbk/shifted_bk.irp.f | 2 +- plugins/shiftedbk/shifted_bk_routines.irp.f | 99 +++++++++------ plugins/shiftedbk/shifted_bk_slave.irp.f | 2 +- 9 files changed, 142 insertions(+), 100 deletions(-) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index e31f1742..96bba521 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -45,7 +45,7 @@ subroutine run_dressing(N_st,energy) do i=1,N_st if(.true.) call write_double(6,ci_energy_dressed(i),"Energy") enddo - call diagonalize_ci_dressed + if(.true.) call diagonalize_ci_dressed E_new = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) delta_E = (E_new - E_old)/dble(N_states) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index feea575e..ff003a21 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -6,6 +6,10 @@ subroutine dress_slave read_wf = .False. distributed_davidson = .False. SOFT_TOUCH read_wf distributed_davidson + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + call provide_everything call switch_qp_run_to_master call run_wf @@ -67,6 +71,6 @@ subroutine dress_slave_tcp(i,energy) integer, intent(in) :: i logical :: lstop lstop = .False. - call run_dress_slave(0,i,energy,lstop) + call run_dress_slave(0,i,energy) end diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6262920a..80c93e84 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -211,7 +211,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, felem = N_det+1 pullLoop : do while (loop) call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) - call dress_pulled(int_buf, double_buf, det_buf, N_buf) + call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) felem = min(felem_loc, felem) dress_mwen(:) = 0d0 diff --git a/plugins/dress_zmq/dress_zmq_routines.irp.f b/plugins/dress_zmq/dress_zmq_routines.irp.f index bde2c6d8..dc47eb20 100644 --- a/plugins/dress_zmq/dress_zmq_routines.irp.f +++ b/plugins/dress_zmq/dress_zmq_routines.irp.f @@ -2,6 +2,8 @@ subroutine dress_zmq() implicit none double precision, allocatable :: energy(:) allocate (energy(N_states)) + threshold_selectors = 1.d0 + threshold_generators = 1d0 read_wf = .True. SOFT_TOUCH read_wf diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 0a640d49..85279029 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -93,8 +93,8 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] delta_ij_tmp = 0d0 E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion - threshold_selectors = 1.d0 - threshold_generators = 1d0 + !threshold_selectors = 1.d0 + !:threshold_generators = 1d0 ! if(errr /= 0d0) then ! errr = errr / 2d0 ! else diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 375829df..c7e0ed0c 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -36,6 +36,15 @@ subroutine run_dress_slave(thread,iproc,energy) integer :: h,p,n,i_state logical :: ok + integer, allocatable :: int_buf(:) + double precision, allocatable :: double_buf(:) + integer(bit_kind), allocatable :: det_buf(:,:,:) + integer :: N_buf(3) + + + allocate(int_buf(N_dress_int_buffer)) + allocate(double_buf(N_dress_double_buffer)) + allocate(det_buf(N_int, 2, N_dress_det_buffer)) allocate(delta_ij_loc(N_states,N_det,2)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -55,10 +64,11 @@ subroutine run_dress_slave(thread,iproc,energy) if(task_id /= 0) then read (task,*) subset, i_generator delta_ij_loc = 0d0 + call generator_start(i_generator, iproc) call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, task_id) + call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) else exit end if @@ -69,23 +79,28 @@ subroutine run_dress_slave(thread,iproc,energy) end subroutine - BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] -&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] -&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] - implicit none - - dress_int_buffer = 0 - dress_double_buffer = 0d0 - dress_det_buffer = 0_bit_kind -END_PROVIDER +! BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] +!&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] +!&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] +! implicit none +! +! dress_int_buffer = 0 +! dress_double_buffer = 0d0 + ! dress_det_buffer = 0_bit_kind +!END_PROVIDER -subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) +!subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) +subroutine push_dress_results(zmq_socket_push, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: delta_loc(N_states, N_det, 2) + double precision, intent(in) :: double_buf(*) + integer, intent(in) :: int_buf(*) + integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) + integer, intent(in) :: N_buf(3) integer, intent(in) :: ind, task_id integer :: rc, i, j, felem @@ -115,28 +130,31 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) - rc = f77_zmq_send( zmq_socket_push, N_dress_int_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) + if(rc /= 4*3) stop "push5" - rc = f77_zmq_send( zmq_socket_push, dress_int_buffer, 4*N_dress_int_buffer, ZMQ_SNDMORE) - if(rc /= 4*N_dress_int_buffer) stop "push" + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - rc = f77_zmq_send( zmq_socket_push, N_dress_double_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, dress_double_buffer, 8*N_dress_double_buffer, ZMQ_SNDMORE) - if(rc /= 8*N_dress_double_buffer) stop "push" - - rc = f77_zmq_send( zmq_socket_push, N_dress_det_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + if(N_buf(1) > 0) then + rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) + if(rc /= 4*N_buf(1)) stop "push6" + end if - rc = f77_zmq_send( zmq_socket_push, dress_det_buffer, 2*N_int*bit_kind*N_dress_det_buffer, ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_dress_det_buffer) stop "push" - + if(N_buf(2) > 0) then + rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) + if(rc /= 8*N_buf(2)) stop "push8" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" + end if rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push" + if(rc /= 4) stop "push11" ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH @@ -164,49 +182,44 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_b rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pulla" rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pullb" delta_loc(:,:felem,:) = 0d0 rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + if(rc /= 8*N_states*(N_det+1-felem)) stop "pullc" rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + if(rc /= 8*N_states*(N_det+1-felem)) stop "pulld" - rc = f77_zmq_recv( zmq_socket_pull, N_buf(1), 4, 0) - if(rc /= 4) stop "pull" + rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) + if(rc /= 4*3) stop "pull" if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - - - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" - - - rc = f77_zmq_recv( zmq_socket_pull, N_buf(2), 4, 0) - if(rc /= 4) stop "pull" if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - - - - - rc = f77_zmq_recv( zmq_socket_pull, N_buf(3), 4, 0) - if(rc /= 4) stop "pull" if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - + if(N_buf(1) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) + if(rc /= 4*N_buf(1)) stop "pull1" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) + if(rc /= 8*N_buf(2)) stop "pull2" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" + end if rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pull4" ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index 270eec17..897f39f0 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -11,7 +11,7 @@ program shifted_bk PROVIDE psi_bilinear_matrix_transp_order - call diagonalize_CI() + !call diagonalize_CI() call dress_zmq() end diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 9574584b..67f8424b 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,30 +1,21 @@ use selection_types - - BEGIN_PROVIDER [ integer, N_dress_int_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] - implicit none - N_dress_int_buffer = 1 - N_dress_double_buffer = 1 - N_dress_det_buffer = 1 -END_PROVIDER - - BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] -&BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] +&BEGIN_PROVIDER [ integer, n_det_add ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ type(selection_buffer), sb, (Nproc) ] +&BEGIN_PROVIDER [ type(selection_buffer), global_sb ] +&BEGIN_PROVIDER [ type(selection_buffer), mini_sb ] &BEGIN_PROVIDER [ double precision, N_det_increase_factor ] implicit none integer :: i - integer :: n_det_add N_det_increase_factor = 1d0 - current_generator_(:) = 0 n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) + call create_selection_buffer(n_det_add, n_det_add*2, global_sb) + call create_selection_buffer(n_det_add, n_det_add*2, mini_sb) do i=1,Nproc call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) end do @@ -32,46 +23,82 @@ END_PROVIDER a_s2_i = 0d0 END_PROVIDER -subroutine generator_done(i_gen) + + BEGIN_PROVIDER [ integer, N_dress_int_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] implicit none - integer, intent(in) :: i_gen + N_dress_int_buffer = 1 + N_dress_double_buffer = n_det_add + N_dress_det_buffer = n_det_add +END_PROVIDER + + +subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer, intent(out) :: int_buf(N_dress_int_buffer), N_buf(3) + double precision, intent(out) :: double_buf(N_dress_double_buffer) + integer(bit_kind), intent(out) :: det_buf(N_int, 2, N_dress_det_buffer) + integer :: i - !dress_int_buffer = ... + call sort_selection_buffer(sb(iproc)) + det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) + double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) + if(sb(iproc)%cur > 0) then + !$OMP CRITICAL + call merge_selection_buffers(sb(iproc), mini_sb) + call sort_selection_buffer(mini_sb) + do i=1,Nproc + sb(i)%mini = min(sb(i)%mini, mini_sb%mini) + end do + !$OMP END CRITICAL + end if + N_buf(1) = 1 + N_buf(2) = sb(iproc)%cur + N_buf(3) = sb(iproc)%cur + sb(iproc)%cur = 0 end subroutine -subroutine dress_pulled(int_buf, double_buf, det_buf, N_buf) +subroutine generator_start(i_gen, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer :: i + + call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) +end subroutine + + +subroutine dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) use bitmasks implicit none - integer, intent(in) :: N_buf(3) + integer, intent(in) :: ind, N_buf(3) integer, intent(in) :: int_buf(*) double precision, intent(in) :: double_buf(*) integer(bit_kind), intent(in) :: det_buf(N_int,2,*) - + integer :: i + + do i=1,N_buf(2) + call add_to_selection_buffer(global_sb, det_buf(1,1,i), double_buf(i)) + end do end subroutine subroutine delta_ij_done() use bitmasks implicit none - integer :: i, n_det_add, old_det_gen + integer :: i, old_det_gen integer(bit_kind), allocatable :: old_generators(:,:,:) allocate(old_generators(N_int, 2, N_det_generators)) old_generators(:,:,:) = psi_det_generators(:,:,:N_det_generators) old_det_gen = N_det_generators - call sort_selection_buffer(sb(1)) - do i=2,Nproc - call sort_selection_buffer(sb(i)) - call merge_selection_buffers(sb(i), sb(1)) - end do - - call sort_selection_buffer(sb(1)) - - call fill_H_apply_buffer_no_selection(sb(1)%cur,sb(1)%det,N_int,0) + call sort_selection_buffer(global_sb) + call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) call copy_H_apply_buffer_to_wf() if (s2_eig.or.(N_states > 1) ) then @@ -226,17 +253,13 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili - if(current_generator_(iproc) /= i_gen) then - current_generator_(iproc) = i_gen - call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) - end if - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) - - call add_to_selection_buffer(sb(iproc), alpha, contrib) - + + if(contrib < sb(iproc)%mini) then + call add_to_selection_buffer(sb(iproc), alpha, contrib) + end if end subroutine diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index d7812b97..db943a85 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -10,7 +10,7 @@ program shifted_bk PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - call diagonalize_CI() + !call diagonalize_CI() call dress_slave() end From 46450f0826e2fc8975fc6ccfa00230e3e760ec4b Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 10 Apr 2018 14:25:28 +0200 Subject: [PATCH 13/29] compute sum of alpha2 --- plugins/shiftedbk/shifted_bk_routines.irp.f | 71 +++++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 67f8424b..3ee4dcf0 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,5 +1,13 @@ use selection_types + + BEGIN_PROVIDER [ double precision, global_sum_alpha2, (N_states) ] +&BEGIN_PROVIDER [ double precision, slave_sum_alpha2, (N_states, Nproc) ] + global_sum_alpha2 = 0d0 + slave_sum_alpha2 = 0d0 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] &BEGIN_PROVIDER [ integer, n_det_add ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] @@ -11,7 +19,8 @@ use selection_types implicit none integer :: i - N_det_increase_factor = 1d0 + N_det_increase_factor = 1d0 + n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) call create_selection_buffer(n_det_add, n_det_add*2, global_sb) @@ -29,7 +38,7 @@ use selection_types &BEGIN_PROVIDER [ integer, N_dress_det_buffer ] implicit none N_dress_int_buffer = 1 - N_dress_double_buffer = n_det_add + N_dress_double_buffer = n_det_add+N_states N_dress_det_buffer = n_det_add END_PROVIDER @@ -45,6 +54,11 @@ subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) call sort_selection_buffer(sb(iproc)) det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) + double_buf(sb(iproc)%cur+1:sb(iproc)%cur+N_states) = slave_sum_alpha2(:,iproc) + N_buf(1) = 1 + N_buf(2) = sb(iproc)%cur+N_states + N_buf(3) = sb(iproc)%cur + if(sb(iproc)%cur > 0) then !$OMP CRITICAL call merge_selection_buffers(sb(iproc), mini_sb) @@ -54,10 +68,9 @@ subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) end do !$OMP END CRITICAL end if - N_buf(1) = 1 - N_buf(2) = sb(iproc)%cur - N_buf(3) = sb(iproc)%cur + sb(iproc)%cur = 0 + slave_sum_alpha2(:,iproc) = 0d0 end subroutine @@ -80,9 +93,13 @@ subroutine dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) integer(bit_kind), intent(in) :: det_buf(N_int,2,*) integer :: i - do i=1,N_buf(2) + do i=1,N_buf(3) call add_to_selection_buffer(global_sb, det_buf(1,1,i), double_buf(i)) end do + if(N_buf(3) + N_states /= N_buf(2)) stop "buf size" + !$OMP CRITICAL + global_sum_alpha2(:) += double_buf(N_buf(3)+1:N_buf(2)) + !$OMP END CRITICAL end subroutine @@ -120,7 +137,7 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) double precision, allocatable :: delta_ij_loc(:,:,:,:) integer :: exc(0:2,2,2), h1, h2, p1, p2, s1, s2 integer :: i, j, k, ex, n_minilist, iproc, degree - double precision :: haa, contrib, phase + double precision :: haa, contrib, phase, c_alpha(N_states,Nproc), s_c_alpha(N_states) logical :: ok integer, external :: omp_get_thread_num @@ -129,10 +146,11 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) allocate(minilist(N_det_delta_ij), det_minilist(N_int, 2, N_det_delta_ij), delta_ij_loc(N_states, N_det_delta_ij, 2, Nproc)) + c_alpha = 0d0 delta_ij_loc = 0d0 !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) & - !$OMP PRIVATE(det_minilist, minilist, haa, contrib) & + !$OMP PRIVATE(det_minilist, minilist, haa, contrib, s_c_alpha) & !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) do i=n_alpha,1,-1 iproc = omp_get_thread_num()+1 @@ -167,19 +185,31 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) end do call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) call dress_with_alpha_(N_states, N_det_delta_ij, N_int, delta_ij_loc(1,1,1,iproc), & - minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, iproc) + minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, s_c_alpha, iproc) + + c_alpha(:,iproc) += s_c_alpha(:)**2 end do !$OMP END PARALLEL DO do i=2,Nproc delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i) + c_alpha(:,1) += c_alpha(:,i) end do delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,1) + + + print *, "SUM ALPHA2 PRE", global_sum_alpha2 + !global_sum_alpha2(:) -= c_alpha(:,1) + print *, "SUM ALPHA2 POST", c_alpha(:,1) + do i=1,N_states + delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) + end do + global_sum_alpha2 = 0d0 end subroutine -subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) +subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) use bitmasks implicit none BEGIN_DOC @@ -194,16 +224,16 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,Ndet,2) - double precision, intent(out) :: contrib + double precision, intent(out) :: contrib, c_alpha(N_states) double precision, intent(in) :: haa double precision :: hij, sij integer :: i,j,k,l,m, l_sd double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates), c_alpha + double precision :: de, a_h_psi(Nstates)!, c_alpha contrib = 0d0 a_h_psi = 0d0 - + c_alpha = 0d0 do l_sd=1,n_minilist call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) @@ -220,12 +250,12 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili de = E0_denominator(i) - haa if(DABS(de) < 1D-5) cycle - c_alpha = a_h_psi(i) / de - contrib = min(contrib, c_alpha * a_h_psi(i)) + c_alpha(i) = a_h_psi(i) / de + contrib = min(contrib, c_alpha(i) * a_h_psi(i)) do l_sd=1,n_minilist - hdress = c_alpha * a_h_i(l_sd, iproc) - sdress = c_alpha * a_s2_i(l_sd, iproc) + hdress = c_alpha(i) * a_h_i(l_sd, iproc) + sdress = c_alpha(i) * a_s2_i(l_sd, iproc) delta_ij_loc(i, minilist(l_sd), 1) += hdress delta_ij_loc(i, minilist(l_sd), 2) += sdress end do @@ -249,14 +279,15 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) double precision, external :: diag_H_mat_elem_fock - double precision :: haa, contrib + double precision :: haa, contrib, c_alpha(N_states) haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) - + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) + + slave_sum_alpha2(:,iproc) += c_alpha(:)**2 if(contrib < sb(iproc)%mini) then call add_to_selection_buffer(sb(iproc), alpha, contrib) end if From 274bb043c2de4cf1ee4c0bcad3c23947654d59dc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 27 Apr 2018 12:41:39 +0200 Subject: [PATCH 14/29] reduce tasks --- plugins/dress_zmq/dress_stoch_routines.irp.f | 207 +++++++++++++------ plugins/dress_zmq/run_dress_slave.irp.f | 85 ++++++-- plugins/shiftedbk/shifted_bk_routines.irp.f | 4 +- 3 files changed, 211 insertions(+), 85 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 80c93e84..e37eb45d 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -10,7 +10,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) implicit none character(len=64000) :: task - + character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -27,6 +27,8 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) double precision :: state_average_weight_save(N_states) + task(:) = CHAR(0) + temp(:) = CHAR(0) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states SOFT_TOUCH dress_stoch_istate @@ -63,37 +65,68 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) endif integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos + integer :: ipos, sz + integer :: block(50), block_i, cur_tooth_reduce, ntas + logical :: flushme + block = 0 + block_i = 0 + cur_tooth_reduce = 0 ipos=1 - do i=1,N_dress_jobs - if(dress_jobs(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - - ipos=1 - endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, dress_jobs(i) - ipos += 20 - if (ipos > 63980) then + ntas = 0 + do i=1,N_dress_jobs+1 + flushme = (i==N_dress_jobs+1 .or. block_i == size(block)) + if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) + + if(flushme .and. block_i > 0) then + if(block(1) > fragment_first) then + ntas += 1 + write(temp, '(I9,1X,60(I9,1X))') 0, block(:block_i) + sz = len(trim(temp))+1 + temp(sz:sz) = '|' + !write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) + write(task(ipos:ipos+sz), *) temp(:sz) + !ipos += 20 + ipos += sz+1 + if (ipos > 63000 .or. i==N_dress_jobs+1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif + ipos=1 endif - end do + else + if(block_i /= 1) stop "reduced fragmented dets" + do j=1,fragment_count + ntas += 1 + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, block(1) + ipos += 20 + if (ipos > 63000 .or. i==N_dress_jobs+1) then + ntas += 1 + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + end if + block_i = 0 + block = 0 + end if + + if(i /= N_dress_jobs+1) then + cur_tooth_reduce = tooth_reduce(dress_jobs(i)) + block_i += 1 + block(block_i) = dress_jobs(i) end if end do - if (ipos > 1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - endif + print *, "ACTUAL TASK NUM", ntas + !stop + + !if (ipos > 1) then + ! if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + ! stop 'Unable to add task to task server' + ! endif + !endif if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' endif @@ -134,7 +167,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, implicit none - integer, parameter :: delta_loc_N = 2 + integer, parameter :: delta_loc_N = 1 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(in) :: istate @@ -205,22 +238,26 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, timeLast = time0 cur_cp = 0 old_cur_cp = 0 - logical :: loop - integer :: felem, felem_loc + logical :: loop, last + integer :: felem(0:delta_loc_N), felem_loc loop = .true. felem = N_det+1 pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) + call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - felem = min(felem_loc, felem) + !print *, "felem", felem_loc, felem + felem(delta_loc_cur) = felem_loc + felem(0) = min(felem_loc, felem(0)) dress_mwen(:) = 0d0 integer, external :: zmq_delete_tasks - - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - stop 'Unable to delete tasks' - endif - if(more == 0) loop = .false. + + if(last) then + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then + stop 'Unable to delete tasks' + endif + if(more == 0) loop = .false. + end if do i_state=1,N_states @@ -245,29 +282,36 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end if end do - if(ok) then - do i=felem,N_det_generators + if(ok .and. .false.) then + do i=felem(0),N_det_generators do is=1,N_states - cp(is,i,j,1) += delta_loc(is,i,1,1) * fac(1) & - + delta_loc(is,i,1,2) * fac(2) + cp(is,i,j,1) += delta_loc(is,i,1,1) * fac(1)! & + !+ delta_loc(is,i,1,2) * fac(2) & !+ delta_loc(is,i,1,3) * fac(3) & - !+ delta_loc(is,i,1,4) * fac(4) & - !+ delta_loc(:,i,1,5) * fac(5) & - !+ delta_loc(:,i,1,6) * fac(6) & - !+ delta_loc(:,i,1,7) * fac(7) & - !+ delta_loc(:,i,1,8) * fac(8) + !+ delta_loc(is,i,1,4) * fac(4) & + !+ delta_loc(is,i,1,5) * fac(5) & + !+ delta_loc(is,i,1,6) * fac(6) & + !+ delta_loc(is,i,1,7) * fac(7) & + !+ delta_loc(is,i,1,8) * fac(8) + end do + end do - cp(is,i,j,2) += delta_loc(is,i,2,1) * fac(1) & - + delta_loc(is,i,2,2) * fac(2) + + do i=felem(0),N_det_generators + do is=1,N_states + cp(is,i,j,2) += delta_loc(is,i,2,1) * fac(1)! & + !+ delta_loc(is,i,2,2) * fac(2) & !+ delta_loc(is,i,2,3) * fac(3) & - !+ delta_loc(is,i,2,4) * fac(4) & - !+ delta_loc(:,i,2,5) * fac(5) & - !+ delta_loc(:,i,2,6) * fac(6) & - !+ delta_loc(:,i,2,7) * fac(7) & - !+ delta_loc(:,i,2,8) * fac(8) + !+ delta_loc(is,i,2,4) * fac(4) & + !+ delta_loc(is,i,2,5) * fac(5) & + !+ delta_loc(is,i,2,6) * fac(6) & + !+ delta_loc(is,i,2,7) * fac(7) & + !+ delta_loc(is,i,2,8) * fac(8) end do end do - end if + + + end if end do do i=1,delta_loc_cur @@ -279,14 +323,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, fracted = (toothMwen /= 0) if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - if(fracted) then - delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(1:N_states,felem:N_det,1,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(1:N_states,felem:N_det,2,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) * (fractage(toothMwen)) - delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) * (fractage(toothMwen)) - else - delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) - delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) + if(fracted .and. .false.) then + delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (fractage(toothMwen)) + delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (fractage(toothMwen)) + else if(.false.) then + delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i) + delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i) end if parts_to_get(ind) -= 1 @@ -295,7 +339,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, total_computed += 1 end if end do - felem = N_det_generators+1 + felem = N_det+1 delta_loc_cur = 1 else delta_loc_cur += 1 @@ -320,7 +364,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, exit end if end do - if(cur_cp == 0) cycle pullLoop + if(cur_cp == 0 .or. (cur_cp == old_cur_cp .and. total_computed /= N_det_generators)) cycle pullLoop double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort @@ -436,13 +480,17 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, N_dress_jobs ] &BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, tooth_reduce, (N_det_generators) ] implicit none logical, allocatable :: computed(:), comp_filler(:) integer :: i, j, last_full, dets(comb_teeth) + integer :: k, l, cur_cp, under_det(comb_teeth+1) + integer :: cp_limit(N_cps_max) integer, allocatable :: iorder(:), first_cp(:) integer, allocatable :: filler(:) integer :: nfiller, lfiller, cfiller + logical :: fracted allocate(filler(n_det_generators)) allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) @@ -455,6 +503,15 @@ END_PROVIDER comp_filler = .false. computed = .false. cps_N = 1d0 + tooth_reduce = 0 + + integer :: fragsize + fragsize = N_det_generators / ((N_cps_max+1)*(N_cps_max+2)/2) + + do i=1,N_cps_max + cp_limit(i) = fragsize * i * (i+1) / 2 + end do + print *, "CP_LIMIT", cp_limit N_dress_jobs = first_det_of_comb - 1 do i=1, N_dress_jobs @@ -471,7 +528,8 @@ END_PROVIDER !DIR$ FORCEINLINE call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) - if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then + !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then + if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then first_cp(cur_cp+1) = N_dress_jobs done_cp_at(N_dress_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) @@ -561,11 +619,32 @@ END_PROVIDER end do iorder = -1 + + cps(:, N_cp) = 0d0 + + iloop : do i=fragment_first+1,N_det_generators + k = tooth_of_det(i) + if(k == 0) cycle + if (i == first_det_of_teeth(k)) cycle + + do j=1,N_cp + if(cps(i, j) /= 0d0) cycle iloop + end do + + tooth_reduce(i) = k + end do iloop + + do i=1,N_det_generators + if(tooth_reduce(dress_jobs(i)) == 0) dress_jobs(i) = dress_jobs(i)+N_det*2 + end do + do i=1,N_cp-1 call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) end do - - cps(:, N_cp) = 0d0 + + do i=1,N_det_generators + if(dress_jobs(i) > N_det) dress_jobs(i) = dress_jobs(i) - N_det*2 + end do END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index c7e0ed0c..5ec39716 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -15,10 +15,10 @@ subroutine run_dress_slave(thread,iproc,energy) double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: thread, iproc - integer :: rc, i, subset, i_generator + integer :: rc, i, subset, i_generator(60) integer :: worker_id, task_id, ctask, ltask - character*(512) :: task + character*(5120) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -40,7 +40,9 @@ subroutine run_dress_slave(thread,iproc,energy) double precision, allocatable :: double_buf(:) integer(bit_kind), allocatable :: det_buf(:,:,:) integer :: N_buf(3) - + logical :: last + + task(:) = CHAR(0) allocate(int_buf(N_dress_int_buffer)) allocate(double_buf(N_dress_double_buffer)) @@ -62,13 +64,22 @@ subroutine run_dress_slave(thread,iproc,energy) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) if(task_id /= 0) then + task = trim(task)//' 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + + i_generator = 0 read (task,*) subset, i_generator + if(i_generator(size(i_generator)) /= 0) stop "i_generator buffer too small" delta_ij_loc = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) + i=1 + do while(i_generator(i) /= 0) + call generator_start(i_generator(i), iproc) + call alpha_callback(delta_ij_loc, i_generator(i), subset, iproc) + call generator_done(i_generator(i), int_buf, double_buf, det_buf, N_buf, iproc) + last = (i_generator(i+1) == 0) + call push_dress_results(zmq_socket_push, i_generator(i), last, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) + i += 1 + end do call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) else exit end if @@ -91,19 +102,24 @@ end subroutine !subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) -subroutine push_dress_results(zmq_socket_push, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id) +subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: delta_loc(N_states, N_det, 2) double precision, intent(in) :: double_buf(*) + logical, intent(in) :: last integer, intent(in) :: int_buf(*) integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) - integer, intent(in) :: N_buf(3) + integer, intent(in) :: N_bufi(3) + integer :: N_buf(3) integer, intent(in) :: ind, task_id integer :: rc, i, j, felem + double precision :: vast_emptiness(N_states) + integer :: fillness + vast_emptiness = 0d0 felem = 1 dloop : do i=1, N_det @@ -114,21 +130,48 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, int_buf, double_b end if end do end do dloop + + if(last) then + fillness = 0 + do i=felem,N_det + do j=1,N_states + if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then + fillness += 1 + end if + end do + end do + !print *, "FILLNESS", float(fillness) / float((N_det-felem+1)*N_states) + end if + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) - if(rc /= 8*N_states*(N_det+1-felem)) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) - if(rc /= 8*N_states*(N_det+1-felem)) stop "push" + rc = f77_zmq_send( zmq_socket_push, last, 1, ZMQ_SNDMORE) + if(rc /= 1) stop "push" + if(last) then + rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" + else + rc = f77_zmq_send( zmq_socket_push, N_det, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + end if + N_buf = N_bufi rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -137,7 +180,7 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, int_buf, double_b if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - + if(N_buf(1) > 0) then rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) if(rc /= 4*N_buf(1)) stop "push6" @@ -166,10 +209,11 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) +subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + logical, intent(out) :: last double precision, intent(inout) :: delta_loc(N_states, N_det, 2) double precision, intent(out) :: double_buf(*) integer, intent(out) :: int_buf(*) @@ -183,6 +227,9 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_b rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pulla" + + rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0) + if(rc /= 1) stop "pulla" rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) if(rc /= 4) stop "pullb" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 3ee4dcf0..7213e831 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -62,7 +62,7 @@ subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) if(sb(iproc)%cur > 0) then !$OMP CRITICAL call merge_selection_buffers(sb(iproc), mini_sb) - call sort_selection_buffer(mini_sb) + !call sort_selection_buffer(mini_sb) do i=1,Nproc sb(i)%mini = min(sb(i)%mini, mini_sb%mini) end do @@ -203,7 +203,7 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) !global_sum_alpha2(:) -= c_alpha(:,1) print *, "SUM ALPHA2 POST", c_alpha(:,1) do i=1,N_states - delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) + ! delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) end do global_sum_alpha2 = 0d0 end subroutine From 12e527157c3f9aa0bb8e14c446120f8f3ab5ab36 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sun, 29 Apr 2018 17:09:46 +0200 Subject: [PATCH 15/29] sparse vectors --- config/ifort.cfg | 2 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 162 ++++++++----------- plugins/dress_zmq/run_dress_slave.irp.f | 125 ++++++++------ 3 files changed, 141 insertions(+), 148 deletions(-) diff --git a/config/ifort.cfg b/config/ifort.cfg index 0c630114..b94d0cd4 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -32,7 +32,7 @@ OPENMP : 1 ; Append OpenMP flags # [OPT] FC : -traceback -FCFLAGS : -xAVX -O2 -ip -ftz -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g # Profiling flags ################# diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index e37eb45d..d80d10c4 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -66,7 +66,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer :: ipos, sz - integer :: block(50), block_i, cur_tooth_reduce, ntas + integer :: block(8), block_i, cur_tooth_reduce, ntas logical :: flushme block = 0 block_i = 0 @@ -167,8 +167,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, implicit none - integer, parameter :: delta_loc_N = 1 - integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(in) :: istate @@ -178,7 +176,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -188,7 +186,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: more integer :: i, j, k, i_state, N - integer :: task_id, ind, inds(delta_loc_N) + integer :: task_id, ind double precision, save :: time0 = -1.d0 double precision :: time, timeLast, old_tooth double precision, external :: omp_get_wtime @@ -197,20 +195,20 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, logical, allocatable :: actually_computed(:) integer :: total_computed integer :: delta_loc_cur, is, N_buf(3) - double precision :: fac(delta_loc_N) , wei(delta_loc_N) + double precision :: fac , wei logical :: ok integer, allocatable :: int_buf(:) double precision, allocatable :: double_buf(:) integer(bit_kind), allocatable :: det_buf(:,:,:) - allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer),det_buf(N_int,2,N_dress_det_buffer)) + allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) delta_loc_cur = 1 delta = 0d0 delta_s2 = 0d0 allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2, delta_loc_N)) + allocate(delta_loc(N_states, N_det, 2)) dress_detail = 0d0 delta_det = 0d0 cp = 0d0 @@ -219,7 +217,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) - dress_mwen =0.d0 parts_to_get(:) = 1 if(fragment_first > 0) then @@ -239,16 +236,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, cur_cp = 0 old_cur_cp = 0 logical :: loop, last - integer :: felem(0:delta_loc_N), felem_loc + integer, allocatable :: sparse(:) + allocate(sparse(0:N_det)) loop = .true. - felem = N_det+1 pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) + call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1), int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen) call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - !print *, "felem", felem_loc, felem - felem(delta_loc_cur) = felem_loc - felem(0) = min(felem_loc, felem(0)) - dress_mwen(:) = 0d0 + + integer, external :: zmq_delete_tasks @@ -259,94 +254,67 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(more == 0) loop = .false. end if - - do i_state=1,N_states - do i=felem_loc, N_det - dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state) - end do - end do + !dress_mwen = 0d0 + + !do i_state=1,N_states + ! do i=1,sparse(0) + ! dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(sparse(i), i_state) + ! end do + !end do dress_detail(:, ind) += dress_mwen(:) - wei(delta_loc_cur) = dress_weight_inv(ind) - inds(delta_loc_cur) = ind + wei = dress_weight_inv(ind) - if(delta_loc_cur == delta_loc_N .or. .not. loop) then - do j=1,N_cp !! optimizable - fac = 0d0 - ok = .false. - do i=1,delta_loc_cur - !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step - fac(i) = cps(inds(i), j) * wei(i) * comb_step - if(fac(i) /= 0d0) then - ok = .true. - end if + do j=1,N_cp !! optimizable + fac = 0d0 + ok = .false. + !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step + fac = cps(ind, j) * wei * comb_step + + if(fac /= 0) then + do i=1,sparse(0) + do is=1,N_states + cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac + end do end do - - if(ok .and. .false.) then - do i=felem(0),N_det_generators - do is=1,N_states - cp(is,i,j,1) += delta_loc(is,i,1,1) * fac(1)! & - !+ delta_loc(is,i,1,2) * fac(2) & - !+ delta_loc(is,i,1,3) * fac(3) & - !+ delta_loc(is,i,1,4) * fac(4) & - !+ delta_loc(is,i,1,5) * fac(5) & - !+ delta_loc(is,i,1,6) * fac(6) & - !+ delta_loc(is,i,1,7) * fac(7) & - !+ delta_loc(is,i,1,8) * fac(8) - end do - end do - do i=felem(0),N_det_generators - do is=1,N_states - cp(is,i,j,2) += delta_loc(is,i,2,1) * fac(1)! & - !+ delta_loc(is,i,2,2) * fac(2) & - !+ delta_loc(is,i,2,3) * fac(3) & - !+ delta_loc(is,i,2,4) * fac(4) & - !+ delta_loc(is,i,2,5) * fac(5) & - !+ delta_loc(is,i,2,6) * fac(6) & - !+ delta_loc(is,i,2,7) * fac(7) & - !+ delta_loc(is,i,2,8) * fac(8) - end do - end do - - - end if - end do + do i=1,sparse(0) + do is=1,N_states + cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac + end do + end do + end if + end do - do i=1,delta_loc_cur - logical :: fracted - integer :: toothMwen - ind = inds(i) + ! do i=1,delta_loc_cur + logical :: fracted + integer :: toothMwen - toothMwen = tooth_of_det(ind) - fracted = (toothMwen /= 0) - if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) + toothMwen = tooth_of_det(ind) + fracted = (toothMwen /= 0) + if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - if(fracted .and. .false.) then - delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (fractage(toothMwen)) - delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (fractage(toothMwen)) - else if(.false.) then - delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i) - delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i) - end if - - parts_to_get(ind) -= 1 - if(parts_to_get(ind) == 0) then - actually_computed(ind) = .true. - total_computed += 1 - end if + if(fracted .and. .false.) then + do i=1,sparse(0) + delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen)) + delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen)) end do - felem = N_det+1 - delta_loc_cur = 1 - else - delta_loc_cur += 1 - cycle - end if - + else if(.false.) then + do i=1,sparse(0) + delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1) + delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2) + end do + end if + parts_to_get(ind) -= 1 + if(parts_to_get(ind) == 0) then + actually_computed(ind) = .true. + total_computed += 1 + end if + !end do time = omp_get_wtime() @@ -492,6 +460,10 @@ END_PROVIDER integer :: nfiller, lfiller, cfiller logical :: fracted + + integer :: first_suspect + first_suspect = 1 + allocate(filler(n_det_generators)) allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) allocate(computed(N_det_generators)) @@ -541,10 +513,10 @@ END_PROVIDER if (N_dress_jobs == N_det_generators) exit end if - do l=1,N_det_generators + do l=first_suspect,N_det_generators if((.not. computed(l)) .and. (.not. comp_filler(l))) exit end do - + first_suspect = l if(l > N_det_generators) exit cfiller = tooth_of_det(l) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 5ec39716..11229c1b 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -107,7 +107,7 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: delta_loc(N_states, N_det, 2) + double precision, intent(inout) :: delta_loc(N_states, N_det, 2) double precision, intent(in) :: double_buf(*) logical, intent(in) :: last integer, intent(in) :: int_buf(*) @@ -115,33 +115,14 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do integer, intent(in) :: N_bufi(3) integer :: N_buf(3) integer, intent(in) :: ind, task_id - integer :: rc, i, j, felem - double precision :: vast_emptiness(N_states) - integer :: fillness + integer :: rc, i, j + double precision :: tmp(N_states,2) + integer, allocatable :: sparse(:) + integer :: sparsei + double precision :: contrib(N_states) - vast_emptiness = 0d0 - felem = 1 - - dloop : do i=1, N_det - do j=1,N_states - if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then - felem = i - exit dloop - end if - end do - end do dloop - - if(last) then - fillness = 0 - do i=felem,N_det - do j=1,N_states - if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then - fillness += 1 - end if - end do - end do - !print *, "FILLNESS", float(fillness) / float((N_det-felem+1)*N_states) - end if + contrib = 0d0 + allocate(sparse(N_det)) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) @@ -151,27 +132,57 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do if(rc /= 1) stop "push" if(last) then - rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) - if(rc /= 8*N_states*(N_det+1-felem)) stop "push" - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) - if(rc /= 8*N_states*(N_det+1-felem)) stop "push" + sparsei = 0 + do i=1,N_det + do j=1,N_states + if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then + sparsei += 1 + sparse(sparsei) = i + delta_loc(:,sparsei,:) = delta_loc(:,i,:) + contrib(:) += delta_loc(:,sparsei, 1) * psi_coef(i, :) + end if + end do + end do + + rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + if(sparsei /= 0) then + rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) + if(rc /= 4*sparsei) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 8*N_states*sparsei) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 8*N_states*sparsei) stop "push" + + do i=sparsei,1 + tmp(:,:) = delta_loc(:,i,:) + delta_loc(:,i,:) = 0d0 + delta_loc(:,sparse(i),:) = tmp(:,:) + end do + end if + + else - rc = f77_zmq_send( zmq_socket_push, N_det, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" + !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) + !if(rc /= 8*N_states) stop "push" - rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" + !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) + !if(rc /= 8*N_states) stop "push" end if - N_buf = N_bufi + N_buf = (/0, 1, 0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -209,38 +220,48 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) +subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull logical, intent(out) :: last double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - double precision, intent(out) :: double_buf(*) + double precision, intent(out) :: double_buf(*), contrib(N_states) integer, intent(out) :: int_buf(*) integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) - integer, intent(out) :: felem + integer, intent(out) :: sparse(0:N_det) integer, intent(out) :: ind integer, intent(out) :: task_id - integer :: rc, i + integer :: rc, i, sparsen integer, intent(out) :: N_buf(3) - + rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pulla" rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0) if(rc /= 1) stop "pulla" - rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) + rc = f77_zmq_recv( zmq_socket_pull, sparse(0), 4, 0) if(rc /= 4) stop "pullb" - delta_loc(:,:felem,:) = 0d0 - - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pullc" + if(sparse(0) /= 0) then + rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) + if(rc /= 8*N_states) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pulld" + + rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0) + if(rc /= 4*sparse(0)) stop "pullc" + + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*sparse(0), 0) + if(rc /= 8*N_states*sparse(0)) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*sparse(0), 0) + if(rc /= 8*N_states*sparse(0)) stop "pulld" + else + contrib = 0d0 + end if rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) From 9966697ab28731a1f20b419031dc813b76dabfc1 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 30 Apr 2018 09:33:25 +0200 Subject: [PATCH 16/29] real(4) dressing --- plugins/dress_zmq/dress_stoch_routines.irp.f | 14 +++-- plugins/dress_zmq/run_dress_slave.irp.f | 55 +++++++++++++------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index d80d10c4..3c47ecab 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -177,6 +177,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + real, allocatable :: delta_loc4(:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -209,6 +210,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) allocate(delta_loc(N_states, N_det, 2)) + allocate(delta_loc4(N_states, N_det, 2)) dress_detail = 0d0 delta_det = 0d0 cp = 0d0 @@ -235,14 +237,20 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, timeLast = time0 cur_cp = 0 old_cur_cp = 0 - logical :: loop, last + logical :: loop, last, floop integer, allocatable :: sparse(:) allocate(sparse(0:N_det)) + floop = .true. loop = .true. + pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1), int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen) + call pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen) call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - + if(floop) then + call wall_time(time) + print *, "FIRST PULL", time-time0 + floop = .false. + end if integer, external :: zmq_delete_tasks diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 11229c1b..e57b8cf7 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -108,6 +108,7 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + real(kind=4), allocatable :: delta_loc4(:,:,:) double precision, intent(in) :: double_buf(*) logical, intent(in) :: last integer, intent(in) :: int_buf(*) @@ -115,15 +116,15 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do integer, intent(in) :: N_bufi(3) integer :: N_buf(3) integer, intent(in) :: ind, task_id - integer :: rc, i, j + integer :: rc, i, j, k, l double precision :: tmp(N_states,2) integer, allocatable :: sparse(:) integer :: sparsei double precision :: contrib(N_states) - + contrib = 0d0 allocate(sparse(N_det)) - + allocate(delta_loc4(N_states, N_det, 2)) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" @@ -139,8 +140,12 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then sparsei += 1 sparse(sparsei) = i - delta_loc(:,sparsei,:) = delta_loc(:,i,:) - contrib(:) += delta_loc(:,sparsei, 1) * psi_coef(i, :) + do k=1,2 + do l=1,N_states + delta_loc4(l,sparsei,k) = real(delta_loc(l,i,k), kind=4) + end do + end do + contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) end if end do end do @@ -156,17 +161,17 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do if(rc /= 4*sparsei) stop "push" - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 8*N_states*sparsei) stop "push" + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 4*N_states*sparsei) stop "push" - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 8*N_states*sparsei) stop "push" + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 4*N_states*sparsei) stop "push" - do i=sparsei,1 - tmp(:,:) = delta_loc(:,i,:) - delta_loc(:,i,:) = 0d0 - delta_loc(:,sparse(i),:) = tmp(:,:) - end do + !do i=sparsei,1 + ! tmp(:,:) = delta_loc(:,i,:) + ! delta_loc(:,i,:) = 0d0 + ! delta_loc(:,sparse(i),:) = tmp(:,:) + !end do end if @@ -220,7 +225,7 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib) +subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull @@ -232,8 +237,10 @@ subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, do integer, intent(out) :: sparse(0:N_det) integer, intent(out) :: ind integer, intent(out) :: task_id - integer :: rc, i, sparsen + integer :: rc, i, j, k, sparsen integer, intent(out) :: N_buf(3) + real(kind=4), intent(out) :: delta_loc4(N_states, N_det, 2) + rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) @@ -254,11 +261,19 @@ subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, do if(rc /= 4*sparse(0)) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*sparse(0), 0) - if(rc /= 8*N_states*sparse(0)) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0) + if(rc /= 4*N_states*sparse(0)) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*sparse(0), 0) - if(rc /= 8*N_states*sparse(0)) stop "pulld" + rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,2), N_states*4*sparse(0), 0) + if(rc /= 4*N_states*sparse(0)) stop "pulld" + + do j=1,2 + do i=1,sparse(0) + do k=1,N_states + delta_loc(k,i,j) = real(delta_loc4(k,i,j), kind=8) + end do + end do + end do else contrib = 0d0 end if From f61661a8326dbdca27b39f73c17b89baf95dbd2d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 30 Apr 2018 13:25:58 +0200 Subject: [PATCH 17/29] OMP master --- plugins/dress_zmq/dress_stoch_routines.irp.f | 34 +++++---- plugins/dress_zmq/run_dress_slave.irp.f | 80 ++++++++++++++------ 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 3c47ecab..c1f64e7c 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -74,7 +74,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) ipos=1 ntas = 0 do i=1,N_dress_jobs+1 - flushme = (i==N_dress_jobs+1 .or. block_i == size(block)) + flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce ) if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) if(flushme .and. block_i > 0) then @@ -131,16 +131,16 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) print *, irp_here, ': Failed in zmq_set_running' endif - !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) & - !$OMP PRIVATE(i) - i = omp_get_thread_num() - if (i==0) then + !!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) & + ! !$OMP PRIVATE(i) + !i = omp_get_thread_num() + !if (i==0) then call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& dress_stoch_istate) - else - call dress_slave_inproc(i) - endif - !$OMP END PARALLEL + !else + ! call dress_slave_inproc(i) + !endif + !!$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' @@ -197,7 +197,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: total_computed integer :: delta_loc_cur, is, N_buf(3) double precision :: fac , wei - logical :: ok integer, allocatable :: int_buf(:) double precision, allocatable :: double_buf(:) integer(bit_kind), allocatable :: det_buf(:,:,:) @@ -275,23 +274,25 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do j=1,N_cp !! optimizable fac = 0d0 - ok = .false. !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step fac = cps(ind, j) * wei * comb_step if(fac /= 0) then + !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is) do i=1,sparse(0) do is=1,N_states cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac end do end do + !$OMP END PARALLEL DO - + !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is) do i=1,sparse(0) do is=1,N_states cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac end do end do + !$OMP END PARALLEL DO end if end do @@ -304,17 +305,21 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) if(fracted .and. .false.) then + !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i) do i=1,sparse(0) delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen)) end do + !$OMP END PARALLEL DO else if(.false.) then + !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i) do i=1,sparse(0) delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1) delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2) end do + !$OMP END PARALLEL DO end if parts_to_get(ind) -= 1 @@ -326,7 +331,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, time = omp_get_wtime() - if((time - timeLast > 2d0) .or. (.not. loop)) then + if((time - timeLast > 5d0) .or. (.not. loop)) then timeLast = time cur_cp = N_cp @@ -347,7 +352,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, su = 0d0 su2 = 0d0 - + !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, & + !$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2) do i=1, int(cps_N(cur_cp)) call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) su += val diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index e57b8cf7..9b4a3863 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -150,28 +150,53 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do end do end do - rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - + if(sparsei /= 0) then - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" - - rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) - if(rc /= 4*sparsei) stop "push" - - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 4*N_states*sparsei) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 4*N_states*sparsei) stop "push" + if(sparsei < N_det / 2) then + rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" - !do i=sparsei,1 - ! tmp(:,:) = delta_loc(:,i,:) - ! delta_loc(:,i,:) = 0d0 - ! delta_loc(:,sparse(i),:) = tmp(:,:) - !end do + rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) + if(rc /= 4*sparsei) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 4*N_states*sparsei) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE) + if(rc /= 4*N_states*sparsei) stop "push" + else + rc = f77_zmq_send( zmq_socket_push, -1, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + do i=1,N_det + sparse(i) = i + do k=1,2 + do l=1,N_states + delta_loc4(l,i,k) = real(delta_loc(l,i,k), kind=4) + end do + end do + end do + + !rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) + !if(rc /= 4*sparsei) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" + end if + else + rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" end if @@ -187,7 +212,7 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do end if N_buf = N_bufi - N_buf = (/0, 1, 0/) + !N_buf = (/0, 1, 0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -256,10 +281,15 @@ subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) if(rc /= 8*N_states) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0) - if(rc /= 4*sparse(0)) stop "pullc" - + if(sparse(0) == -1) then + do i=1,N_det + sparse(i) = i + end do + sparse(0) = N_det + else + rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0) + if(rc /= 4*sparse(0)) stop "pullc" + end if rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0) if(rc /= 4*N_states*sparse(0)) stop "pullc" From c14fe5b99f4ae9bec09a9b78ad02040f5ba777f2 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 1 May 2018 13:16:10 +0200 Subject: [PATCH 18/29] per checkpoint dressing communication - buggy --- plugins/dress_zmq/dress_slave.irp.f | 10 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 215 +++------- plugins/dress_zmq/run_dress_slave.irp.f | 415 ++++++++++--------- 3 files changed, 277 insertions(+), 363 deletions(-) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index ff003a21..10453d2a 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -54,12 +54,12 @@ subroutine run_wf PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - !$OMP PARALLEL PRIVATE(i) - i = omp_get_thread_num() - call dress_slave_tcp(i+1, energy) - !$OMP END PARALLEL + !!$OMP PARALLEL PRIVATE(i) + !i = omp_get_thread_num() +! call dress_slave_tcp(i+1, energy) + call dress_slave_tcp(0, energy) + !!$OMP END PARALLEL print *, 'dress done' - endif end do diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index c1f64e7c..ad58aa5c 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -66,7 +66,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer :: ipos, sz - integer :: block(8), block_i, cur_tooth_reduce, ntas + integer :: block(1), block_i, cur_tooth_reduce, ntas logical :: flushme block = 0 block_i = 0 @@ -176,8 +176,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) - real, allocatable :: delta_loc4(:,:,:) + double precision, allocatable :: delta_loc(:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -189,164 +188,78 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: i, j, k, i_state, N integer :: task_id, ind double precision, save :: time0 = -1.d0 - double precision :: time, timeLast, old_tooth + double precision :: time double precision, external :: omp_get_wtime - integer :: cur_cp, old_cur_cp - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - integer :: total_computed + integer :: cur_cp integer :: delta_loc_cur, is, N_buf(3) - double precision :: fac , wei - integer, allocatable :: int_buf(:) + integer, allocatable :: int_buf(:), agreg_for_cp(:) double precision, allocatable :: double_buf(:) integer(bit_kind), allocatable :: det_buf(:,:,:) - + integer, external :: zmq_delete_tasks + + allocate(agreg_for_cp(N_cp)) + agreg_for_cp = 0 allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) delta_loc_cur = 1 delta = 0d0 delta_s2 = 0d0 - allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) allocate(delta_loc(N_states, N_det, 2)) - allocate(delta_loc4(N_states, N_det, 2)) - dress_detail = 0d0 - delta_det = 0d0 + dress_detail = -1000d0 cp = 0d0 - total_computed = 0 character*(512) :: task - - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) - - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - actually_computed = .false. zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() more = 1 if (time0 < 0.d0) then call wall_time(time0) endif - timeLast = time0 - cur_cp = 0 - old_cur_cp = 0 - logical :: loop, last, floop - integer, allocatable :: sparse(:) - allocate(sparse(0:N_det)) + logical :: loop, floop + integer :: finalcp + finalcp = N_cp*2 + floop = .true. loop = .true. pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen) - call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) + call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) if(floop) then call wall_time(time) print *, "FIRST PULL", time-time0 floop = .false. end if - - integer, external :: zmq_delete_tasks - if(last) then + if(cur_cp == -1) then + call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - stop 'Unable to delete tasks' + stop 'Unable to delete tasks' endif - if(more == 0) loop = .false. - end if - - !dress_mwen = 0d0 - - !do i_state=1,N_states - ! do i=1,sparse(0) - ! dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(sparse(i), i_state) - ! end do - !end do + !if(more == 0) stop 'loop = .false.' !!!!!!!!!!!!!!!! + dress_detail(:, ind) = dress_mwen(:) + else if(cur_cp > 0) then - dress_detail(:, ind) += dress_mwen(:) - wei = dress_weight_inv(ind) - - do j=1,N_cp !! optimizable - fac = 0d0 - !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step - fac = cps(ind, j) * wei * comb_step + if(ind == 0) cycle - if(fac /= 0) then - !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is) - do i=1,sparse(0) - do is=1,N_states - cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac - end do - end do - !$OMP END PARALLEL DO - - !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is) - do i=1,sparse(0) - do is=1,N_states - cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac - end do - end do - !$OMP END PARALLEL DO - end if - end do - - ! do i=1,delta_loc_cur - logical :: fracted - integer :: toothMwen - - toothMwen = tooth_of_det(ind) - fracted = (toothMwen /= 0) - if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - - if(fracted .and. .false.) then - !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i) - do i=1,sparse(0) - delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen)) - delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen)) - end do - !$OMP END PARALLEL DO - else if(.false.) then - !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i) - do i=1,sparse(0) - delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1) - delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2) - end do - !$OMP END PARALLEL DO - end if - - parts_to_get(ind) -= 1 - if(parts_to_get(ind) == 0) then - actually_computed(ind) = .true. - total_computed += 1 - end if - !end do - - time = omp_get_wtime() - - if((time - timeLast > 5d0) .or. (.not. loop)) then - timeLast = time - cur_cp = N_cp - - do i=1,N_det_generators - if(.not. actually_computed(dress_jobs(i))) then - if(i /= 1) then - cur_cp = done_cp_at(i-1) - else - cur_cp = 0 - end if - exit - end if + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,N_det + cp(:,i,cur_cp,1) += delta_loc(:,i,1) end do - if(cur_cp == 0 .or. (cur_cp == old_cur_cp .and. total_computed /= N_det_generators)) cycle pullLoop - + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,N_det + cp(:,i,cur_cp,2) += delta_loc(:,i,2) + end do + + agreg_for_cp(cur_cp) += ind + if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then + stop "too much results..." + end if + if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle + + print *, "FINISHED CP", cur_cp + double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort @@ -359,6 +272,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, su += val su2 += val*val end do + avg = su / cps_N(cur_cp) eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) @@ -366,47 +280,29 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if - call wall_time(time) - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then + + print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then ! Termination - print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' + print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) if (zmq_abort(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Error in sending abort signal (2)' endif - endif - else - if (cur_cp > old_cur_cp) then - old_cur_cp = cur_cp - print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - endif + endif + !exit pullLoop endif end if end do pullLoop + print *, "exited" + + + delta(:,:) = cp(:,:,cur_cp,1) + delta_s2(:,:) = cp(:,:,cur_cp,2) - delta (1:N_states,1:N_det) = 0d0 - delta_s2(1:N_states,1:N_det) = 0d0 - if(total_computed == N_det_generators) then - do i=comb_teeth+1,0,-1 - delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) - delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) - end do - else - do i=1,cur_cp - delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1) - delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2) - end do - delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp) - delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp) - do i=cp_first_tooth(cur_cp)-1,0,-1 - delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) - delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) - end do - - end if dress(istate) = E(istate)+E0 call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine @@ -458,6 +354,8 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] &BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] &BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ] &BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] &BEGIN_PROVIDER [ integer, N_dress_jobs ] &BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] @@ -486,6 +384,8 @@ END_PROVIDER cps = 0d0 cur_cp = 1 done_cp_at = 0 + done_cp_at_det = 0 + needed_by_cp = 0 comp_filler = .false. computed = .false. cps_N = 1d0 @@ -506,6 +406,7 @@ END_PROVIDER end do l=first_det_of_comb + call random_seed(put=(/321,654,65,321,65/)) call RANDOM_NUMBER(comb) lfiller = 1 nfiller = 1 @@ -574,6 +475,8 @@ END_PROVIDER do i=1,N_dress_jobs if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) done_cp_at(i) = cur_cp + done_cp_at_det(dress_jobs(i)) = cur_cp + needed_by_cp(cur_cp) += 1 end do @@ -625,7 +528,7 @@ END_PROVIDER end do do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) + call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1) end do do i=1,N_det_generators diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 9b4a3863..248b7d34 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -9,13 +9,13 @@ BEGIN_PROVIDER [ integer, fragment_count ] END_PROVIDER -subroutine run_dress_slave(thread,iproc,energy) +subroutine run_dress_slave(thread,iproce,energy) use f77_zmq implicit none double precision, intent(in) :: energy(N_states_diag) - integer, intent(in) :: thread, iproc - integer :: rc, i, subset, i_generator(60) + integer, intent(in) :: thread, iproce + integer :: rc, i, subset, i_generator integer :: worker_id, task_id, ctask, ltask character*(5120) :: task @@ -41,13 +41,24 @@ subroutine run_dress_slave(thread,iproc,energy) integer(bit_kind), allocatable :: det_buf(:,:,:) integer :: N_buf(3) logical :: last + integer, external :: omp_get_thread_num + double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) + integer :: toothMwen + logical :: fracted + double precision :: fac + + + + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" + + allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) + allocate(cp(N_states, N_det, N_cp, 2)) + delta_det = 0d9 + cp = 0d0 + task(:) = CHAR(0) - allocate(int_buf(N_dress_int_buffer)) - allocate(double_buf(N_dress_double_buffer)) - allocate(det_buf(N_int, 2, N_dress_det_buffer)) - allocate(delta_ij_loc(N_states,N_det,2)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) @@ -61,48 +72,139 @@ subroutine run_dress_slave(thread,iproc,energy) do i=1,N_states div(i) = psi_coef(dressed_column_idx(i), i) end do - do - call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - if(task_id /= 0) then - task = trim(task)//' 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + + integer :: iproc, cur_cp, done_for(0:N_cp) + integer, allocatable :: tasks(:) + logical :: loop, donedone + integer :: res_task(Nproc), res_gen(Nproc), res_sub(Nproc) + res_gen = 0 + + donedone = .false. + allocate(tasks(0:N_det)) + done_for = 0 + + do cur_cp=0, N_cp + if(donedone) exit + print *, "DOING CP", cur_cp + tasks(0) = 0 + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & + !$OMP PRIVATE(toothMwen, fracted, fac) & + !$OMP PRIVATE(loop, i_generator, subset, iproc, N_buf) + iproc = omp_get_thread_num()+1 + loop = .true. + allocate(int_buf(N_dress_int_buffer)) + allocate(double_buf(N_dress_double_buffer)) + allocate(det_buf(N_int, 2, N_dress_det_buffer)) + allocate(delta_ij_loc(N_states,N_det,2)) + do while(loop) + if(res_gen(iproc) == 0) then + !$OMP CRITICAL + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + !$OMP END CRITICAL + task = task//" 0" + if(task_id == 0) then + donedone = .true. + print *, "DONEDONE" + exit !! LAST MESSAGE ??? + end if + read (task,*) subset, i_generator + else + subset = res_sub(iproc) + i_generator = res_gen(iproc) + task_id = res_task(iproc) + res_gen(iproc) = 0 + end if - i_generator = 0 - read (task,*) subset, i_generator - if(i_generator(size(i_generator)) /= 0) stop "i_generator buffer too small" - delta_ij_loc = 0d0 - i=1 - do while(i_generator(i) /= 0) - call generator_start(i_generator(i), iproc) - call alpha_callback(delta_ij_loc, i_generator(i), subset, iproc) - call generator_done(i_generator(i), int_buf, double_buf, det_buf, N_buf, iproc) - last = (i_generator(i+1) == 0) - call push_dress_results(zmq_socket_push, i_generator(i), last, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - i += 1 + !if(done_cp_at_det(i_generator) > cur_cp) loop = .false. + if(done_cp_at_det(i_generator) > cur_cp) then + res_gen(iproc) = i_generator + res_task(iproc) = task_id + res_sub(iproc) = subset + exit + end if + + !$OMP ATOMIC + done_for(done_cp_at_det(i_generator)) += 1 + + delta_ij_loc(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + call alpha_callback(delta_ij_loc, i_generator, subset, iproc) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) + + !if(.false.) then + !$OMP CRITICAL + do i=1,N_cp + fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step + if(fac == 0d0) cycle + cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) + cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) end do + + + toothMwen = tooth_of_det(i_generator) + fracted = (toothMwen /= 0) + if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) + if(fracted) then + delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) + else + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) + end if + + + !$OMP END CRITICAL + !end if + + !$OMP CRITICAL + call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - else - exit + !$OMP END CRITICAL + tasks(0) += 1 + tasks(tasks(0)) = task_id + + end do + print *, "SLAVE", iproc, "waits" + deallocate(int_buf,double_buf,det_buf,delta_ij_loc) + !$OMP END PARALLEL + + allocate(delta_ij_loc(N_states,N_det,2)) + allocate(int_buf(1), double_buf(1), det_buf(1,1,1)) + N_buf = (/0,1,0/) + + delta_ij_loc = 0d0 + + if(cur_cp > 0) then + do i=1,cur_cp + delta_ij_loc(:,:,:) += cp(:,:,i,:) + !delta_s2(:,:) += cp(:,:,i,2) + end do + + delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) + do i=cp_first_tooth(cur_cp)-1,0,-1 + delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) + end do end if + call sleep(1) + call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + !do i=1,tasks(0) + ! call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,tasks(i)) + !end do + deallocate(delta_ij_loc, int_buf, double_buf, det_buf) end do + + call sleep(10) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) end subroutine -! BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] -!&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] -!&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] -! implicit none -! -! dress_int_buffer = 0 -! dress_double_buffer = 0d0 - ! dress_det_buffer = 0_bit_kind -!END_PROVIDER - -!subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) -subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) +subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none @@ -110,135 +212,68 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do double precision, intent(inout) :: delta_loc(N_states, N_det, 2) real(kind=4), allocatable :: delta_loc4(:,:,:) double precision, intent(in) :: double_buf(*) - logical, intent(in) :: last integer, intent(in) :: int_buf(*) integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) integer, intent(in) :: N_bufi(3) integer :: N_buf(3) - integer, intent(in) :: ind, task_id + integer, intent(in) :: ind, cur_cp, task_id integer :: rc, i, j, k, l - double precision :: tmp(N_states,2) - integer, allocatable :: sparse(:) - integer :: sparsei double precision :: contrib(N_states) - contrib = 0d0 - allocate(sparse(N_det)) - allocate(delta_loc4(N_states, N_det, 2)) + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, last, 1, ZMQ_SNDMORE) - if(rc /= 1) stop "push" + rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" - if(last) then - sparsei = 0 + if(cur_cp /= -1) then + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det) stop "push" + else + contrib = 0d0 + do i=1,N_det - do j=1,N_states - if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then - sparsei += 1 - sparse(sparsei) = i - do k=1,2 - do l=1,N_states - delta_loc4(l,sparsei,k) = real(delta_loc(l,i,k), kind=4) - end do - end do - contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) - end if - end do + contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) end do - - if(sparsei /= 0) then - if(sparsei < N_det / 2) then - rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" - - rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) - if(rc /= 4*sparsei) stop "push" + N_buf = N_bufi + N_buf = (/0,1,0/) - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 4*N_states*sparsei) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE) - if(rc /= 4*N_states*sparsei) stop "push" - else - rc = f77_zmq_send( zmq_socket_push, -1, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) + if(rc /= 4*3) stop "push5" - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" - - do i=1,N_det - sparse(i) = i - do k=1,2 - do l=1,N_states - delta_loc4(l,i,k) = real(delta_loc(l,i,k), kind=4) - end do - end do - end do + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - !rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE) - !if(rc /= 4*sparsei) stop "push" - - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 4*N_states*N_det) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 4*N_states*N_det) stop "push" - end if - else - rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + + if(N_buf(1) > 0) then + rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) + if(rc /= 4*N_buf(1)) stop "push6" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) + if(rc /= 8*N_buf(2)) stop "push8" end if - - else - rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) - !if(rc /= 8*N_states) stop "push" + if(N_buf(3) > 0) then + rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" + end if - !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) - !if(rc /= 8*N_states) stop "push" + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push11" end if - - N_buf = N_bufi - !N_buf = (/0, 1, 0/) - - rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) - if(rc /= 4*3) stop "push5" - - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) - if(rc /= 4*N_buf(1)) stop "push6" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) - if(rc /= 8*N_buf(2)) stop "push8" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" - end if - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push11" ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH @@ -250,90 +285,66 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib) +subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - logical, intent(out) :: last + integer, intent(out) :: cur_cp double precision, intent(inout) :: delta_loc(N_states, N_det, 2) double precision, intent(out) :: double_buf(*), contrib(N_states) integer, intent(out) :: int_buf(*) integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) - integer, intent(out) :: sparse(0:N_det) integer, intent(out) :: ind integer, intent(out) :: task_id - integer :: rc, i, j, k, sparsen + integer :: rc, i, j, k integer, intent(out) :: N_buf(3) - real(kind=4), intent(out) :: delta_loc4(N_states, N_det, 2) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pulla" - rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0) - if(rc /= 1) stop "pulla" + rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0) + if(rc /= 4) stop "pulla" - rc = f77_zmq_recv( zmq_socket_pull, sparse(0), 4, 0) - if(rc /= 4) stop "pullb" - if(sparse(0) /= 0) then + + + if(cur_cp /= -1) then + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0) + if(rc /= 8*N_states*N_det) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0) + if(rc /= 8*N_states*N_det) stop "pulld" + else rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) if(rc /= 8*N_states) stop "pullc" - if(sparse(0) == -1) then - do i=1,N_det - sparse(i) = i - end do - sparse(0) = N_det - else - rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0) - if(rc /= 4*sparse(0)) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) + if(rc /= 4*3) stop "pull" + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + + + if(N_buf(1) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) + if(rc /= 4*N_buf(1)) stop "pull1" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) + if(rc /= 8*N_buf(2)) stop "pull2" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" end if - - rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0) - if(rc /= 4*N_states*sparse(0)) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,2), N_states*4*sparse(0), 0) - if(rc /= 4*N_states*sparse(0)) stop "pulld" - do j=1,2 - do i=1,sparse(0) - do k=1,N_states - delta_loc(k,i,j) = real(delta_loc4(k,i,j), kind=8) - end do - end do - end do - else - contrib = 0d0 + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull4" end if - - - rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) - if(rc /= 4*3) stop "pull" - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - end if - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull4" - ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE From c2343ae337e1a81f7206283362d267f289147549 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 1 May 2018 15:08:41 +0200 Subject: [PATCH 19/29] removed barrier at end of checkpoint --- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 + plugins/dress_zmq/run_dress_slave.irp.f | 203 +++++++++---------- 2 files changed, 97 insertions(+), 108 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index ad58aa5c..d26a4d8c 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -232,8 +232,10 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(cur_cp == -1) then + !print *, "TASK DEL", task_id call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then + print *, "TASK ID", task_id stop 'Unable to delete tasks' endif !if(more == 0) stop 'loop = .false.' !!!!!!!!!!!!!!!! diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 248b7d34..84d9af6c 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -75,126 +75,113 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: iproc, cur_cp, done_for(0:N_cp) integer, allocatable :: tasks(:) - logical :: loop, donedone - integer :: res_task(Nproc), res_gen(Nproc), res_sub(Nproc) - res_gen = 0 - - donedone = .false. - allocate(tasks(0:N_det)) + integer :: lastCp(Nproc) + integer :: lastSent, lastSendable + logical :: send + lastCp = 0 + lastSent = 0 + send = .false. done_for = 0 - do cur_cp=0, N_cp - if(donedone) exit - print *, "DOING CP", cur_cp - tasks(0) = 0 - !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & - !$OMP PRIVATE(toothMwen, fracted, fac) & - !$OMP PRIVATE(loop, i_generator, subset, iproc, N_buf) - iproc = omp_get_thread_num()+1 - loop = .true. - allocate(int_buf(N_dress_int_buffer)) - allocate(double_buf(N_dress_double_buffer)) - allocate(det_buf(N_int, 2, N_dress_det_buffer)) - allocate(delta_ij_loc(N_states,N_det,2)) - do while(loop) - if(res_gen(iproc) == 0) then - !$OMP CRITICAL - call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - !$OMP END CRITICAL - task = task//" 0" - if(task_id == 0) then - donedone = .true. - print *, "DONEDONE" - exit !! LAST MESSAGE ??? - end if - read (task,*) subset, i_generator - else - subset = res_sub(iproc) - i_generator = res_gen(iproc) - task_id = res_task(iproc) - res_gen(iproc) = 0 - end if - - !if(done_cp_at_det(i_generator) > cur_cp) loop = .false. - if(done_cp_at_det(i_generator) > cur_cp) then - res_gen(iproc) = i_generator - res_task(iproc) = task_id - res_sub(iproc) = subset - exit - end if - - !$OMP ATOMIC - done_for(done_cp_at_det(i_generator)) += 1 - - delta_ij_loc(:,:,:) = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - - !if(.false.) then - !$OMP CRITICAL - do i=1,N_cp - fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step - if(fac == 0d0) cycle - cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) - cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) - end do - - - toothMwen = tooth_of_det(i_generator) - fracted = (toothMwen /= 0) - if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) - if(fracted) then - delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) - else - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - end if - - - !$OMP END CRITICAL - !end if - - !$OMP CRITICAL - call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - !$OMP END CRITICAL - tasks(0) += 1 - tasks(tasks(0)) = task_id - + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & + !$OMP PRIVATE(toothMwen, fracted, fac) & + !$OMP PRIVATE(send, i_generator, subset, iproc, N_buf) + iproc = omp_get_thread_num()+1 + allocate(int_buf(N_dress_int_buffer)) + allocate(double_buf(N_dress_double_buffer)) + allocate(det_buf(N_int, 2, N_dress_det_buffer)) + allocate(delta_ij_loc(N_states,N_det,2)) + do + !$OMP CRITICAL (SENDAGE) + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + !$OMP END CRITICAL (SENDAGE) + task = task//" 0" + if(task_id == 0) then + print *, "DONEDONE" + exit !! LAST MESSAGE ??? + end if + read (task,*) subset, i_generator + + + if(done_cp_at_det(i_generator) < lastCp(iproc)) stop 'loop = .false.' + !$OMP CRITICAL + send = .false. + lastSendable = N_cp*2 + do i=1,Nproc + lastSendable = min(lastCp(iproc), lastSendable) end do - print *, "SLAVE", iproc, "waits" - deallocate(int_buf,double_buf,det_buf,delta_ij_loc) - !$OMP END PARALLEL - - allocate(delta_ij_loc(N_states,N_det,2)) - allocate(int_buf(1), double_buf(1), det_buf(1,1,1)) - N_buf = (/0,1,0/) - - delta_ij_loc = 0d0 - - if(cur_cp > 0) then + lastSendable -= 1 + if(lastSendable > lastSent) then + lastSent = lastSendable + send = .true. + end if + !$OMP END CRITICAL + + if(send) then + !$OMP CRITICAL + N_buf = (/0,1,0/) + + delta_ij_loc = 0d0 + cur_cp = lastSent + if(cur_cp < 1) stop "cur_cp < 1" do i=1,cur_cp delta_ij_loc(:,:,:) += cp(:,:,i,:) - !delta_s2(:,:) += cp(:,:,i,2) end do delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) + delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) end do + !$OMP END CRITICAL + !$OMP CRITICAL (SENDAGE) + call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + !$OMP END CRITICAL (SENDAGE) end if - call sleep(1) - call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) - !do i=1,tasks(0) - ! call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,tasks(i)) - !end do - deallocate(delta_ij_loc, int_buf, double_buf, det_buf) + + + !$OMP ATOMIC + done_for(done_cp_at_det(i_generator)) += 1 + + delta_ij_loc(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + call alpha_callback(delta_ij_loc, i_generator, subset, iproc) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) + + !if(.false.) then + !$OMP CRITICAL + do i=1,N_cp + fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step + if(fac == 0d0) cycle + cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) + cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) + end do + + + toothMwen = tooth_of_det(i_generator) + fracted = (toothMwen /= 0) + if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) + if(fracted) then + delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) + else + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) + end if + + + !$OMP END CRITICAL + !end if + + !$OMP CRITICAL (SENDAGE) + call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + !$OMP END CRITICAL (SENDAGE) + lastCp(iproc) = done_cp_at_det(i_generator) end do + !$OMP END PARALLEL call sleep(10) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) From 727c9a84cd04a1aaa631406ff04945570843a1b4 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 1 May 2018 17:43:46 +0200 Subject: [PATCH 20/29] improved synchronization --- plugins/dress_zmq/dress_stoch_routines.irp.f | 4 +- plugins/dress_zmq/run_dress_slave.irp.f | 94 ++++++++++++-------- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index d26a4d8c..25bec079 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -285,7 +285,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time) print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp-4) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -294,7 +294,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, print *, irp_here, ': Error in sending abort signal (2)' endif endif - !exit pullLoop + exit pullLoop endif end if end do pullLoop diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 84d9af6c..c38b2c90 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -11,6 +11,7 @@ END_PROVIDER subroutine run_dress_slave(thread,iproce,energy) use f77_zmq + use omp_lib implicit none double precision, intent(in) :: energy(N_states_diag) @@ -32,7 +33,6 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: ind double precision,allocatable :: delta_ij_loc(:,:,:) - double precision :: div(N_states) integer :: h,p,n,i_state logical :: ok @@ -41,7 +41,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer(bit_kind), allocatable :: det_buf(:,:,:) integer :: N_buf(3) logical :: last - integer, external :: omp_get_thread_num + !integer, external :: omp_get_thread_num double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) integer :: toothMwen logical :: fracted @@ -60,24 +60,22 @@ subroutine run_dress_slave(thread,iproce,energy) task(:) = CHAR(0) - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_push = new_zmq_push_socket(thread) - call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) - if(worker_id == -1) then - print *, "WORKER -1" - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_push_socket(zmq_socket_push,thread) - return - end if - do i=1,N_states - div(i) = psi_coef(dressed_column_idx(i), i) - end do - + integer :: iproc, cur_cp, done_for(0:N_cp) integer, allocatable :: tasks(:) integer :: lastCp(Nproc) integer :: lastSent, lastSendable logical :: send + integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1) + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1) + + do i=0,N_cp+1 + call omp_init_lock(lck_sto(i)) + end do + do i=0,comb_teeth+1 + call omp_init_lock(lck_det(i)) + end do + lastCp = 0 lastSent = 0 send = .false. @@ -85,17 +83,30 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & - !$OMP PRIVATE(toothMwen, fracted, fac) & - !$OMP PRIVATE(send, i_generator, subset, iproc, N_buf) + !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & + !$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) & + !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + stop "WORKER -1" + end if + + iproc = omp_get_thread_num()+1 allocate(int_buf(N_dress_int_buffer)) allocate(double_buf(N_dress_double_buffer)) allocate(det_buf(N_int, 2, N_dress_det_buffer)) allocate(delta_ij_loc(N_states,N_det,2)) do - !$OMP CRITICAL (SENDAGE) + !!1$OMP CRITICAL (SENDAGE) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - !$OMP END CRITICAL (SENDAGE) + !!1$OMP END CRITICAL (SENDAGE) task = task//" 0" if(task_id == 0) then print *, "DONEDONE" @@ -109,7 +120,7 @@ subroutine run_dress_slave(thread,iproce,energy) send = .false. lastSendable = N_cp*2 do i=1,Nproc - lastSendable = min(lastCp(iproc), lastSendable) + lastSendable = min(lastCp(i), lastSendable) end do lastSendable -= 1 if(lastSendable > lastSent) then @@ -119,7 +130,7 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP END CRITICAL if(send) then - !$OMP CRITICAL + !!1$OMP CRITICAL N_buf = (/0,1,0/) delta_ij_loc = 0d0 @@ -131,12 +142,12 @@ subroutine run_dress_slave(thread,iproce,energy) delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) + delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) end do - !$OMP END CRITICAL - !$OMP CRITICAL (SENDAGE) + !!1$OMP END CRITICAL + !!1$OMP CRITICAL (SENDAGE) call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) - !$OMP END CRITICAL (SENDAGE) + !!1$OMP END CRITICAL (SENDAGE) end if @@ -148,13 +159,14 @@ subroutine run_dress_slave(thread,iproce,energy) call alpha_callback(delta_ij_loc, i_generator, subset, iproc) call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - !if(.false.) then - !$OMP CRITICAL + !!1$OMP CRITICAL do i=1,N_cp fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step if(fac == 0d0) cycle + call omp_set_lock(lck_sto(i)) cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) + call omp_unset_lock(lck_sto(i)) end do @@ -162,31 +174,41 @@ subroutine run_dress_slave(thread,iproce,energy) fracted = (toothMwen /= 0) if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) if(fracted) then + call omp_set_lock(lck_det(toothMwen)) + call omp_set_lock(lck_det(toothMwen-1)) delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) + call omp_unset_lock(lck_det(toothMwen)) + call omp_unset_lock(lck_det(toothMwen-1)) else + call omp_set_lock(lck_det(toothMwen)) delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - end if + call omp_unset_lock(lck_det(toothMwen)) + end if + !!!&$OMP END CRITICAL - - !$OMP END CRITICAL - !end if - - !$OMP CRITICAL (SENDAGE) + !!1$OMP CRITICAL (SENDAGE) call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - !$OMP END CRITICAL (SENDAGE) + !!1$OMP END CRITICAL (SENDAGE) lastCp(iproc) = done_cp_at_det(i_generator) end do - !$OMP END PARALLEL call sleep(10) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) + !$OMP END PARALLEL + + do i=0,N_cp+1 + call omp_destroy_lock(lck_sto(i)) + end do + do i=0,comb_teeth+1 + call omp_destroy_lock(lck_det(i)) + end do end subroutine @@ -233,7 +255,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(rc /= 8*N_states) stop "push" N_buf = N_bufi - N_buf = (/0,1,0/) + !N_buf = (/0,1,0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" From d5f66787fe4305af227bea09e6e57b7ece0ddf3f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 2 May 2018 14:32:41 +0200 Subject: [PATCH 21/29] real(4) dressing restored --- plugins/dress_zmq/alpha_factory.irp.f | 8 +-- plugins/dress_zmq/dress_slave.irp.f | 3 - plugins/dress_zmq/dress_stoch_routines.irp.f | 51 +++++++++++++---- plugins/dress_zmq/run_dress_slave.irp.f | 60 ++++++++++++++------ plugins/shiftedbk/shifted_bk_routines.irp.f | 2 +- 5 files changed, 88 insertions(+), 36 deletions(-) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index ccbf177a..1cb286fc 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -99,10 +99,10 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate (indices(N_det), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order + !PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + !PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + !PROVIDE psi_bilinear_matrix_transp_order k=1 do i=1,N_det_alpha_unique diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 10453d2a..b752507b 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -46,14 +46,11 @@ subroutine run_wf ! Selection ! --------- - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - !!$OMP PARALLEL PRIVATE(i) !i = omp_get_thread_num() ! call dress_slave_tcp(i+1, energy) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 25bec079..a3c59976 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -227,8 +227,13 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(floop) then call wall_time(time) print *, "FIRST PULL", time-time0 + time0 = time floop = .false. end if + if(cur_cp == -1 .and. ind == N_det_generators) then + call wall_time(time) + print *, "FINISHED_CPL", N_cp-1, time-time0 + end if if(cur_cp == -1) then @@ -260,7 +265,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end if if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle - print *, "FINISHED CP", cur_cp + call wall_time(time) + + print *, "FINISHED_CP", cur_cp, time-time0 double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort @@ -282,10 +289,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if - call wall_time(time) print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp-4) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -347,7 +353,7 @@ end function ! gen_per_cp : number of generators per checkpoint END_DOC comb_teeth = 64 - N_cps_max = 32 + N_cps_max = 16 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -373,7 +379,6 @@ END_PROVIDER integer, allocatable :: filler(:) integer :: nfiller, lfiller, cfiller logical :: fracted - integer :: first_suspect first_suspect = 1 @@ -394,11 +399,13 @@ END_PROVIDER tooth_reduce = 0 integer :: fragsize - fragsize = N_det_generators / ((N_cps_max+1)*(N_cps_max+2)/2) + fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) + print *, "FRAGSIZE", fragsize do i=1,N_cps_max cp_limit(i) = fragsize * i * (i+1) / 2 end do + cp_limit(N_cps_max) = N_det*2 print *, "CP_LIMIT", cp_limit N_dress_jobs = first_det_of_comb - 1 @@ -413,12 +420,14 @@ END_PROVIDER lfiller = 1 nfiller = 1 do i=1,N_det_generators + !print *, i, N_dress_jobs comb(i) = comb(i) * comb_step !DIR$ FORCEINLINE call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then + print *, "END CUR_CP", cur_cp, N_dress_jobs first_cp(cur_cp+1) = N_dress_jobs done_cp_at(N_dress_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) @@ -427,16 +436,35 @@ END_PROVIDER cur_cp += 1 end if - if (N_dress_jobs == N_det_generators) exit + if (N_dress_jobs == N_det_generators) then + exit + end if end if + + !!!!!!!!!!!!!!!!!!!!!!!! + if(.FALSE.) then + do l=first_suspect,N_det_generators + if((.not. computed(l))) then + N_dress_jobs+=1 + dress_jobs(N_dress_jobs) = l + computed(l) = .true. + first_suspect = l + exit + end if + end do + if (N_dress_jobs == N_det_generators) exit + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ELSE + !!!!!!!!!!!!!!!!!!!!!!!!!!!! do l=first_suspect,N_det_generators if((.not. computed(l)) .and. (.not. comp_filler(l))) exit end do first_suspect = l - if(l > N_det_generators) exit + if(l > N_det_generators) cycle - cfiller = tooth_of_det(l) + cfiller = tooth_of_det(l)-1 if(cfiller > lfiller) then do j=1,nfiller-1 if(.not. computed(filler(j))) then @@ -454,6 +482,8 @@ END_PROVIDER nfiller += 1 end if comp_filler(l) = .True. + end if + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! enddo @@ -463,9 +493,10 @@ END_PROVIDER dress_jobs(k) = filler(j) N_dress_jobs = k end if - computed(filler(j)) = .true. + computed(filler(j)) = .true. end do + N_cp = cur_cp if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index c38b2c90..339f78b7 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -46,8 +46,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: toothMwen logical :: fracted double precision :: fac - - + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" @@ -81,6 +80,9 @@ subroutine run_dress_slave(thread,iproce,energy) send = .false. done_for = 0 + double precision :: hij, sij + call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) + print *, E0_denominator(1) !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & @@ -208,11 +210,13 @@ subroutine run_dress_slave(thread,iproce,energy) end do do i=0,comb_teeth+1 call omp_destroy_lock(lck_det(i)) - end do + end do + stop end subroutine + subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none @@ -228,10 +232,9 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, integer, intent(in) :: ind, cur_cp, task_id integer :: rc, i, j, k, l double precision :: contrib(N_states) + real(4), allocatable :: r4buf(:,:,:) - - - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) @@ -239,14 +242,22 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(cur_cp /= -1) then - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det) stop "push" + allocate(r4buf(N_states, N_det, 2)) + do i=1,2 + do j=1,N_det + do k=1,N_states + r4buf(k,j,i) = real(delta_loc(k,j,i), 4) + end do + end do + end do - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det) stop "push" + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" + + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" else contrib = 0d0 - do i=1,N_det contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) end do @@ -255,7 +266,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(rc /= 8*N_states) stop "push" N_buf = N_bufi - !N_buf = (/0,1,0/) + N_buf = (/0,1,0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -294,6 +305,11 @@ IRP_ENDIF end subroutine +BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] + +END_PROVIDER + + subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) use f77_zmq implicit none @@ -308,8 +324,6 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, integer :: rc, i, j, k integer, intent(out) :: N_buf(3) - - rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pulla" @@ -320,11 +334,21 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, if(cur_cp /= -1) then - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0) - if(rc /= 8*N_states*N_det) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*4*N_det, 0) + if(rc /= 4*N_states*N_det) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0) - if(rc /= 8*N_states*N_det) stop "pulld" + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*4*N_det, 0) + if(rc /= 4*N_states*N_det) stop "pulld" + + do i=1,2 + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) + do j=1,N_det + do k=1,N_states + delta_loc(k,j,i) = real(real4buf(k,j,i), 8) + end do + end do + end do else rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) if(rc /= 8*N_states) stop "pullc" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 7213e831..56c86a91 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -283,7 +283,7 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) From 7cc33f1ab385f377ac597328fb613d31d10f9e85 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 14 May 2018 13:00:04 +0200 Subject: [PATCH 22/29] shifted-bk selection iterates --- plugins/dress_zmq/dress_slave.irp.f | 1 - plugins/dress_zmq/dress_stoch_routines.irp.f | 40 +---- plugins/dress_zmq/dressing.irp.f | 2 +- plugins/dress_zmq/dressing_vector.irp.f | 7 +- plugins/dress_zmq/run_dress_slave.irp.f | 149 +++++++++---------- plugins/shiftedbk/selection_buffer.irp.f | 16 +- plugins/shiftedbk/selection_types.f90 | 9 ++ plugins/shiftedbk/shifted_bk_routines.irp.f | 16 +- 8 files changed, 102 insertions(+), 138 deletions(-) create mode 100644 plugins/shiftedbk/selection_types.f90 diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index b752507b..75c31422 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -56,7 +56,6 @@ subroutine run_wf ! call dress_slave_tcp(i+1, energy) call dress_slave_tcp(0, energy) !!$OMP END PARALLEL - print *, 'dress done' endif end do diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 106025cb..d8fec690 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -122,14 +122,6 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) block(block_i) = dress_jobs(i) end if end do - print *, "ACTUAL TASK NUM", ntas - !stop - - !if (ipos > 1) then - ! if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - ! stop 'Unable to add task to task server' - ! endif - !endif if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' endif @@ -196,7 +188,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, save :: time0 = -1.d0 double precision :: time double precision, external :: omp_get_wtime - integer :: cur_cp + integer :: cur_cp, last_cp integer :: delta_loc_cur, is, N_buf(3) integer, allocatable :: int_buf(:), agreg_for_cp(:) double precision, allocatable :: double_buf(:) @@ -222,8 +214,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time0) endif logical :: loop, floop - integer :: finalcp - finalcp = N_cp*2 floop = .true. loop = .true. @@ -232,29 +222,23 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) if(floop) then call wall_time(time) - print *, "FIRST PULL", time-time0 time0 = time floop = .false. end if if(cur_cp == -1 .and. ind == N_det_generators) then call wall_time(time) - print *, "FINISHED_CPL", N_cp-1, time-time0 end if if(cur_cp == -1) then - !print *, "TASK DEL", task_id call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - print *, "TASK ID", task_id stop 'Unable to delete tasks' endif - !if(more == 0) stop 'loop = .false.' !!!!!!!!!!!!!!!! + if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! dress_detail(:, ind) = dress_mwen(:) else if(cur_cp > 0) then - if(ind == 0) cycle - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) do i=1,N_det cp(:,i,cur_cp,1) += delta_loc(:,i,1) @@ -273,8 +257,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time) - print *, "FINISHED_CP", cur_cp, time-time0 - + last_cp = cur_cp double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort @@ -296,7 +279,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end if print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == cur_cp-2) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -305,18 +288,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, print *, irp_here, ': Error in sending abort signal (2)' endif endif - exit pullLoop endif end if end do pullLoop - print *, "exited" + delta(:,:) = cp(:,:,last_cp,1) + delta_s2(:,:) = cp(:,:,last_cp,2) - delta(:,:) = cp(:,:,cur_cp,1) - delta_s2(:,:) = cp(:,:,cur_cp,2) - - - dress(istate) = E(istate)+E0 + dress(istate) = E(istate)+E0+avg call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine @@ -405,13 +384,11 @@ END_PROVIDER integer :: fragsize fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) - print *, "FRAGSIZE", fragsize do i=1,N_cps_max cp_limit(i) = fragsize * i * (i+1) / 2 end do cp_limit(N_cps_max) = N_det*2 - print *, "CP_LIMIT", cp_limit N_dress_jobs = first_det_of_comb - 1 do i=1, N_dress_jobs @@ -420,7 +397,7 @@ END_PROVIDER end do l=first_det_of_comb - call random_seed(put=(/321,654,65,321,65/)) + call random_seed(put=(/321,654,65,321,65,321,654,65,321,65321,654,65,321,65321,654,65,321,65321,654,65,321,65/)) call RANDOM_NUMBER(comb) lfiller = 1 nfiller = 1 @@ -432,7 +409,6 @@ END_PROVIDER !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then - print *, "END CUR_CP", cur_cp, N_dress_jobs first_cp(cur_cp+1) = N_dress_jobs done_cp_at(N_dress_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 85279029..bbca0c39 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 0d0! 1.d-5 + relative_error = 1.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") diff --git a/plugins/dress_zmq/dressing_vector.irp.f b/plugins/dress_zmq/dressing_vector.irp.f index 5a8fee3b..5a528c36 100644 --- a/plugins/dress_zmq/dressing_vector.irp.f +++ b/plugins/dress_zmq/dressing_vector.irp.f @@ -9,14 +9,16 @@ integer :: i,ii,k,j, l double precision :: f, tmp double precision, external :: u_dot_v - + logical, external :: detEq + dressing_column_h(:,:) = 0.d0 dressing_column_s(:,:) = 0.d0 do k=1,N_states do j = 1, n_det dressing_column_h(j,k) = delta_ij(k,j,1) - dressing_column_s(j,k) = delta_ij(k,j,2) + dressing_column_s(j,k) = delta_ij(k,j,2) +! print *, j, delta_ij(k,j,:) enddo ! tmp = u_dot_v(dressing_column_h(1,k), psi_coef(1,k), N_det) & ! - dressing_column_h(l,k) * psi_coef(l,k) @@ -25,6 +27,5 @@ ! - dressing_column_s(l,k) * psi_coef(l,k) ! dressing_column_s(l,k) -= tmp * f enddo - END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 339f78b7..7135c9cf 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -81,8 +81,10 @@ subroutine run_dress_slave(thread,iproce,energy) done_for = 0 double precision :: hij, sij - call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) - print *, E0_denominator(1) + !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) + + hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL + !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & @@ -93,7 +95,6 @@ subroutine run_dress_slave(thread,iproce,energy) zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) if(worker_id == -1) then - print *, "WORKER -1" call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) stop "WORKER -1" @@ -106,18 +107,54 @@ subroutine run_dress_slave(thread,iproce,energy) allocate(det_buf(N_int, 2, N_dress_det_buffer)) allocate(delta_ij_loc(N_states,N_det,2)) do - !!1$OMP CRITICAL (SENDAGE) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - !!1$OMP END CRITICAL (SENDAGE) task = task//" 0" - if(task_id == 0) then - print *, "DONEDONE" - exit !! LAST MESSAGE ??? + if(task_id == 0) exit + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + if(task_id /= 0) then + read (task,*) subset, i_generator + + !$OMP ATOMIC + done_for(done_cp_at_det(i_generator)) += 1 + + delta_ij_loc(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + call alpha_callback(delta_ij_loc, i_generator, subset, iproc) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) + + do i=1,N_cp + fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step + if(fac == 0d0) cycle + call omp_set_lock(lck_sto(i)) + cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) + cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) + call omp_unset_lock(lck_sto(i)) + end do + + + toothMwen = tooth_of_det(i_generator) + fracted = (toothMwen /= 0) + if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) + if(fracted) then + call omp_set_lock(lck_det(toothMwen)) + call omp_set_lock(lck_det(toothMwen-1)) + delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) + call omp_unset_lock(lck_det(toothMwen)) + call omp_unset_lock(lck_det(toothMwen-1)) + else + call omp_set_lock(lck_det(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) + call omp_unset_lock(lck_det(toothMwen)) + end if + call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + lastCp(iproc) = done_cp_at_det(i_generator) end if - read (task,*) subset, i_generator - - - if(done_cp_at_det(i_generator) < lastCp(iproc)) stop 'loop = .false.' + !$OMP CRITICAL send = .false. lastSendable = N_cp*2 @@ -125,18 +162,17 @@ subroutine run_dress_slave(thread,iproce,energy) lastSendable = min(lastCp(i), lastSendable) end do lastSendable -= 1 - if(lastSendable > lastSent) then + if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then lastSent = lastSendable + cur_cp = lastSent send = .true. end if !$OMP END CRITICAL - + if(send) then - !!1$OMP CRITICAL N_buf = (/0,1,0/) - + delta_ij_loc = 0d0 - cur_cp = lastSent if(cur_cp < 1) stop "cur_cp < 1" do i=1,cur_cp delta_ij_loc(:,:,:) += cp(:,:,i,:) @@ -146,61 +182,13 @@ subroutine run_dress_slave(thread,iproce,energy) do i=cp_first_tooth(cur_cp)-1,0,-1 delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) end do - !!1$OMP END CRITICAL - !!1$OMP CRITICAL (SENDAGE) call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) - !!1$OMP END CRITICAL (SENDAGE) end if - - !$OMP ATOMIC - done_for(done_cp_at_det(i_generator)) += 1 - - delta_ij_loc(:,:,:) = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - - !!1$OMP CRITICAL - do i=1,N_cp - fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step - if(fac == 0d0) cycle - call omp_set_lock(lck_sto(i)) - cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) - cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) - call omp_unset_lock(lck_sto(i)) - end do - - - toothMwen = tooth_of_det(i_generator) - fracted = (toothMwen /= 0) - if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) - if(fracted) then - call omp_set_lock(lck_det(toothMwen)) - call omp_set_lock(lck_det(toothMwen-1)) - delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) - call omp_unset_lock(lck_det(toothMwen)) - call omp_unset_lock(lck_det(toothMwen-1)) - else - call omp_set_lock(lck_det(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - call omp_unset_lock(lck_det(toothMwen)) - end if - !!!&$OMP END CRITICAL - - !!1$OMP CRITICAL (SENDAGE) - call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - !!1$OMP END CRITICAL (SENDAGE) - lastCp(iproc) = done_cp_at_det(i_generator) + if(task_id == 0) exit end do - call sleep(10) - call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL @@ -211,7 +199,6 @@ subroutine run_dress_slave(thread,iproce,energy) do i=0,comb_teeth+1 call omp_destroy_lock(lck_det(i)) end do - stop end subroutine @@ -220,7 +207,8 @@ end subroutine subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none - + + integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(inout) :: delta_loc(N_states, N_det, 2) real(kind=4), allocatable :: delta_loc4(:,:,:) @@ -232,7 +220,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, integer, intent(in) :: ind, cur_cp, task_id integer :: rc, i, j, k, l double precision :: contrib(N_states) - real(4), allocatable :: r4buf(:,:,:) + real(sendt), allocatable :: r4buf(:,:,:) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" @@ -246,16 +234,16 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, do i=1,2 do j=1,N_det do k=1,N_states - r4buf(k,j,i) = real(delta_loc(k,j,i), 4) + r4buf(k,j,i) = real(delta_loc(k,j,i), sendt) end do end do end do - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 4*N_states*N_det) stop "push" + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE) + if(rc /= sendt*N_states*N_det) stop "push" - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 4*N_states*N_det) stop "push" + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE) + if(rc /= sendt*N_states*N_det) stop "push" else contrib = 0d0 do i=1,N_det @@ -266,7 +254,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(rc /= 8*N_states) stop "push" N_buf = N_bufi - N_buf = (/0,1,0/) + !N_buf = (/0,1,0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -313,6 +301,7 @@ END_PROVIDER subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) use f77_zmq implicit none + integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(out) :: cur_cp double precision, intent(inout) :: delta_loc(N_states, N_det, 2) @@ -335,11 +324,11 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, if(cur_cp /= -1) then - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*4*N_det, 0) - if(rc /= 4*N_states*N_det) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0) + if(rc /= sendt*N_states*N_det) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*4*N_det, 0) - if(rc /= 4*N_states*N_det) stop "pulld" + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0) + if(rc /= sendt*N_states*N_det) stop "pulld" do i=1,2 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) diff --git a/plugins/shiftedbk/selection_buffer.irp.f b/plugins/shiftedbk/selection_buffer.irp.f index 23f83f02..17410b7b 100644 --- a/plugins/shiftedbk/selection_buffer.irp.f +++ b/plugins/shiftedbk/selection_buffer.irp.f @@ -8,7 +8,6 @@ subroutine create_selection_buffer(N, siz_, res) integer :: siz siz = max(siz_,1) - allocate(res%det(N_int, 2, siz), res%val(siz)) res%val(:) = 0d0 @@ -18,19 +17,6 @@ subroutine create_selection_buffer(N, siz_, res) res%cur = 0 end subroutine -subroutine reset_selection_buffer(res) - use selection_types - implicit none - - type(selection_buffer), intent(out) :: res - - res%val(:) = 0d0 - res%det(:,:,:) = 0_8 - res%mini = 0d0 - res%cur = 0 -end subroutine - - subroutine delete_selection_buffer(b) use selection_types implicit none @@ -53,7 +39,7 @@ subroutine add_to_selection_buffer(b, det, val) double precision, intent(in) :: val integer :: i - if(b%N > 0 .and. val < b%mini) then + if(b%N > 0 .and. val <= b%mini) then b%cur += 1 b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) b%val(b%cur) = val diff --git a/plugins/shiftedbk/selection_types.f90 b/plugins/shiftedbk/selection_types.f90 new file mode 100644 index 00000000..29e48524 --- /dev/null +++ b/plugins/shiftedbk/selection_types.f90 @@ -0,0 +1,9 @@ +module selection_types + type selection_buffer + integer :: N, cur + integer(8) , pointer :: det(:,:,:) + double precision, pointer :: val(:) + double precision :: mini + endtype +end module + diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 6dec4bb7..c022a88d 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -33,7 +33,6 @@ END_PROVIDER END_PROVIDER -<<<<<<< HEAD BEGIN_PROVIDER [ integer, N_dress_int_buffer ] &BEGIN_PROVIDER [ integer, N_dress_double_buffer ] &BEGIN_PROVIDER [ integer, N_dress_det_buffer ] @@ -197,14 +196,15 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) c_alpha(:,1) += c_alpha(:,i) end do - delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,1) + delta_ij_tmp(:,:,1) -= delta_ij_loc(:,:,1,1) + delta_ij_tmp(:,:,2) -= delta_ij_loc(:,:,2,1) - - print *, "SUM ALPHA2 PRE", global_sum_alpha2 + !print *, "SUM ALPHA2 PRE", global_sum_alpha2 !global_sum_alpha2(:) -= c_alpha(:,1) - print *, "SUM ALPHA2 POST", c_alpha(:,1) + print *, "SUM C_ALPHA^2 ", global_sum_alpha2(:) + print *, "*** DRESSINS DIVIDED BY 1+SUM C_ALPHA^2 ***" do i=1,N_states - ! delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) + delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) end do global_sum_alpha2 = 0d0 end subroutine @@ -257,6 +257,10 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili do l_sd=1,n_minilist hdress = c_alpha(i) * a_h_i(l_sd, iproc) sdress = c_alpha(i) * a_s2_i(l_sd, iproc) + !if(c_alpha(i) * a_s2_i(l_sd, iproc) > 1d-1) then + ! call debug_det(det_minilist(1,1,l_sd), N_int) + ! call debug_det(alpha,N_int) + !end if delta_ij_loc(i, minilist(l_sd), 1) += hdress delta_ij_loc(i, minilist(l_sd), 2) += sdress end do From f45fc46b4f3416b9ab04054ffaaf614f8b43ed8d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 17 May 2018 16:02:51 +0200 Subject: [PATCH 23/29] multistate shifted_bk --- plugins/dress_zmq/dress_slave.irp.f | 24 +++++++++-- plugins/dress_zmq/dress_stoch_routines.irp.f | 45 +++++++++++++------- plugins/dress_zmq/dressing.irp.f | 7 +-- plugins/dress_zmq/run_dress_slave.irp.f | 8 ++-- plugins/shiftedbk/shifted_bk_routines.irp.f | 15 ++++--- 5 files changed, 66 insertions(+), 33 deletions(-) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 75c31422..acae326f 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -28,6 +28,11 @@ subroutine run_wf double precision :: energy(N_states_diag) character*(64) :: states(1) integer :: rc, i +integer, external :: zmq_get_dvector, zmq_get_N_det_generators + integer, external :: zmq_get_psi, zmq_get_N_det_selectors + integer, external :: zmq_get_N_states_diag + double precision :: tmp + call provide_everything @@ -43,10 +48,22 @@ subroutine run_wf exit else if (zmq_state(:5) == 'dress') then - - ! Selection + ! Dress ! --------- - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + !TOUCH psi_det + if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle + if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'dress_stoch_istate',tmp,1) == -1) cycle + dress_stoch_istate = int(tmp) + + + TOUCH dress_stoch_istate + TOUCH state_average_weight + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns @@ -57,7 +74,6 @@ subroutine run_wf call dress_slave_tcp(0, energy) !!$OMP END PARALLEL endif - end do end diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index d8fec690..cb43baad 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -4,11 +4,12 @@ BEGIN_PROVIDER [ integer, fragment_first ] END_PROVIDER -subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) +subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) use f77_zmq implicit none + integer, intent(in) :: lndet character(len=64000) :: task character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull @@ -27,11 +28,9 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) double precision :: time integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) - - task(:) = CHAR(0) temp(:) = CHAR(0) - allocate(delta(N_states,N_det), delta_s2(N_det,N_states)) + allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states SOFT_TOUCH dress_stoch_istate @@ -39,14 +38,14 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) state_average_weight(dress_stoch_istate) = 1.d0 TOUCH state_average_weight + !provide psi_coef_generators provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors - + !print *, dress_e0_denominator print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' - - + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull, 'dress') integer, external :: zmq_put_psi @@ -54,6 +53,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer, external :: zmq_put_N_det_selectors integer, external :: zmq_put_dvector integer, external :: zmq_set_running + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then stop 'Unable to put psi on ZMQ server' endif @@ -66,7 +66,14 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',dress_e0_denominator,size(dress_e0_denominator)) == -1) then stop 'Unable to put energy on ZMQ server' endif - + if (zmq_put_dvector(zmq_to_qp_run_socket,1,"state_average_weight",state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,"dress_stoch_istate",real(dress_stoch_istate,8),1) == -1) then + stop 'Unable to put dress_stoch_istate on ZMQ server' + endif + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer :: ipos, sz integer :: block(1), block_i, cur_tooth_reduce, ntas @@ -131,13 +138,13 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) !i = omp_get_thread_num() !if (i==0) then call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& - dress_stoch_istate) + dress_stoch_istate) !else ! call dress_slave_inproc(i) !endif !!$OMP END PARALLEL delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) - delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2_out(dress_stoch_istate,1:N_det) + delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' @@ -194,7 +201,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, allocatable :: double_buf(:) integer(bit_kind), allocatable :: det_buf(:,:,:) integer, external :: zmq_delete_tasks - + last_cp = 10000000 allocate(agreg_for_cp(N_cp)) agreg_for_cp = 0 allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) @@ -222,6 +229,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) if(floop) then call wall_time(time) + print *, "first_pull", time-time0 time0 = time floop = .false. end if @@ -237,6 +245,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, endif if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! dress_detail(:, ind) = dress_mwen(:) + !print *, "DETAIL", ind, dress_mwen else if(cur_cp > 0) then if(ind == 0) cycle !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) @@ -248,8 +257,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do i=1,N_det cp(:,i,cur_cp,2) += delta_loc(:,i,2) end do - + !$OMP END PARALLEL DO agreg_for_cp(cur_cp) += ind + !print *, agreg_for_cp(cur_cp), ind, needed_by_cp(cur_cp), cur_cp if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then stop "too much results..." end if @@ -270,6 +280,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, su += val su2 += val*val end do + !$OMP END PARALLEL DO avg = su / cps_N(cur_cp) eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) @@ -278,8 +289,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if - print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == cur_cp-2) then + !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30)) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -294,7 +306,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, delta(:,:) = cp(:,:,last_cp,1) delta_s2(:,:) = cp(:,:,last_cp,2) - + dress(istate) = E(istate)+E0+avg call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine @@ -365,6 +377,7 @@ END_PROVIDER logical :: fracted integer :: first_suspect + provide psi_coef_generators first_suspect = 1 allocate(filler(n_det_generators)) @@ -397,7 +410,7 @@ END_PROVIDER end do l=first_det_of_comb - call random_seed(put=(/321,654,65,321,65,321,654,65,321,65321,654,65,321,65321,654,65,321,65321,654,65,321,65/)) + call random_seed(put=(/321,654,65,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) call RANDOM_NUMBER(comb) lfiller = 1 nfiller = 1 diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index bbca0c39..a25aaf2f 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,14 +100,15 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 1.d-5 + relative_error = 5.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") - - call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) + + call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij) delta_ij_tmp(:,:,1) = del(:,:) delta_ij_tmp(:,:,2) = del_s2(:,:) + deallocate(dress, del, del_s2) end if END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 7135c9cf..b61a4d5a 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -116,7 +116,7 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP ATOMIC done_for(done_cp_at_det(i_generator)) += 1 - + ! print *, "IGEN", i_generator, done_cp_at_det(i_generator) delta_ij_loc(:,:,:) = 0d0 call generator_start(i_generator, iproc) call alpha_callback(delta_ij_loc, i_generator, subset, iproc) @@ -175,12 +175,14 @@ subroutine run_dress_slave(thread,iproce,energy) delta_ij_loc = 0d0 if(cur_cp < 1) stop "cur_cp < 1" do i=1,cur_cp - delta_ij_loc(:,:,:) += cp(:,:,i,:) + delta_ij_loc(:,:,1) += cp(:,:,i,1) + delta_ij_loc(:,:,2) += cp(:,:,i,2) end do delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:) + delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1) + delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2) end do call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) end if diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index c022a88d..30153dbc 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -196,16 +196,17 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) c_alpha(:,1) += c_alpha(:,i) end do - delta_ij_tmp(:,:,1) -= delta_ij_loc(:,:,1,1) - delta_ij_tmp(:,:,2) -= delta_ij_loc(:,:,2,1) + + delta_ij_tmp(:,:,1) -= delta_ij_loc(:,:,1,1) + delta_ij_tmp(:,:,2) -= delta_ij_loc(:,:,2,1) !print *, "SUM ALPHA2 PRE", global_sum_alpha2 !global_sum_alpha2(:) -= c_alpha(:,1) - print *, "SUM C_ALPHA^2 ", global_sum_alpha2(:) - print *, "*** DRESSINS DIVIDED BY 1+SUM C_ALPHA^2 ***" - do i=1,N_states - delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) - end do + print *, "SUM C_ALPHA^2 =", global_sum_alpha2(:) + !print *, "*** DRESSINS DIVIDED BY 1+SUM C_ALPHA^2 ***" + !do i=1,N_states + ! delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) + !end do global_sum_alpha2 = 0d0 end subroutine From 9245c090e29888dbb8cb65a91b19e97d26614883 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 22 May 2018 19:11:10 +0200 Subject: [PATCH 24/29] debugging --- plugins/dress_zmq/dress_general.irp.f | 4 +-- plugins/dress_zmq/dress_slave.irp.f | 6 ++--- plugins/dress_zmq/dress_stoch_routines.irp.f | 27 +++++++++++++------- plugins/dress_zmq/dressing.irp.f | 6 +++-- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- plugins/mrcepa0/dressing.irp.f | 5 ++-- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 1 - plugins/shiftedbk/shifted_bk_routines.irp.f | 20 +++++++++------ src/Determinants/H_apply.irp.f | 5 ++-- src/Utils/map_functions.irp.f | 4 +-- 10 files changed, 46 insertions(+), 34 deletions(-) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index 3fe0c676..048605b8 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -63,11 +63,11 @@ subroutine run_dressing(N_st,energy) enddo print *, 'Variational energy ' do i=1,N_st - print *, i, psi_energy(i) + print *, i, psi_energy(i)+nuclear_repulsion enddo print *, 'Dressed energy ' do i=1,N_st - print *, i, ci_energy_dressed(i) + print *, i, ci_energy_dressed(i)+nuclear_repulsion enddo endif diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index acae326f..8983c2bc 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -59,10 +59,8 @@ integer, external :: zmq_get_dvector, zmq_get_N_det_generators if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'dress_stoch_istate',tmp,1) == -1) cycle dress_stoch_istate = int(tmp) - - - TOUCH dress_stoch_istate - TOUCH state_average_weight + psi_energy(1:N_states) = energy(1:N_states) + TOUCH psi_energy dress_stoch_istate state_average_weight PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index cb43baad..b6a50024 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -133,16 +133,18 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) print *, irp_here, ': Failed in zmq_set_running' endif - !!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) & - ! !$OMP PRIVATE(i) - !i = omp_get_thread_num() - !if (i==0) then + call omp_set_nested(.true.) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& dress_stoch_istate) - !else - ! call dress_slave_inproc(i) - !endif - !!$OMP END PARALLEL + else + call dress_slave_inproc(i) + endif + !$OMP END PARALLEL + call omp_set_nested(.false.) delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') @@ -237,6 +239,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time) end if + print *, cur_cp, ind if(cur_cp == -1) then call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) @@ -348,7 +351,7 @@ end function ! ! gen_per_cp : number of generators per checkpoint END_DOC - comb_teeth = 64 + comb_teeth = min(1+N_det/10,64) N_cps_max = 16 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -505,6 +508,12 @@ END_PROVIDER done_cp_at_det(dress_jobs(i)) = cur_cp needed_by_cp(cur_cp) += 1 end do + + +print *, 'needed_by_cp' +do i=1,cur_cp + print *, i, needed_by_cp(i) +enddo under_det = 0 diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index b2d3bddd..12fef07a 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -65,12 +65,14 @@ END_PROVIDER BEGIN_PROVIDER [ integer , N_det_delta_ij ] implicit none - !N_det_delta_ij = 0!N_det + N_det_delta_ij = 1 END_PROVIDER BEGIN_PROVIDER [ double precision, delta_ij, (N_states, N_det, 2) ] implicit none - if(.true.) delta_ij(:,:N_det_delta_ij, :) = delta_ij_tmp(:,:,:) + if(.true.) then + delta_ij(:,:N_det_delta_ij, :) = delta_ij_tmp(:,:,:) + endif delta_ij(:,N_det_delta_ij+1:,:) = 0d0 END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 6b392666..8801cb3f 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -48,7 +48,7 @@ subroutine run_dress_slave(thread,iproce,energy) double precision :: fac - if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" +! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2)) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index a376585c..ad557d1a 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -581,7 +581,7 @@ END_PROVIDER double precision, allocatable :: mrcc(:) double precision :: E_CI_before!, relative_error - double precision, save :: target_error = 0d0 + double precision, save :: target_error = 2d-2 allocate(mrcc(N_states)) @@ -594,11 +594,10 @@ END_PROVIDER threshold_selectors = 1.d0 threshold_generators = 1d0 if(target_error /= 0d0) then - target_error = target_error / 2d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 + target_error = target_error * 0.5d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 else target_error = 1d-4 end if - target_error = 0d0 call ZMQ_mrcc(E_CI_before, mrcc, delta_ij_mrcc_zmq, delta_ij_s2_mrcc_zmq, abs(target_error)) mrcc_previous_E(:) = mrcc_E0_denominator(:) diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index 1b605e6d..4aa6307e 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -310,7 +310,6 @@ subroutine mrcc_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, m end if end do if(cur_cp == 0) then - print *, "no checkpoint reached so far..." cycle pullLoop end if !!!!!!!!!!!! diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 8940a77f..9340ae97 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -51,6 +51,7 @@ subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) double precision, intent(out) :: double_buf(N_dress_double_buffer) integer(bit_kind), intent(out) :: det_buf(N_int, 2, N_dress_det_buffer) integer :: i + int_buf(:) = 0 call sort_selection_buffer(sb(iproc)) det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) @@ -115,15 +116,18 @@ subroutine delta_ij_done() old_det_gen = N_det_generators - call sort_selection_buffer(global_sb) - call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) - call copy_H_apply_buffer_to_wf() - - if (s2_eig.or.(N_states > 1) ) then - call make_s2_eigenfunction + if (dress_stoch_istate == N_states) then + ! Add buffer only when the last state is computed + call sort_selection_buffer(global_sb) + call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) + call copy_H_apply_buffer_to_wf() + if (s2_eig.or.(N_states > 1) ) then + call make_s2_eigenfunction + endif + call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) + call save_wavefunction endif - call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) - call save_wavefunction + end subroutine diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index e5197a21..cd1baa8f 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -192,8 +192,9 @@ subroutine copy_H_apply_buffer_to_wf call normalize(psi_coef,N_det) SOFT_TOUCH N_det psi_det psi_coef -! logical :: found_duplicates -! call remove_duplicates_in_psi_det(found_duplicates) + logical :: found_duplicates + call remove_duplicates_in_psi_det(found_duplicates) + end subroutine remove_duplicates_in_psi_det(found_duplicates) diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f index c08182c6..bb82243e 100644 --- a/src/Utils/map_functions.irp.f +++ b/src/Utils/map_functions.irp.f @@ -101,8 +101,8 @@ subroutine map_load_from_disk(filename,map) k = map % consolidated_idx (i+2_8) l = map % consolidated_idx (i+1_8) n_elements = int(k - l, 4) - key_p => map % consolidated_key (l:l+n_elements) - value_p => map % consolidated_value ( l:l+n_elements ) + key_p => map % consolidated_key (l:l+n_elements-1) + value_p => map % consolidated_value ( l:l+n_elements-1 ) map % map(i) % key => key_p map % map(i) % value => value_p map % map(i) % sorted = .True. From 3044e7e72a1f35a909052594e69569b828b3ded9 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 23 May 2018 13:13:30 +0200 Subject: [PATCH 25/29] removed limit to deterministic set size --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- plugins/Generators_full/generators.irp.f | 2 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 11 ++++++----- plugins/dress_zmq/dressing.irp.f | 2 +- plugins/shiftedbk/selection_buffer.irp.f | 19 +++++++++++++++++++ plugins/shiftedbk/shifted_bk_routines.irp.f | 20 ++++++++++++-------- 6 files changed, 40 insertions(+), 16 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index d3f7486f..73d71365 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -323,7 +323,7 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ eqt = 0.d0 endif call wall_time(time) - if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error) .and. Nabove(tooth) >= 30) then + if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 30) then ! Termination pt2(pt2_stoch_istate) = avg error(pt2_stoch_istate) = eqt diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index 4f2c715e..98d49069 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm + psi_average_norm_contrib_sorted(i) - if (norm >= threshold_generators) then + if (norm > threshold_generators+1d-10) then N_det_generators = i exit endif diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index cb43baad..8844e064 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -227,6 +227,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, pullLoop : do while (loop) call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) + !print *, cur_cp, ind if(floop) then call wall_time(time) print *, "first_pull", time-time0 @@ -375,7 +376,7 @@ END_PROVIDER integer, allocatable :: filler(:) integer :: nfiller, lfiller, cfiller logical :: fracted - + integer :: first_suspect provide psi_coef_generators first_suspect = 1 @@ -436,7 +437,7 @@ END_PROVIDER end if !!!!!!!!!!!!!!!!!!!!!!!! - if(.FALSE.) then + if(.TRUE.) then do l=first_suspect,N_det_generators if((.not. computed(l))) then N_dress_jobs+=1 @@ -620,7 +621,6 @@ subroutine add_comb(com, computed, cp, N, tbc) !DIR$ FORCEINLINE call get_comb(com, dets) - k=N+1 do i = 1, comb_teeth l = dets(i) @@ -681,10 +681,11 @@ END_PROVIDER norm_left = 1d0 comb_step = 1d0/dfloat(comb_teeth) + !print *, "comb_step", comb_step first_det_of_comb = 1 - do i=1,min(100,N_det_generators) + do i=1,N_det_generators ! min(100,N_det_generators) + first_det_of_comb = i if(dress_weight(i)/norm_left < comb_step) then - first_det_of_comb = i exit end if norm_left -= dress_weight(i) diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index a25aaf2f..0e95ef56 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 5.d-5 + relative_error = 0d0 ! 5.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") diff --git a/plugins/shiftedbk/selection_buffer.irp.f b/plugins/shiftedbk/selection_buffer.irp.f index 17410b7b..8c3bee91 100644 --- a/plugins/shiftedbk/selection_buffer.irp.f +++ b/plugins/shiftedbk/selection_buffer.irp.f @@ -137,3 +137,22 @@ subroutine sort_selection_buffer(b) b%cur = nmwen end subroutine + + +subroutine truncate_to_mini(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + + do + if(b%cur == 0) exit + if(b%val(b%cur) <= b%mini) exit + b%cur -= 1 + end do +end subroutine + + + + + diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 30153dbc..c2775ab2 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -52,22 +52,26 @@ subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) integer :: i call sort_selection_buffer(sb(iproc)) - det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) - double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) - double_buf(sb(iproc)%cur+1:sb(iproc)%cur+N_states) = slave_sum_alpha2(:,iproc) - N_buf(1) = 1 - N_buf(2) = sb(iproc)%cur+N_states - N_buf(3) = sb(iproc)%cur - + if(sb(iproc)%cur > 0) then !$OMP CRITICAL call merge_selection_buffers(sb(iproc), mini_sb) !call sort_selection_buffer(mini_sb) do i=1,Nproc - sb(i)%mini = min(sb(i)%mini, mini_sb%mini) + mini_sb%mini = min(sb(i)%mini, mini_sb%mini) + end do + do i=1,Nproc + sb(i)%mini = mini_sb%mini end do !$OMP END CRITICAL end if + call truncate_to_mini(sb(iproc)) + det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) + double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) + double_buf(sb(iproc)%cur+1:sb(iproc)%cur+N_states) = slave_sum_alpha2(:,iproc) + N_buf(1) = 1 + N_buf(2) = sb(iproc)%cur+N_states + N_buf(3) = sb(iproc)%cur sb(iproc)%cur = 0 slave_sum_alpha2(:,iproc) = 0d0 From bac9f32df14ddfb66a455b8fb0338e681cfd0dc5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 23 May 2018 13:48:42 +0200 Subject: [PATCH 26/29] 10 combs --- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 633f782c..17fe0c95 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -352,7 +352,7 @@ end function ! ! gen_per_cp : number of generators per checkpoint END_DOC - comb_teeth = min(1+N_det/10,64) + comb_teeth = min(1+N_det/10,10) N_cps_max = 16 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER From 4b1a77c5c0fbd34825fd3617d6e69ce608731e9f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 23 May 2018 18:16:33 +0200 Subject: [PATCH 27/29] Iterative sbk, broken --- plugins/Psiref_Utils/psi_ref_utils.irp.f | 4 +- plugins/dress_zmq/dress_general.irp.f | 4 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 12 +- plugins/dress_zmq/dressing.irp.f | 3 +- plugins/shiftedbk/EZFIO.cfg | 39 +++++ plugins/shiftedbk/selection_buffer.irp.f | 86 ++++++++++ plugins/shiftedbk/shifted_bk_iter.irp.f | 159 +++++++++++++++++++ plugins/shiftedbk/shifted_bk_routines.irp.f | 21 ++- src/Determinants/H_apply.irp.f | 4 +- src/Determinants/determinants.irp.f | 15 +- src/Determinants/psi_cas.irp.f | 4 +- 11 files changed, 314 insertions(+), 37 deletions(-) create mode 100644 plugins/shiftedbk/shifted_bk_iter.irp.f diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f index c59bbd9f..a63b0ded 100644 --- a/plugins/Psiref_Utils/psi_ref_utils.irp.f +++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f @@ -9,7 +9,7 @@ use bitmasks ! function. END_DOC call sort_dets_by_det_search_key(N_det_ref, psi_ref, psi_ref_coef, & - psi_ref_sorted_bit, psi_ref_coef_sorted_bit) + psi_ref_sorted_bit, psi_ref_coef_sorted_bit, N_states) END_PROVIDER @@ -152,7 +152,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_ref, psi_non_ref, psi_non_ref_coef, & - psi_non_ref_sorted_bit, psi_non_ref_coef_sorted_bit) + psi_non_ref_sorted_bit, psi_non_ref_coef_sorted_bit, N_states) END_PROVIDER diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index 048605b8..b99eb1d7 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -38,10 +38,10 @@ subroutine run_dressing(N_st,energy) E_old = sum(psi_energy(:)) print *, 'Variational energy ' do i=1,N_st - print *, i, psi_energy(i) + print *, i, psi_energy(i)+nuclear_repulsion enddo !print *, "DELTA IJ", delta_ij(1,1,1) - if(.true.) dummy = delta_ij_tmp(1,1,1) + PROVIDE delta_ij_tmp if(.true.) call delta_ij_done() print *, 'Dressed energy ' do i=1,N_st diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 17fe0c95..6b7bf396 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -229,10 +229,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, pullLoop : do while (loop) call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) - !print *, cur_cp, ind if(floop) then call wall_time(time) - print *, "first_pull", time-time0 time0 = time floop = .false. end if @@ -240,8 +238,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, call wall_time(time) end if - print *, cur_cp, ind - if(cur_cp == -1) then call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then @@ -295,7 +291,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30)) then + if ((dabs(eqt/(avg+E0+E(istate))) < relative_error .and. cps_N(cur_cp) >= 10)) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -511,12 +507,6 @@ END_PROVIDER end do -print *, 'needed_by_cp' -do i=1,cur_cp - print *, i, needed_by_cp(i) -enddo - - under_det = 0 cp_first_tooth = 0 do i=1,N_dress_jobs diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 12fef07a..bf2ab207 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -96,12 +96,13 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] E_CI_before(:) = psi_energy(:) + nuclear_repulsion threshold_selectors = 1.d0 threshold_generators = 1.d0 + SOFT_TOUCH threshold_selectors threshold_generators ! if(errr /= 0d0) then ! errr = errr / 2d0 ! else ! errr = 1d-4 ! end if - relative_error = 5.d-5 + relative_error = 1.d-2 call write_double(6,relative_error,"Convergence of the stochastic algorithm") diff --git a/plugins/shiftedbk/EZFIO.cfg b/plugins/shiftedbk/EZFIO.cfg index 001535b9..be4998dd 100644 --- a/plugins/shiftedbk/EZFIO.cfg +++ b/plugins/shiftedbk/EZFIO.cfg @@ -3,3 +3,42 @@ type: Perturbation doc: Type of zeroth-order Hamiltonian [ EN | Barycentric ] interface: ezfio,provider,ocaml default: EN +[energy] +type: double precision +doc: Calculated Selected FCI energy +interface: ezfio + +[energy_pt2] +type: double precision +doc: Calculated FCI energy + PT2 +interface: ezfio + +[iterative_save] +type: integer +doc: Save data at each iteration : 1(Append) | 2(Overwrite) | 3(NoSave) +interface: ezfio,ocaml +default: 2 + +[n_iter] +interface: ezfio +doc: number of iterations +type:integer + +[n_det_iter] +interface: ezfio +doc: number of determinants at iteration +type: integer +size: (full_ci_zmq.n_iter) + +[energy_iter] +interface: ezfio +doc: The energy without a pt2 correction for n_det +type: double precision +size: (determinants.n_states,full_ci_zmq.n_iter) + +[pt2_iter] +interface: ezfio +doc: The pt2 correction for n_det +type: double precision +size: (determinants.n_states,full_ci_zmq.n_iter) + diff --git a/plugins/shiftedbk/selection_buffer.irp.f b/plugins/shiftedbk/selection_buffer.irp.f index 8c3bee91..8b140666 100644 --- a/plugins/shiftedbk/selection_buffer.irp.f +++ b/plugins/shiftedbk/selection_buffer.irp.f @@ -156,3 +156,89 @@ end subroutine +subroutine unique_selection_buffer(b) + use selection_types + implicit none + BEGIN_DOC +! Removes duplicate determinants in the selection buffer + END_DOC + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i,j,k + integer(bit_kind), allocatable :: bit_tmp(:) + logical,allocatable :: duplicate(:) + + logical :: found_duplicates + integer*8, external :: det_search_key + + if (b%N == 0 .or. b%cur == 0) return + allocate (duplicate(b%cur), val(size(b%val)), detmp(N_int, 2, size(b%val)), bit_tmp(b%cur)) + call sort_dets_by_det_search_key(b%cur, b%det, b%val, detmp, val, 1) + + deallocate(b%det, b%val) + do i=b%cur+1,b%N + val(i) = 0.d0 + detmp(1:N_int,1:2,i) = 0_bit_kind + enddo + b%det => detmp + b%val => val + + do i=1,b%cur + bit_tmp(i) = det_search_key(b%det(1,1,i),N_int) + duplicate(i) = .False. + enddo + + do i=1,b%cur-1 + if (duplicate(i)) then + cycle + endif + j = i+1 + do while (bit_tmp(j)==bit_tmp(i)) + if (duplicate(j)) then + j += 1 + if (j > b%cur) then + exit + else + cycle + endif + endif + duplicate(j) = .True. + do k=1,N_int + if ( (b%det(k,1,i) /= b%det(k,1,j) ) & + .or. (b%det(k,2,i) /= b%det(k,2,j) ) ) then + duplicate(j) = .False. + exit + endif + enddo + j += 1 + if (j > b%cur) then + exit + endif + enddo + enddo + + found_duplicates = .False. + do i=1,b%cur + if (duplicate(i)) then + found_duplicates = .True. + exit + endif + enddo + + if (found_duplicates) then + k=0 + do i=1,N_det + if (.not.duplicate(i)) then + k += 1 + b%det(:,:,k) = b%det(:,:,i) + b%val(k) = b%val(i) + endif + enddo + b%cur = k + endif + deallocate (duplicate,bit_tmp) +end + + diff --git a/plugins/shiftedbk/shifted_bk_iter.irp.f b/plugins/shiftedbk/shifted_bk_iter.irp.f new file mode 100644 index 00000000..429efa4b --- /dev/null +++ b/plugins/shiftedbk/shifted_bk_iter.irp.f @@ -0,0 +1,159 @@ +program shifted_bk + implicit none + integer :: i,j,k + double precision, allocatable :: pt2(:) + integer :: degree + integer :: n_det_before + double precision :: threshold_davidson_in + + allocate (pt2(N_states)) + + double precision :: hf_energy_ref + logical :: has + double precision :: relative_error, absolute_error + integer :: N_states_p + character*(512) :: fmt + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + + pt2 = -huge(1.e0) + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + + call diagonalize_CI_dressed + call save_wavefunction + + call ezfio_has_hartree_fock_energy(has) + if (has) then + call ezfio_get_hartree_fock_energy(hf_energy_ref) + else + hf_energy_ref = ref_bitmask_energy + endif + + if (N_det > N_det_max) then + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + N_det = N_det_max + soft_touch N_det psi_det psi_coef + call diagonalize_CI_dressed + call save_wavefunction + N_states_p = min(N_det,N_states) + endif + + n_det_before = 0 + + character*(8) :: pt2_string + double precision :: threshold_selectors_save, threshold_generators_save + threshold_selectors_save = threshold_selectors + threshold_generators_save = threshold_generators + double precision :: error(N_states), energy(N_states) + error = 0.d0 + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + + if (.True.) then + pt2_string = '(sh-Bk) ' + do while ( (N_det < N_det_max) ) + write(*,'(A)') '--------------------------------------------------------------------------------' + + N_det_delta_ij = N_det + + do i=1,N_states + energy(i) = psi_energy(i)+nuclear_repulsion + enddo + + PROVIDE delta_ij_tmp + call delta_ij_done() + + call diagonalize_ci_dressed + do i=1,N_states + pt2(i) = ci_energy_dressed(i) - energy(i) + enddo + + N_states_p = min(N_det,N_states) + + print *, '' + print '(A,I12)', 'Summary at N_det = ', N_det + print '(A)', '-----------------------------------' + print *, '' + print *, '' + + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(12X,', N_states_p, '(6X,A7,1X,I6,10X))' + write(*,fmt) ('State',k, k=1,N_states_p) + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(A12,', N_states_p, '(1X,F14.8,15X))' + write(*,fmt) '# E ', energy(1:N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', energy(1:N_states_p)-energy(1) + write(*,fmt) '# Excit. (eV)', (energy(1:N_states_p)-energy(1))*27.211396641308d0 + endif + write(fmt,*) '(A12,', 2*N_states_p, '(1X,F14.8))' + write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) + write(*,'(A)') '#' + write(*,fmt) '# E+PT2 ', (energy(k)+pt2(k),error(k), k=1,N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', ( (energy(k)+pt2(k)-energy(1)-pt2(1)), & + dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (energy(k)+pt2(k)-energy(1)-pt2(1))*27.211396641308d0, & + dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) + endif + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + print *, '' + + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + + do k=1, N_states_p + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', energy(k) + print *, 'E+PT2'//pt2_string//' = ', energy(k)+pt2(k) + enddo + + print *, '-----' + if(N_states.gt.1)then + print *, 'Variational Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (energy(i) - energy(1)), & + (energy(i) - energy(1)) * 27.211396641308d0 + enddo + print *, '-----' + print*, 'Variational + perturbative Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (energy(i)+ pt2(i) - (energy(1) + pt2(1))), & + (energy(i)+ pt2(i) - (energy(1) + pt2(1))) * 27.211396641308d0 + enddo + endif + call ezfio_set_shiftedbk_energy_pt2(energy(1)+pt2(1)) +! call dump_fci_iterations_value(N_det,energy,pt2) + + n_det_before = N_det + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + if (N_det >= N_det_max) then + threshold_davidson = threshold_davidson_in + end if + call save_wavefunction + call ezfio_set_shiftedbk_energy(energy(1)) + call ezfio_set_shiftedbk_energy_pt2(ci_energy_dressed(1)) + enddo + endif + + + + +end + diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 3d4540bd..0663f75e 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -20,7 +20,7 @@ END_PROVIDER fock_diag_tmp_(:,:,:) = 0.d0 integer :: i - N_det_increase_factor = 1d0 + N_det_increase_factor = dble(N_states) n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) @@ -120,17 +120,16 @@ subroutine delta_ij_done() old_det_gen = N_det_generators - if (dress_stoch_istate == N_states) then - ! Add buffer only when the last state is computed - call sort_selection_buffer(global_sb) - call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) - call copy_H_apply_buffer_to_wf() - if (s2_eig.or.(N_states > 1) ) then - call make_s2_eigenfunction - endif - call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) - call save_wavefunction + ! Add buffer only when the last state is computed + call unique_selection_buffer(global_sb) + call sort_selection_buffer(global_sb) + call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) + call copy_H_apply_buffer_to_wf() + if (s2_eig.or.(N_states > 1) ) then + call make_s2_eigenfunction endif + call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) + call save_wavefunction end subroutine diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index cd1baa8f..3ba674f1 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -192,8 +192,8 @@ subroutine copy_H_apply_buffer_to_wf call normalize(psi_coef,N_det) SOFT_TOUCH N_det psi_det psi_coef - logical :: found_duplicates - call remove_duplicates_in_psi_det(found_duplicates) +! logical :: found_duplicates +! call remove_duplicates_in_psi_det(found_duplicates) end diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 8530fa64..d01c80ff 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -321,21 +321,24 @@ end subroutine END_DOC call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, & - psi_det_sorted_bit, psi_coef_sorted_bit) + psi_det_sorted_bit, psi_coef_sorted_bit, N_states) END_PROVIDER -subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out) +subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out, N_st) use bitmasks implicit none - integer, intent(in) :: Ndet + integer, intent(in) :: Ndet, N_st integer(bit_kind), intent(in) :: det_in (N_int,2,psi_det_size) - double precision , intent(in) :: coef_in(psi_det_size,N_states) + double precision , intent(in) :: coef_in(psi_det_size,N_st) integer(bit_kind), intent(out) :: det_out (N_int,2,psi_det_size) - double precision , intent(out) :: coef_out(psi_det_size,N_states) + double precision , intent(out) :: coef_out(psi_det_size,N_st) BEGIN_DOC ! Determinants are sorted are sorted according to their det_search_key. ! Useful to accelerate the search of a random determinant in the wave ! function. + ! + ! /!\ The first dimension of coef_out and coef_in need to be psi_det_size + ! END_DOC integer :: i,j,k integer, allocatable :: iorder(:) @@ -356,7 +359,7 @@ subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out) det_out(j,1,i) = det_in(j,1,iorder(i)) det_out(j,2,i) = det_in(j,2,iorder(i)) enddo - do k=1,N_states + do k=1,N_st coef_out(i,k) = coef_in(iorder(i),k) enddo enddo diff --git a/src/Determinants/psi_cas.irp.f b/src/Determinants/psi_cas.irp.f index 591843f7..58a71fbc 100644 --- a/src/Determinants/psi_cas.irp.f +++ b/src/Determinants/psi_cas.irp.f @@ -54,7 +54,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_cas, psi_cas, psi_cas_coef, & - psi_cas_sorted_bit, psi_cas_coef_sorted_bit) + psi_cas_sorted_bit, psi_cas_coef_sorted_bit, N_states) END_PROVIDER @@ -107,7 +107,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_cas, psi_non_cas, psi_non_cas_coef, & - psi_non_cas_sorted_bit, psi_non_cas_coef_sorted_bit) + psi_non_cas_sorted_bit, psi_non_cas_coef_sorted_bit, N_states) END_PROVIDER From d903d43443062b011c8d9e19e6739099358d75ae Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 25 May 2018 17:40:27 +0200 Subject: [PATCH 28/29] psi_det_sorted_gen --- plugins/Generators_CAS/generators.irp.f | 22 +++++++++- plugins/Generators_full/generators.irp.f | 18 ++++++++ plugins/dress_zmq/alpha_factory.irp.f | 46 ++++++++++----------- plugins/dress_zmq/dress_slave.irp.f | 2 +- plugins/shiftedbk/shifted_bk.irp.f | 2 +- plugins/shiftedbk/shifted_bk_routines.irp.f | 4 +- plugins/shiftedbk/shifted_bk_slave.irp.f | 2 +- 7 files changed, 67 insertions(+), 29 deletions(-) diff --git a/plugins/Generators_CAS/generators.irp.f b/plugins/Generators_CAS/generators.irp.f index 67d3cc31..6f770c2e 100644 --- a/plugins/Generators_CAS/generators.irp.f +++ b/plugins/Generators_CAS/generators.irp.f @@ -22,6 +22,9 @@ END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_gen, (N_int,2,psi_det_size) ] +&BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer, psi_det_sorted_gen_order, (psi_det_size) ] implicit none BEGIN_DOC ! For Single reference wave functions, the generator is the @@ -30,19 +33,36 @@ END_PROVIDER integer :: i, k, l, m logical :: good integer, external :: number_of_holes,number_of_particles + integer, allocatable :: nongen(:) + integer :: inongen + inongen = 0 + + allocate(nongen(N_det)) + m=0 do i=1,N_det good = ( number_of_holes(psi_det_sorted(1,1,i)) ==0).and.(number_of_particles(psi_det_sorted(1,1,i))==0 ) if (good) then m = m+1 + psi_det_sorted_gen_order(i) = m do k=1,N_int psi_det_generators(k,1,m) = psi_det_sorted(k,1,i) psi_det_generators(k,2,m) = psi_det_sorted(k,2,i) enddo - psi_coef_generators(m,:) = psi_coef_sorted(m,:) + psi_coef_generators(m,:) = psi_coef_sorted(i,:) + else + inongen += 1 + nongen(inongen) = i endif enddo + psi_det_sorted_gen(:,:,:N_det_generators) = psi_det_generators(:,:,:N_det_generators) + psi_coef_sorted_gen(:N_det_generators, :) = psi_coef_generators(:N_det_generators, :) + do i=1,inongen + psi_det_sorted_gen_order(nongen(i)) = N_det_generators+i + psi_det_sorted_gen(:,:,:N_det_generators+i) = psi_det_sorted(:,:,nongen(i)) + psi_coef_sorted_gen(N_det_generators+i, :) = psi_det_sorted(nongen(i),:) + end do END_PROVIDER BEGIN_PROVIDER [ integer, size_select_max] diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index 98d49069..16dade7e 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -35,6 +35,24 @@ END_PROVIDER END_PROVIDER + BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_gen, (N_int,2,psi_det_size) ] +&BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer, psi_det_sorted_gen_order, (psi_det_size) ] + + implicit none + BEGIN_DOC + ! For Single reference wave functions, the generator is the + ! Hartree-Fock determinant + END_DOC + integer :: i, k + psi_det_sorted_gen = psi_det_sorted + psi_coef_sorted_gen = psi_coef_sorted + !do i=1,N_det_generators + psi_det_sorted_gen_order = psi_det_sorted_order + !end do +END_PROVIDER + + BEGIN_PROVIDER [integer, degree_max_generators] implicit none BEGIN_DOC diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 1cb286fc..261966be 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -20,15 +20,15 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) end subroutine -BEGIN_PROVIDER [ integer, psi_from_sorted, (N_det) ] +BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ] implicit none integer :: i,inpsisor - psi_from_sorted = 0 + psi_from_sorted_gen = 0 do i=1,N_det - psi_from_sorted(psi_det_sorted_order(i)) = i - inpsisor = psi_det_sorted_order(i) + psi_from_sorted_gen(psi_det_sorted_gen_order(i)) = i + inpsisor = psi_det_sorted_gen_order(i) if(inpsisor <= 0) stop "idx_non_ref_from_sorted" end do END_PROVIDER @@ -100,7 +100,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) !PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order !PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns !PROVIDE psi_bilinear_matrix_transp_order @@ -117,7 +117,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1 i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) + indices(k) = psi_det_sorted_gen_order(psi_bilinear_matrix_order(l_a)) k=k+1 endif enddo @@ -136,7 +136,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index i = psi_bilinear_matrix_transp_columns(l_a) if (exc_degree(i) < 3) cycle if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order( & + indices(k) = psi_det_sorted_gen_order( & psi_bilinear_matrix_order( & psi_bilinear_matrix_transp_order(l_a))) k=k+1 @@ -160,15 +160,15 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index negMask(i,1) = not(psi_det_generators(i,1,i_generator)) negMask(i,2) = not(psi_det_generators(i,2,i_generator)) end do - if(psi_det_generators(1,1,i_generator) /= psi_det_sorted(1,1,i_generator)) stop "gen <> sorted" + if(psi_det_generators(1,1,i_generator) /= psi_det_sorted_gen(1,1,i_generator)) stop "gen <> sorted" do k=1,nmax i = indices(k) - mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_gen(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_gen(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_gen(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_gen(j,2,i)) nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do @@ -177,8 +177,8 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index preinteresting(0) += 1 preinteresting(preinteresting(0)) = i do j=1,N_int - preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i) - preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i) + preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted_gen(j,1,i) + preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted_gen(j,2,i) enddo else if(nt <= 2) then prefullinteresting(0) += 1 @@ -287,13 +287,13 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index do ii=1,prefullinteresting(0) i = prefullinteresting(ii) nt = 0 - mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_gen(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_gen(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) if (nt > 2) cycle do j=N_int,2,-1 - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_gen(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_gen(j,2,i)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) if (nt > 2) exit end do @@ -301,11 +301,11 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) - fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) + fullminilist(1,1,fullinteresting(0)) = psi_det_sorted_gen(1,1,i) + fullminilist(1,2,fullinteresting(0)) = psi_det_sorted_gen(1,2,i) do j=2,N_int - fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) - fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) + fullminilist(j,1,fullinteresting(0)) = psi_det_sorted_gen(j,1,i) + fullminilist(j,2,fullinteresting(0)) = psi_det_sorted_gen(j,2,i) enddo end if end do @@ -390,7 +390,7 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned, allocate(abuf(siz), labuf(N_det), putten(N_det), det_minilist(N_int, 2, N_det)) do i=1,siz - abuf(i) = psi_from_sorted(rabuf(i)) + abuf(i) = psi_from_sorted_gen(rabuf(i)) end do putten = .false. diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index acae326f..47053c3d 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -65,7 +65,7 @@ integer, external :: zmq_get_dvector, zmq_get_N_det_generators TOUCH state_average_weight PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order !!$OMP PARALLEL PRIVATE(i) diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index 5438ec90..df971b4f 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -5,7 +5,7 @@ program shifted_bk END_DOC PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index c2775ab2..6a481389 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -221,7 +221,7 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili BEGIN_DOC !delta_ij_loc(:,:,1) : dressing column for H !delta_ij_loc(:,:,2) : dressing column for S2 - !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) + !minilist : indices of determinants connected to alpha ( in psi_det ) !n_minilist : size of minilist !alpha : alpha determinant END_DOC @@ -280,7 +280,7 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili !delta_ij_loc(:,:,1) : dressing column for H !delta_ij_loc(:,:,2) : dressing column for S2 !i_gen : generator index in psi_det_generators - !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) + !minilist : indices of determinants connected to alpha ( in psi_det ) !n_minilist : size of minilist !alpha : alpha determinant END_DOC diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index d31b55d9..901940ed 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -5,7 +5,7 @@ program shifted_bk END_DOC PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order From 5b60b4bee12f1aecb2a1c63c3e174073c0347b6d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 12 Jun 2018 14:44:32 +0200 Subject: [PATCH 29/29] syntax error in Generators_CAS --- plugins/Generators_CAS/generators.irp.f | 4 ++-- plugins/shiftedbk/NEEDED_CHILDREN_MODULES | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/Generators_CAS/generators.irp.f b/plugins/Generators_CAS/generators.irp.f index 6f770c2e..4be8c061 100644 --- a/plugins/Generators_CAS/generators.irp.f +++ b/plugins/Generators_CAS/generators.irp.f @@ -60,8 +60,8 @@ END_PROVIDER psi_coef_sorted_gen(:N_det_generators, :) = psi_coef_generators(:N_det_generators, :) do i=1,inongen psi_det_sorted_gen_order(nongen(i)) = N_det_generators+i - psi_det_sorted_gen(:,:,:N_det_generators+i) = psi_det_sorted(:,:,nongen(i)) - psi_coef_sorted_gen(N_det_generators+i, :) = psi_det_sorted(nongen(i),:) + psi_det_sorted_gen(:,:,N_det_generators+i) = psi_det_sorted(:,:,nongen(i)) + psi_coef_sorted_gen(N_det_generators+i, :) = psi_coef_sorted(nongen(i),:) end do END_PROVIDER diff --git a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES index 4f09bfc8..0a06e986 100644 --- a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES +++ b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -dress_zmq DavidsonDressed Selectors_full Generators_full +dress_zmq DavidsonDressed Selectors_full Generators_CAS