diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 57fce783..feea575e 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -43,14 +43,13 @@ subroutine run_wf ! Selection ! --------- - print *, 'dress' call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - + !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() call dress_slave_tcp(i+1, energy) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 06b5d538..3ccdc9f7 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -134,6 +134,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, implicit none + integer, parameter :: delta_loc_N = 2 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(in) :: istate @@ -144,7 +145,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: delta_loc(:,:,:,:), delta_det(:,:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -154,7 +155,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: more integer :: i, j, k, i_state, N - integer :: task_id, ind + integer :: task_id, ind, inds(delta_loc_N) double precision, save :: time0 = -1.d0 double precision :: time, timeLast, old_tooth double precision, external :: omp_get_wtime @@ -162,12 +163,17 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) integer :: total_computed - + integer :: delta_loc_cur + double precision :: fac(delta_loc_N) , wei(delta_loc_N) + logical :: ok + + delta_loc_cur = 1 + delta = 0d0 delta_s2 = 0d0 allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2)) + allocate(delta_loc(N_states, N_det, 2, delta_loc_N)) dress_detail = 0d0 delta_det = 0d0 cp = 0d0 @@ -196,58 +202,102 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, cur_cp = 0 old_cur_cp = 0 logical :: loop + integer :: felem, felem_loc loop = .true. - + felem = N_det+1 pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) - dress_mwen(:) = 0d0 - - !!!!! A VERIFIER !!!!! - do i_state=1,N_states - do i=1, N_det - dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(i, i_state) - end do - end do - - dress_detail(:, ind) += dress_mwen(:) - do j=1,N_cp !! optimizable - if(cps(ind, j) > 0d0) then - if(tooth_of_det(ind) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" - double precision :: fac - integer :: toothMwen - logical :: fracted - fac = cps(ind, j) / cps_N(j) * dress_weight_inv(ind) * comb_step - cp(1:N_states,1:N_det,j,1) += delta_loc(1:N_states,1:N_det,1) * fac - cp(1:N_states,1:N_det,j,2) += delta_loc(1:N_states,1:N_det,2) * fac - end if - end do - toothMwen = tooth_of_det(ind) - fracted = (toothMwen /= 0) - if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - - if(fracted) then - delta_det(1:N_states,1:N_det,toothMwen-1, 1) = delta_det(1:N_states,1:N_det,toothMwen-1, 1) + delta_loc(1:N_states,1:N_det,1) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen-1, 2) = delta_det(1:N_states,1:N_det,toothMwen-1, 2) + delta_loc(1:N_states,1:N_det,2) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) * (fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) * (fractage(toothMwen)) - else - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) - end if - - parts_to_get(ind) -= 1 - if(parts_to_get(ind) == 0) then - actually_computed(ind) = .true. - total_computed += 1 - end if - - + call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), task_id, felem_loc) + felem = min(felem_loc, felem) + dress_mwen(:) = 0d0 + integer, external :: zmq_delete_tasks + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then stop 'Unable to delete tasks' endif if(more == 0) loop = .false. + + do i_state=1,N_states + do i=1, N_det + dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state) + end do + end do + + dress_detail(:, ind) += dress_mwen(:) + wei(delta_loc_cur) = dress_weight_inv(ind) + inds(delta_loc_cur) = ind + + if(delta_loc_cur == delta_loc_N .or. .not. loop) then + do j=1,N_cp !! optimizable + fac = 0d0 + ok = .false. + + do i=1,delta_loc_cur + !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step + fac(i) = cps(inds(i), j) * wei(i) * comb_step + if(fac(i) /= 0d0) ok = .true. + end do + + if(ok) then + do i=felem,N_det + cp(:,i,j,1) += delta_loc(:,i,1,1) * fac(1) & + + delta_loc(:,i,1,2) * fac(2) + !+ delta_loc(:,i,1,3) * fac(3) & + !+ delta_loc(:,i,1,4) * fac(4) & + !+ delta_loc(:,i,1,5) * fac(5) & + !+ delta_loc(:,i,1,6) * fac(6) & + !+ delta_loc(:,i,1,7) * fac(7) & + !+ delta_loc(:,i,1,8) * fac(8) + + cp(:,i,j,1) += delta_loc(:,i,2,1) * fac(1) & + + delta_loc(:,i,2,2) * fac(2) + !+ delta_loc(:,i,2,3) * fac(3) & + !+ delta_loc(:,i,2,4) * fac(4) & + !+ delta_loc(:,i,2,5) * fac(5) & + !+ delta_loc(:,i,2,6) * fac(6) & + !+ delta_loc(:,i,2,7) * fac(7) & + !+ delta_loc(:,i,2,8) * fac(8) + end do + !cp(1:N_states,indi:N_det,j,1) += delta_loc(1:N_states,indi:N_det,1) * fac + !cp(1:N_states,indi:N_det,j,2) += delta_loc(1:N_states,indi:N_det,2) * fac + end if + end do + + do i=1,delta_loc_cur + logical :: fracted + integer :: toothMwen + ind = inds(i) + + toothMwen = tooth_of_det(ind) + fracted = (toothMwen /= 0) + if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) + + if(fracted) then + delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(felem:N_states,1:N_det,1,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(felem:N_states,1:N_det,2,i) * (1d0-fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(felem:N_states,1:N_det,1,i) * (fractage(toothMwen)) + delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(felem:N_states,1:N_det,2,i) * (fractage(toothMwen)) + else + delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i) + delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i) + end if + + parts_to_get(ind) -= 1 + if(parts_to_get(ind) == 0) then + actually_computed(ind) = .true. + total_computed += 1 + end if + end do + felem = N_det+1 + delta_loc_cur = 1 + else + delta_loc_cur += 1 + cycle + end if + + + time = omp_get_wtime() if((time - timeLast > 2d0) .or. (.not. loop)) then @@ -303,18 +353,22 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, endif end if end do pullLoop + + delta (1:N_states,1:N_det) = 0d0 + delta_s2(1:N_states,1:N_det) = 0d0 if(total_computed == N_det_generators) then - delta (1:N_states,1:N_det) = 0d0 - delta_s2(1:N_states,1:N_det) = 0d0 do i=comb_teeth+1,0,-1 delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) end do else - - delta (1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,1) - delta_s2(1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,2) + do i=1,cur_cp + delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1) + delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2) + end do + delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp) + delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp) do i=cp_first_tooth(cur_cp)-1,0,-1 delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) @@ -363,7 +417,7 @@ end function ! gen_per_cp : number of generators per checkpoint END_DOC comb_teeth = 64 - N_cps_max = 64 + N_cps_max = 32 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -455,13 +509,19 @@ END_PROVIDER end if end do end do - cps(:, N_cp) = 0d0 cp_first_tooth(N_cp) = comb_teeth+1 iorder = -1 do i=1,N_cp-1 call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) end do + + do i=1,N_det_generators + do j=N_cp,2,-1 + cps(i,j) -= cps(i,j-1) + end do + end do + cps(:, N_cp) = 0d0 END_PROVIDER diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 9f4ede26..3a55f7b7 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -101,6 +101,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! errr = 1d-4 ! end if relative_error = 1.d-5 + call write_double(6,relative_error,"Convergence of the stochastic algorithm") call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 1dc3176e..add0091e 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -74,15 +74,30 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: delta_loc(N_states, N_det, 2) integer, intent(in) :: ind, task_id - integer :: rc, i - + integer :: rc, i, j, felem + + felem = 1 + + dloop : do i=1, N_det + do j=1,N_states + if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then + felem = i + exit dloop + end if + end do + end do dloop rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" - - rc = f77_zmq_send( zmq_socket_push, delta_loc, 8*N_states*N_det*2, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det*2) stop "push" + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) + if(rc /= 8*N_states*(N_det+1-felem)) stop "push" rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "push" @@ -97,11 +112,12 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) +subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id, felem) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + integer, intent(out) :: felem integer, intent(out) :: ind integer, intent(out) :: task_id integer :: rc, i @@ -110,8 +126,16 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc, N_states*8*N_det*2, 0) - if(rc /= 8*N_states*N_det*2) stop "pull" + rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) + if(rc /= 4) stop "pull" + + delta_loc(:,:felem,:) = 0d0 + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) + if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) + if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "pull" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 7e32568c..2aeff1e0 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -75,7 +75,7 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) delta_ij_loc = 0d0 - !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC,1) PRIVATE(i, j, iproc, n_minilist, ex) & + !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) & !$OMP PRIVATE(det_minilist, minilist, haa, contrib) & !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) do i=n_alpha,1,-1 @@ -115,10 +115,11 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) end do !$OMP END PARALLEL DO - do i=1,Nproc - delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,i) - !print *, "DELTA_IJ_LOC", delta_ij_loc(:,2:5,2,i) + do i=2,Nproc + delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i) end do + + delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,1) end subroutine