From df26d62868b68ef76896d9491aec95a50a67aa90 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 12 Oct 2017 10:05:27 +0200 Subject: [PATCH 01/17] experimental mrcc_sto --- plugins/mrcepa0/dressing.irp.f | 222 +++++++++++++++++++++++++++++++-- 1 file changed, 214 insertions(+), 8 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index de59e725..875c996a 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -1,6 +1,199 @@ use bitmasks +BEGIN_PROVIDER [ integer, N_mrcc_teeth ] + N_mrcc_teeth = 10 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mrcc_norm_acc, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, mrcc_norm, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, mrcc_teeth_size, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ integer, mrcc_teeth, (0:N_mrcc_teeth+1, N_states) ] + implicit none + integer :: i, j, st, nt + double precision :: norm_sto, jump, norm_mwen, norm_loc + + if(N_states /= 1) stop "mrcc_sto may not work with N_states /= 1" + + do st=1,N_states + + !norm_non_ref = 1d0 + !do i=1,N_det_ref + ! norm_non_ref -= psi_ref_coef(i,st)**2 + !end do + + mrcc_teeth(0,st) = 1 + !norm_non_sto = norm_non_ref + norm_sto = 1d0 + do i=1,N_det + mrcc_teeth(1,st) = i + jump = (1d0 / dfloat(N_mrcc_teeth)) * norm_sto + if(psi_coef_generators(i,1)**2 < jump / 2d0) exit + !if(i==80) exit + norm_sto -= psi_coef_generators(i,1)**2 + end do + + print *, "FIRST", mrcc_teeth(1,st) + + norm_loc = 0d0 + mrcc_norm_acc(0,st) = 0d0 + nt = 1 + + do i=1,mrcc_teeth(1,st)-1 + mrcc_norm_acc(i,st) = mrcc_norm_acc(i-1,st) + psi_coef_generators(i,st)**2 + end do + + do i=mrcc_teeth(1,st), N_det_generators!-mrcc_teeth(1,st)+1 + norm_mwen = psi_coef_generators(i,st)**2!-1+mrcc_teeth(1,st),st)**2 + mrcc_norm_acc(i,st) = mrcc_norm_acc(i-1,st) + norm_mwen + norm_loc += norm_mwen + if(norm_loc > (jump*dfloat(nt))) then + nt = nt + 1 + mrcc_teeth(nt,st) = i + end if + end do + if(nt > N_mrcc_teeth+1) then + print *, "foireouse mrcc_teeth", nt, mrcc_teeth(nt,st), N_det_non_ref + stop + end if + + mrcc_teeth(N_mrcc_teeth+1,st) = N_det_non_ref+1 + !mrcc_norm_acc(:,st) = mrcc_norm_acc(:,st) / norm_non_ref + norm_loc = 0d0 + do i=N_mrcc_teeth, 0, -1 + mrcc_teeth_size(i,st) = mrcc_norm_acc(mrcc_teeth(i+1,st)-1,st) - mrcc_norm_acc(mrcc_teeth(i,st)-1, st) + mrcc_norm_acc(mrcc_teeth(i,st):mrcc_teeth(i+1,st)-1,st) -= mrcc_norm_acc(mrcc_teeth(i,st)-1, st) + mrcc_norm_acc(mrcc_teeth(i,st):mrcc_teeth(i+1,st)-1,st) = & + mrcc_norm_acc(mrcc_teeth(i,st):mrcc_teeth(i+1,st)-1,st) / mrcc_teeth_size(i,st) + mrcc_norm(mrcc_teeth(i,st), st) = mrcc_norm_acc(mrcc_teeth(i,st), st) + do j=mrcc_teeth(i,st)+1, mrcc_teeth(i+1,1)-1 + mrcc_norm(j,1) = mrcc_norm_acc(j, st) - mrcc_norm_acc(j-1, st) + end do + end do + end do +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, delta_ij_mrcc_sto,(N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_mrcc_sto, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc_sto, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc_sto, (N_states, N_det_ref) ] + use bitmasks + implicit none + integer :: gen, h, p, n, t, i, j, h1, h2, p1, p2, s1, s2, iproc + integer(bit_kind) :: mask(N_int, 2), omask(N_int, 2) + integer(bit_kind),allocatable :: buf(:,:,:) + logical :: ok + logical, external :: detEq + integer, external :: omp_get_thread_num + double precision :: coefs(N_det_non_ref), myCoef + integer :: n_in_teeth + double precision :: curn, in_teeth_step, curlim, curnorm + + read(*,*) n_in_teeth + !n_in_teeth = 2 + in_teeth_step = 1d0 / dfloat(n_in_teeth) +!double precision :: delta_ij_mrcc_tmp,(N_states,N_det_non_ref,N_det_ref) ] + !double precision :: delta_ii_mrcc_tmp, (N_states,N_det_ref) ] + !double precision :: delta_ij_s2_mrcc_tmp(N_states,N_det_non_ref,N_det_ref) + !double precision :: delta_ii_s2_mrcc_tmp(N_states, N_det_ref) + + coefs = 0d0 + coefs(:mrcc_teeth(1,1)-1) = 1d0 + + do i=1,N_mrcc_teeth + print *, "TEETH SIZE", i, mrcc_teeth(i+1,1)-mrcc_teeth(i,1) + if(mrcc_teeth(i+1,1) - mrcc_teeth(i,1) <= n_in_teeth) then + coefs(mrcc_teeth(i,1):mrcc_teeth(i+1,1)-1) = 1d0 + else if(.false.) then + curnorm = 0d0 + curn = 0.5d0 + curlim = curn / dfloat(n_in_teeth) + do j=mrcc_teeth(i,1), mrcc_teeth(i+1,1)-1 + if(mrcc_norm_acc(j,1) >= curlim) then + coefs(j) = 1d0 + curnorm += mrcc_norm(j,1) + do while(mrcc_norm_acc(j,1) > curlim) + curn += 1d0 + curlim = curn / dfloat(n_in_teeth) + end do + end if + end do + do j=mrcc_teeth(i,1), mrcc_teeth(i+1,1)-1 + coefs(j) = coefs(j) / curnorm ! 1d0 / norm computed in teeth + end do + else if(.true.) then + coefs(mrcc_teeth(i,1):mrcc_teeth(i,1)+n_in_teeth-1) = 1d0 / mrcc_norm_acc(mrcc_teeth(i,1)+n_in_teeth-1, 1) + else + curnorm = 0d0 + n = mrcc_teeth(i+1,1) - mrcc_teeth(i,1) + do j=1,n_in_teeth + t = int((dfloat(j)-0.5d0) * dfloat(n) / dfloat(n_in_teeth)) + 1 + mrcc_teeth(i,1) - 1 + curnorm += mrcc_norm(t,1) + coefs(t) = 1d0 + end do + do j=mrcc_teeth(i,1), mrcc_teeth(i+1,1)-1 + coefs(j) = coefs(j) / curnorm ! 1d0 / norm computed in teeth + end do + end if + !coefs(mrcc_teeth(i,1)) = + end do + + !coefs = coefs * dfloat(N_det_generators) + + + delta_ij_mrcc_sto = 0d0 + delta_ii_mrcc_sto = 0d0 + delta_ij_s2_mrcc_sto = 0d0 + delta_ii_s2_mrcc_sto = 0d0 + PROVIDE dij + provide hh_shortcut psi_det_size! lambda_mrcc + !$OMP PARALLEL DO default(none) schedule(dynamic) & + !$OMP shared(psi_ref, psi_non_ref, hh_exists, pp_exists, N_int, hh_shortcut) & + !$OMP shared(N_det_generators, coefs,N_det_non_ref, N_det_ref, delta_ii_mrcc_sto, delta_ij_mrcc_sto) & + !$OMP shared(psi_det_generators, delta_ii_s2_mrcc_sto, delta_ij_s2_mrcc_sto) & + !$OMP private(i,j,curnorm,myCoef, h, n, mask, omask, buf, ok, iproc) + do gen= 1,N_det_generators + if(coefs(gen) == 0d0) cycle + myCoef = coefs(gen) + allocate(buf(N_int, 2, N_det_non_ref)) + iproc = omp_get_thread_num() + 1 + if(mod(gen, 1000) == 0) print *, "mrcc_sto ", gen, "/", N_det_generators + + do h=1, hh_shortcut(0) + call apply_hole_local(psi_det_generators(1,1,gen), hh_exists(1, h), mask, ok, N_int) + if(.not. ok) cycle + omask = 0_bit_kind + if(hh_exists(1, h) /= 0) omask = mask + n = 1 + do p=hh_shortcut(h), hh_shortcut(h+1)-1 + call apply_particle_local(mask, pp_exists(1, p), buf(1,1,n), ok, N_int) + if(ok) n = n + 1 + if(n > N_det_non_ref) stop "Buffer too small in MRCC..." + end do + n = n - 1 + if(n /= 0) then + call mrcc_part_dress(delta_ij_mrcc_sto, delta_ii_mrcc_sto, delta_ij_s2_mrcc_sto, & + delta_ii_s2_mrcc_sto, gen,n,buf,N_int,omask,myCoef) + endif + end do + deallocate(buf) + end do + !$OMP END PARALLEL DO + + + + curnorm = 0d0 + do i=1,N_det_ref + do j=1,N_det_non_ref + curnorm += delta_ij_mrcc_sto(1, j, i)**2 + end do + end do + print *, "NORM DELTA ", curnorm**0.5d0 + +END_PROVIDER + + BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] @@ -14,7 +207,7 @@ use bitmasks logical :: ok logical, external :: detEq integer, external :: omp_get_thread_num - + delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 delta_ij_s2_mrcc = 0d0 @@ -43,7 +236,7 @@ use bitmasks n = n - 1 if(n /= 0) then - call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask) + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask,1d0) endif end do @@ -59,7 +252,7 @@ END_PROVIDER ! end subroutine -subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask) +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,coef) use bitmasks implicit none @@ -99,7 +292,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen logical, external :: detEq, is_generable !double precision, external :: get_dij, get_dij_index double precision :: Delta_E_inv(N_states) - + double precision, intent(in) :: coef + if (perturbative_triples) then PROVIDE one_anhil fock_virt_total fock_core_inactive_total one_creat endif @@ -290,8 +484,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen hla = hij_cache(k_sd) sla = sij_cache(k_sd) do i_state=1,N_states - dIa_hla(i_state,k_sd) = dIa(i_state) * hla - dIa_sla(i_state,k_sd) = dIa(i_state) * sla + dIa_hla(i_state,k_sd) = dIa(i_state) * hla * coef + dIa_sla(i_state,k_sd) = dIa(i_state) * sla * coef enddo enddo do i_state=1,N_states @@ -336,8 +530,20 @@ end integer :: i, j, i_state !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc - - if(mrmode == 3) then + if(mrmode == 4) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i)= delta_ii_mrcc_sto(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc_sto(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_ij_mrcc_sto(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc_sto(i_state,j,i) + enddo + end do + end do + else if(mrmode == 3) then do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) From 88036594924fb894610dcebc983f02fb77f78dba Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 12 Oct 2017 10:06:25 +0200 Subject: [PATCH 02/17] missing file... --- plugins/mrcepa0/mrcc_sto.irp.f | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 plugins/mrcepa0/mrcc_sto.irp.f diff --git a/plugins/mrcepa0/mrcc_sto.irp.f b/plugins/mrcepa0/mrcc_sto.irp.f new file mode 100644 index 00000000..5c4d318d --- /dev/null +++ b/plugins/mrcepa0/mrcc_sto.irp.f @@ -0,0 +1,27 @@ +program mrsc2sub + implicit none + double precision, allocatable :: energy(:) + allocate (energy(N_states)) + + !!mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc + mrmode = 4 + + read_wf = .True. + SOFT_TOUCH read_wf + call set_generators_bitmasks_as_holes_and_particles + if (.True.) then + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + SOFT_TOUCH psi_coef + endif + call run(N_states,energy) + if(do_pt2)then + call run_pt2(N_states,energy) + endif + deallocate(energy) +end + From 59976b6c5884519cad2ed7fd67430182abf3b837 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 17 Oct 2017 16:05:51 +0200 Subject: [PATCH 03/17] experimental (not working) delta_cancel --- plugins/mrcepa0/dressing.irp.f | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index de59e725..c226f10b 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -2,6 +2,73 @@ use bitmasks + BEGIN_PROVIDER [ double precision, delta_ij_cancel, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_cancel, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_cancel, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_cancel, (N_states, N_det_ref) ] + use bitmasks + implicit none + + + integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, m,l, deg, ni, m2 + integer :: n(2) + integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn + logical :: ok + double precision :: phase_ia, phase_Ik, phase_Jl, phase_Ji,phase_la, phase_ka, phase_tmp + double precision :: Hka, Hla, Ska, Sla, tmp + double precision :: diI, hIi, hJi, delta_JI, dkI, HkI,ci_inv(N_states), cj_inv(N_states) + double precision :: contrib, contrib_s2, wall, iwall + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ, exc + integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2),inac, virt + integer, external :: get_index_in_psi_det_sorted_bit, searchDet,detCmp + logical, external :: is_in_wavefunction + + + do i=1,N_det_ref + !$OMP PARALLEL DO default(shared) private(kk, k, blok, exc_Ik,det_tmp2,ok,deg,phase_Ik, l,ll) & + !$OMP private(contrib, contrib_s2, i_state) + do kk = 1, nlink(i) + k = det_cepa0_idx(linked(kk, i)) + blok = blokMwen(kk, i) + call get_excitation(psi_ref(1,1,i),psi_non_ref(1,1,k),exc_Ik,deg,phase_Ik,N_int) + + do j=1,N_det_ref + if(j == i) cycle + call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int) + if(.not. ok) cycle + + l = searchDet(det_cepa0(1,1,cepa0_shortcut(blok)), det_tmp2,cepa0_shortcut(blok+1)-cepa0_shortcut(blok), N_int) + if(l == -1) cycle + ll = cepa0_shortcut(blok)-1+l + l = det_cepa0_idx(ll) + ll = child_num(ll, J) + + do i_state = 1, N_states + contrib = (dij(j, l, i_state) - dij(i, k, i_state)) * delta_cas(i,j,i_state)! * Hla *phase_ia * phase_ik + contrib_s2 = dij(j, l, i_state) - dij(i, k, i_state)! * Sla*phase_ia * phase_ik + if(dabs(psi_ref_coef(i,i_state)).ge.1.d-3) then + !$OMP ATOMIC + delta_ij_cancel(i_state,l,i) += contrib + !$OMP ATOMIC + delta_ij_s2_cancel(i_state,l,i) += contrib_s2 + !$OMP ATOMIC + delta_ii_cancel(i_state,i) -= contrib / psi_ref_coef(i, i_state) * psi_non_ref_coef(l,i_state) + !$OMP ATOMIC + delta_ii_s2_cancel(i_state,i) -= contrib_s2 / psi_ref_coef(i, i_state) * psi_non_ref_coef(l,i_state) + else + !$OMP ATOMIC + delta_ij_cancel(i_state,l,i) += contrib * 0.5d0 + !$OMP ATOMIC + delta_ij_s2_cancel(i_state,l,i) += contrib_s2 * 0.5d0 + endif + end do + end do + end do + enddo + +END_PROVIDER + + BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ] @@ -407,6 +474,19 @@ end else stop "invalid mrmode" end if + + if(mrmode == 2 .or. mrmode == 3) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i) += delta_ii_cancel(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) += delta_ij_cancel(i_state,j,i) + enddo + end do + end do + end if END_PROVIDER From 70543a965ff1d8d052b2205e76db4b939b89daf0 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 18 Oct 2017 15:10:15 +0200 Subject: [PATCH 04/17] delta_cancel was uninitialized --- plugins/mrcepa0/dressing.irp.f | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index c226f10b..a5272cc3 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -23,6 +23,10 @@ use bitmasks integer, external :: get_index_in_psi_det_sorted_bit, searchDet,detCmp logical, external :: is_in_wavefunction + provide dij + + delta_ij_cancel = 0d0 + delta_ii_cancel = 0d0 do i=1,N_det_ref !$OMP PARALLEL DO default(shared) private(kk, k, blok, exc_Ik,det_tmp2,ok,deg,phase_Ik, l,ll) & @@ -64,6 +68,7 @@ use bitmasks end do end do end do + !$OMP END PARALLEL DO enddo END_PROVIDER @@ -82,12 +87,14 @@ END_PROVIDER logical, external :: detEq integer, external :: omp_get_thread_num + provide dij hh_shortcut psi_det_size + delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 delta_ij_s2_mrcc = 0d0 delta_ii_s2_mrcc = 0d0 - PROVIDE dij - provide hh_shortcut psi_det_size! lambda_mrcc + + !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & @@ -854,8 +861,8 @@ end subroutine notf = notf+1 ! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) - contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) - contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib = delta_cas(II, J, i_state)* dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) From b5b333904f4130953b371085a497098e7e8e0a9b Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 23 Oct 2017 09:56:38 +0200 Subject: [PATCH 05/17] mrcc dressing on first column ; mrcc_stoch estimates energy from dressing --- plugins/mrcepa0/dressing.irp.f | 48 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 875c996a..bd080138 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -88,8 +88,9 @@ END_PROVIDER integer, external :: omp_get_thread_num double precision :: coefs(N_det_non_ref), myCoef integer :: n_in_teeth - double precision :: curn, in_teeth_step, curlim, curnorm - + double precision :: contrib(N_states), curn, in_teeth_step, curlim, curnorm + + contrib = 0d0 read(*,*) n_in_teeth !n_in_teeth = 2 in_teeth_step = 1d0 / dfloat(n_in_teeth) @@ -151,7 +152,7 @@ END_PROVIDER !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_ref, psi_non_ref, hh_exists, pp_exists, N_int, hh_shortcut) & !$OMP shared(N_det_generators, coefs,N_det_non_ref, N_det_ref, delta_ii_mrcc_sto, delta_ij_mrcc_sto) & - !$OMP shared(psi_det_generators, delta_ii_s2_mrcc_sto, delta_ij_s2_mrcc_sto) & + !$OMP shared(contrib,psi_det_generators, delta_ii_s2_mrcc_sto, delta_ij_s2_mrcc_sto) & !$OMP private(i,j,curnorm,myCoef, h, n, mask, omask, buf, ok, iproc) do gen= 1,N_det_generators if(coefs(gen) == 0d0) cycle @@ -174,7 +175,7 @@ END_PROVIDER n = n - 1 if(n /= 0) then call mrcc_part_dress(delta_ij_mrcc_sto, delta_ii_mrcc_sto, delta_ij_s2_mrcc_sto, & - delta_ii_s2_mrcc_sto, gen,n,buf,N_int,omask,myCoef) + delta_ii_s2_mrcc_sto, gen,n,buf,N_int,omask,myCoef,contrib) endif end do deallocate(buf) @@ -207,7 +208,9 @@ END_PROVIDER logical :: ok logical, external :: detEq integer, external :: omp_get_thread_num - + double precision :: contrib(N_states) + + contrib = 0d0 delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 delta_ij_s2_mrcc = 0d0 @@ -215,7 +218,7 @@ END_PROVIDER PROVIDE dij provide hh_shortcut psi_det_size! lambda_mrcc !$OMP PARALLEL DO default(none) schedule(dynamic) & - !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & + !$OMP shared(contrib,psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators @@ -236,7 +239,7 @@ END_PROVIDER n = n - 1 if(n /= 0) then - call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask,1d0) + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask,1d0,contrib) endif end do @@ -252,7 +255,7 @@ END_PROVIDER ! end subroutine -subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,coef) +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,coef,contrib) use bitmasks implicit none @@ -293,6 +296,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen !double precision, external :: get_dij, get_dij_index double precision :: Delta_E_inv(N_states) double precision, intent(in) :: coef + double precision, intent(inout) :: contrib(N_states) + double precision :: sdress, hdress if (perturbative_triples) then PROVIDE one_anhil fock_virt_total fock_core_inactive_total one_creat @@ -489,26 +494,37 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen enddo enddo do i_state=1,N_states - if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then + if(dabs(psi_ref_coef(1,i_state)).ge.1.d-3)then do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) + p1 = 1 + hdress = dIa_hla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + sdress = dIa_sla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) !$OMP ATOMIC - delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) + contrib(i_state) += hdress * psi_ref_coef(p1, i_state) * psi_non_ref_coef(k_sd, i_state) !$OMP ATOMIC - delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ij_(i_state,k_sd,p1) += hdress !$OMP ATOMIC - delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) + !delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ii_(i_state,p1) -= hdress / psi_ref_coef(p1,i_state) * psi_non_ref_coef_transp(i_state,k_sd) !$OMP ATOMIC - delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,p1) += sdress + !$OMP ATOMIC + !delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ii_s2_(i_state,p1) -= sdress / psi_ref_coef(p1,i_state) * psi_non_ref_coef_transp(i_state,k_sd) enddo else - delta_ii_(i_state,i_I) = 0.d0 + !stop "dress with coef < 1d-3" + delta_ii_(i_state,1) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) + p1 = 1 + hdress = dIa_hla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + sdress = dIa_sla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) !$OMP ATOMIC - delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + delta_ij_(i_state,k_sd,p1) = delta_ij_(i_state,k_sd,p1) + 0.5d0*hdress !$OMP ATOMIC - delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,p1) = delta_ij_s2_(i_state,k_sd,p1) + 0.5d0*sdress enddo endif enddo From 2f2840ebb5d0fde11085086e803e9a7ad269fb0d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 23 Oct 2017 09:57:33 +0200 Subject: [PATCH 06/17] missing files....... --- plugins/mrcepa0/energy.irp.f | 23 + plugins/mrcepa0/ezfio_interface.irp.f | 80 ++++ plugins/mrcepa0/mrcc_slave.irp.f | 76 ++++ plugins/mrcepa0/mrcc_stoch.irp.f | 38 ++ plugins/mrcepa0/mrcc_stoch_routines.irp.f | 530 ++++++++++++++++++++++ plugins/mrcepa0/run_mrcc_slave.irp.f | 215 +++++++++ plugins/mrcepa0/selection_buffer.irp.f | 133 ++++++ 7 files changed, 1095 insertions(+) create mode 100644 plugins/mrcepa0/energy.irp.f create mode 100644 plugins/mrcepa0/ezfio_interface.irp.f create mode 100644 plugins/mrcepa0/mrcc_slave.irp.f create mode 100644 plugins/mrcepa0/mrcc_stoch.irp.f create mode 100644 plugins/mrcepa0/mrcc_stoch_routines.irp.f create mode 100644 plugins/mrcepa0/run_mrcc_slave.irp.f create mode 100644 plugins/mrcepa0/selection_buffer.irp.f diff --git a/plugins/mrcepa0/energy.irp.f b/plugins/mrcepa0/energy.irp.f new file mode 100644 index 00000000..7eaf5562 --- /dev/null +++ b/plugins/mrcepa0/energy.irp.f @@ -0,0 +1,23 @@ +BEGIN_PROVIDER [ logical, initialize_mrcc_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize mrcc_E0_denominator + END_DOC + initialize_mrcc_E0_denominator = .True. +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mrcc_E0_denominator, (N_states) ] + implicit none + BEGIN_DOC + ! E0 in the denominator of the mrcc + END_DOC + if (initialize_mrcc_E0_denominator) then + mrcc_E0_denominator(1:N_states) = psi_energy(1:N_states) +! mrcc_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion +! mrcc_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) + call write_double(6,mrcc_E0_denominator(1)+nuclear_repulsion, 'mrcc Energy denominator') + else + mrcc_E0_denominator = -huge(1.d0) + endif +END_PROVIDER + diff --git a/plugins/mrcepa0/ezfio_interface.irp.f b/plugins/mrcepa0/ezfio_interface.irp.f new file mode 100644 index 00000000..3627abe6 --- /dev/null +++ b/plugins/mrcepa0/ezfio_interface.irp.f @@ -0,0 +1,80 @@ +! DO NOT MODIFY BY HAND +! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py +! from file /home/garniron/quantum_package/src/mrcepa0/EZFIO.cfg + + +BEGIN_PROVIDER [ logical, perturbative_triples ] + implicit none + BEGIN_DOC +! Compute perturbative contribution of the Triples + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcepa0_perturbative_triples(has) + if (has) then + call ezfio_get_mrcepa0_perturbative_triples(perturbative_triples) + else + print *, 'mrcepa0/perturbative_triples not found in EZFIO file' + stop 1 + endif + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] + implicit none + BEGIN_DOC +! Threshold on the convergence of the dressed CI energy + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcepa0_thresh_dressed_ci(has) + if (has) then + call ezfio_get_mrcepa0_thresh_dressed_ci(thresh_dressed_ci) + else + print *, 'mrcepa0/thresh_dressed_ci not found in EZFIO file' + stop 1 + endif + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_it_max_dressed_ci ] + implicit none + BEGIN_DOC +! Maximum number of dressed CI iterations + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcepa0_n_it_max_dressed_ci(has) + if (has) then + call ezfio_get_mrcepa0_n_it_max_dressed_ci(n_it_max_dressed_ci) + else + print *, 'mrcepa0/n_it_max_dressed_ci not found in EZFIO file' + stop 1 + endif + +END_PROVIDER + +BEGIN_PROVIDER [ integer, lambda_type ] + implicit none + BEGIN_DOC +! lambda type + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcepa0_lambda_type(has) + if (has) then + call ezfio_get_mrcepa0_lambda_type(lambda_type) + else + print *, 'mrcepa0/lambda_type not found in EZFIO file' + stop 1 + endif + +END_PROVIDER diff --git a/plugins/mrcepa0/mrcc_slave.irp.f b/plugins/mrcepa0/mrcc_slave.irp.f new file mode 100644 index 00000000..95897570 --- /dev/null +++ b/plugins/mrcepa0/mrcc_slave.irp.f @@ -0,0 +1,76 @@ +program mrcc_slave + implicit none + BEGIN_DOC +! Helper program to compute the mrcc in distributed mode. + END_DOC + + read_wf = .False. + distributed_davidson = .False. + SOFT_TOUCH read_wf distributed_davidson + call provide_everything + call switch_qp_run_to_master + call run_wf +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context +end + +subroutine run_wf + use f77_zmq + implicit none + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: states(1) + integer :: rc, i + + call provide_everything + + zmq_context = f77_zmq_ctx_new () + states(1) = 'mrcc' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + + call wait_for_states(states,zmq_state,1) + + if(trim(zmq_state) == 'Stopped') then + + exit + + else if (trim(zmq_state) == 'mrcc') then + + ! Selection + ! --------- + + print *, 'mrcc' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call mrcc_slave_tcp(i, energy) + !$OMP END PARALLEL + print *, 'mrcc done' + + endif + + end do +end + +subroutine mrcc_slave_tcp(i,energy) + implicit none + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: i + logical :: lstop + lstop = .False. + call run_mrcc_slave(0,i,energy,lstop) +end + diff --git a/plugins/mrcepa0/mrcc_stoch.irp.f b/plugins/mrcepa0/mrcc_stoch.irp.f new file mode 100644 index 00000000..ea97ff7c --- /dev/null +++ b/plugins/mrcepa0/mrcc_stoch.irp.f @@ -0,0 +1,38 @@ +program mrcc_stoch + implicit none + read_wf = .True. + SOFT_TOUCH read_wf + PROVIDE mo_bielec_integrals_in_map + call run +end + +subroutine run + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: mrcc(:) + integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in + + double precision :: E_CI_before, relative_error + + allocate (mrcc(N_states)) + mrcc = 0.d0 + + E_CI_before = mrcc_E0_denominator(1) + nuclear_repulsion + threshold_selectors = 1.d0 + threshold_generators = 1d0 + relative_error = 1.d-3 + call ZMQ_mrcc(E_CI_before, mrcc, relative_error) + !print *, 'Final step' + !print *, 'N_det = ', N_det + print *, 'mrcc = ', mrcc + !print *, 'E = ', E_CI_before + !print *, 'E+mrcc = ', E_CI_before+mrcc + !print *, '-----' + !call ezfio_set_full_ci_zmq_energy_mrcc(E_CI_before+mrcc(1)) +end + + diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f new file mode 100644 index 00000000..ff4a6916 --- /dev/null +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -0,0 +1,530 @@ +BEGIN_PROVIDER [ integer, fragment_first ] + implicit none + fragment_first = first_det_of_teeth(1) +END_PROVIDER + +subroutine ZMQ_mrcc(E, mrcc,relative_error) + use f77_zmq + use selection_types + + implicit none + + character(len=64000) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2 + type(selection_buffer) :: b + integer, external :: omp_get_thread_num + double precision, intent(in) :: relative_error, E + double precision, intent(out) :: mrcc(N_states) + + + double precision, allocatable :: mrcc_detail(:,:), comb(:) + logical, allocatable :: computed(:) + integer, allocatable :: tbc(:) + integer :: i, j, k, Ncomb, generator_per_task, i_generator_end + integer, external :: mrcc_find + + double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + double precision, external :: omp_get_wtime + double precision :: time + + !if (N_det < 10) then + ! !call ZMQ_selection(0, mrcc) + ! return + !else + + allocate(mrcc_detail(N_states, N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) + sumabove = 0d0 + sum2above = 0d0 + Nabove = 0d0 + + provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral mrcc_weight psi_selectors + + computed = .false. + + tbc(0) = first_det_of_comb - 1 + do i=1, tbc(0) + tbc(i) = i + computed(i) = .true. + end do + + mrcc_detail = 0d0 + generator_per_task = 1 + print *, '========== ================= ================= =================' + print *, ' Samples Energy Stat. Error Seconds ' + print *, '========== ================= ================= =================' + + call new_parallel_job(zmq_to_qp_run_socket,'mrcc') + call zmq_put_psi(zmq_to_qp_run_socket,1,mrcc_e0_denominator,size(mrcc_e0_denominator)) + call create_selection_buffer(1, 1*2, b) + + Ncomb=size(comb) + call get_carlo_workbatch(computed, comb, Ncomb, tbc) + + do i=1,comb_teeth + print *, "TOOTH", first_det_of_teeth(i+1) - first_det_of_teeth(i) + end do + !stop + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer :: ipos + ipos=1 + + do i=1,tbc(0) + if(tbc(i) > fragment_first) then + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) + ipos += 20 + if (ipos > 63980) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) + ipos=1 + endif + else + do j=1,fragment_count + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) + ipos += 20 + if (ipos > 63980) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) + ipos=1 + endif + end do + end if + end do + if (ipos > 1) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) + endif + + call zmq_set_running(zmq_to_qp_run_socket) + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + call mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) + else + call mrcc_slave_inproc(i) + endif + !$OMP END PARALLEL + call delete_selection_buffer(b) + call end_parallel_job(zmq_to_qp_run_socket, 'mrcc') + + print *, '========== ================= ================= =================' + + !endif + +end subroutine + + +subroutine do_carlo(tbc, Ncomb, comb, mrcc_detail, computed, sumabove, sum2above, Nabove) + integer, intent(in) :: tbc(0:size_tbc), Ncomb + logical, intent(in) :: computed(N_det_generators) + double precision, intent(in) :: comb(Ncomb), mrcc_detail(N_states, N_det_generators) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + integer :: i, dets(comb_teeth) + double precision :: myVal, myVal2 + + mainLoop : do i=1,Ncomb + call get_comb(comb(i), dets, comb_teeth) + do j=1,comb_teeth + if(.not.(computed(dets(j)))) then + exit mainLoop + end if + end do + + myVal = 0d0 + myVal2 = 0d0 + do j=comb_teeth,1,-1 + myVal += mrcc_detail(1, dets(j)) * mrcc_weight_inv(dets(j)) * comb_step + sumabove(j) += myVal + sum2above(j) += myVal*myVal + Nabove(j) += 1 + end do + end do mainLoop +end subroutine + + +subroutine mrcc_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_mrcc_slave(1,i,mrcc_e0_denominator) +end + +subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer, intent(in) :: Ncomb + double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) + double precision, intent(in) :: comb(Ncomb), relative_error, E + logical, intent(inout) :: computed(N_det_generators) + integer, intent(in) :: tbc(0:size_tbc) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + double precision, intent(out) :: mrcc(N_states) + + + type(selection_buffer), intent(inout) :: b + double precision, allocatable :: mrcc_mwen(:,:) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: Nindex + integer, allocatable :: index(:) + double precision, save :: time0 = -1.d0 + double precision :: time, timeLast, Nabove_old + double precision, external :: omp_get_wtime + integer :: tooth, firstTBDcomb, orgTBDcomb + integer, allocatable :: parts_to_get(:) + logical, allocatable :: actually_computed(:) + integer :: ncomputed + + ncomputed = 0 + + character*(512) :: task + Nabove_old = -1.d0 + + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & + mrcc_mwen(N_states, N_det_generators) ) + mrcc_mwen(1:N_states, 1:N_det_generators) =0.d0 + do i=1,N_det_generators + actually_computed(i) = computed(i) + enddo + + parts_to_get(:) = 1 + if(fragment_first > 0) then + do i=1,fragment_first + parts_to_get(i) = fragment_count + enddo + endif + + do i=1,tbc(0) + actually_computed(tbc(i)) = .false. + end do + + orgTBDcomb = int(Nabove(1)) + firstTBDcomb = 1 + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1)) + more = 1 + if (time0 < 0.d0) then + call wall_time(time0) + endif + timeLast = time0 + + call get_first_tooth(actually_computed, tooth) + Nabove_old = Nabove(tooth) + + pullLoop : do while (more == 1) + + call pull_mrcc_results(zmq_socket_pull, Nindex, index, mrcc_mwen, task_id, ntask) + do i=1,Nindex + mrcc_detail(1:N_states, index(i)) += mrcc_mwen(1:N_states,i) + parts_to_get(index(i)) -= 1 + if(parts_to_get(index(i)) < 0) then + print *, i, index(i), parts_to_get(index(i)), Nindex + print *, "PARTS ??" + print *, parts_to_get + stop "PARTS ??" + end if + if(parts_to_get(index(i)) == 0) then + actually_computed(index(i)) = .true. + ncomputed += 1 + end if + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + + time = omp_get_wtime() + + if(time - timeLast > 1d0 .or. more /= 1) then + timeLast = time + do i=1, first_det_of_teeth(1)-1 + if(.not.(actually_computed(i))) then + cycle pullLoop + end if + end do + + double precision :: E0, avg, eqt, prop + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), mrcc_detail, actually_computed, sumabove, sum2above, Nabove) + firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + if(Nabove(1) < 5d0) cycle + call get_first_tooth(actually_computed, tooth) + + E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) + if (tooth <= comb_teeth) then + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) + prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) + E0 += mrcc_detail(1,first_det_of_teeth(tooth)) * prop + avg = E0 + (sumabove(tooth) / Nabove(tooth)) + eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + else + eqt = 0.d0 + endif + call wall_time(time) + if (dabs(eqt/avg) < relative_error) then + ! Termination + mrcc(1) = avg + print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' + print *, avg + call zmq_abort(zmq_to_qp_run_socket) + else + if (Nabove(tooth) > Nabove_old) then + print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' + print *, avg + Nabove_old = Nabove(tooth) + endif + endif + end if + end do pullLoop + + E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) + prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) + E0 += mrcc_detail(1,first_det_of_teeth(tooth)) * prop + mrcc(1) = E0 + (sumabove(tooth) / Nabove(tooth)) + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + +integer function mrcc_find(v, w, sze, imin, imax) + implicit none + integer, intent(in) :: sze, imin, imax + double precision, intent(in) :: v, w(sze) + integer :: i,l,h + integer, parameter :: block=64 + + l = imin + h = imax-1 + + do while(h-l >= block) + i = ishft(h+l,-1) + if(w(i+1) > v) then + h = i-1 + else + l = i+1 + end if + end do + !DIR$ LOOP COUNT (64) + do mrcc_find=l,h + if(w(mrcc_find) >= v) then + exit + end if + end do +end function + + +BEGIN_PROVIDER [ integer, comb_teeth ] + implicit none + comb_teeth = 100 +END_PROVIDER + + + +subroutine get_first_tooth(computed, first_teeth) + implicit none + logical, intent(in) :: computed(N_det_generators) + integer, intent(out) :: first_teeth + integer :: i, first_det + + first_det = 1 + first_teeth = 1 + do i=first_det_of_comb, N_det_generators + if(.not.(computed(i))) then + first_det = i + exit + end if + end do + + do i=comb_teeth, 1, -1 + if(first_det_of_teeth(i) < first_det) then + first_teeth = i + exit + end if + end do + +end subroutine + + +BEGIN_PROVIDER [ integer, size_tbc ] + implicit none + BEGIN_DOC +! Size of the tbc array + END_DOC + size_tbc = (comb_teeth+1)*N_det_generators + fragment_count*fragment_first +END_PROVIDER + +subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) + implicit none + integer, intent(inout) :: Ncomb + double precision, intent(out) :: comb(Ncomb) + integer, intent(inout) :: tbc(0:size_tbc) + logical, intent(inout) :: computed(N_det_generators) + integer :: i, j, last_full, dets(comb_teeth) + integer :: icount, n + integer :: k, l + l=first_det_of_comb + call RANDOM_NUMBER(comb) + do i=1,size(comb) + comb(i) = comb(i) * comb_step + !DIR$ FORCEINLINE + call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) + Ncomb = i + if (tbc(0) == N_det_generators) return + do while (computed(l)) + l=l+1 + enddo + k=tbc(0)+1 + tbc(k) = l + computed(l) = .True. + tbc(0) = k + enddo + +end subroutine + + + +subroutine get_comb(stato, dets, ct) + implicit none + integer, intent(in) :: ct + double precision, intent(in) :: stato + integer, intent(out) :: dets(ct) + double precision :: curs + integer :: j + integer, external :: mrcc_find + + curs = 1d0 - stato + do j = comb_teeth, 1, -1 + !DIR$ FORCEINLINE + dets(j) = mrcc_find(curs, mrcc_cweight,size(mrcc_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) + curs -= comb_step + end do +end subroutine + + +subroutine add_comb(comb, computed, tbc, stbc, ct) + implicit none + integer, intent(in) :: stbc, ct + double precision, intent(in) :: comb + logical, intent(inout) :: computed(N_det_generators) + integer, intent(inout) :: tbc(0:stbc) + integer :: i, k, l, dets(ct) + + !DIR$ FORCEINLINE + call get_comb(comb, dets, ct) + + k=tbc(0)+1 + do i = 1, ct + l = dets(i) + if(.not.(computed(l))) then + tbc(k) = l + k = k+1 + computed(l) = .true. + end if + end do + tbc(0) = k-1 +end subroutine + + + + BEGIN_PROVIDER [ double precision, mrcc_weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, mrcc_cweight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, mrcc_cweight_cache, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb_step ] +&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] +&BEGIN_PROVIDER [ integer, first_det_of_comb ] + implicit none + integer :: i + double precision :: norm_left, stato + integer, external :: mrcc_find + + mrcc_weight(1) = psi_coef_generators(1,1)**2 + mrcc_cweight(1) = psi_coef_generators(1,1)**2 + + do i=1,N_det_generators + mrcc_weight(i) = psi_coef_generators(i,1)**2 + enddo + + ! Important to loop backwards for numerical precision + mrcc_cweight(N_det_generators) = mrcc_weight(N_det_generators) + do i=N_det_generators-1,1,-1 + mrcc_cweight(i) = mrcc_weight(i) + mrcc_cweight(i+1) + end do + + do i=1,N_det_generators + mrcc_weight(i) = mrcc_weight(i) / mrcc_cweight(1) + mrcc_cweight(i) = mrcc_cweight(i) / mrcc_cweight(1) + enddo + + do i=1,N_det_generators-1 + mrcc_cweight(i) = 1.d0 - mrcc_cweight(i+1) + end do + mrcc_cweight(N_det_generators) = 1.d0 + + norm_left = 1d0 + + comb_step = 1d0/dfloat(comb_teeth) + first_det_of_comb = 1 + do i=1,N_det_generators + if(mrcc_weight(i)/norm_left < .5d0*comb_step) then + first_det_of_comb = i + exit + end if + norm_left -= mrcc_weight(i) + end do + first_det_of_comb = max(2,first_det_of_comb) + call write_int(6, first_det_of_comb-1, 'Size of deterministic set') + + comb_step = (1d0 - mrcc_cweight(first_det_of_comb-1)) * comb_step + + stato = 1d0 - comb_step + iloc = N_det_generators + do i=comb_teeth, 1, -1 + integer :: iloc + iloc = mrcc_find(stato, mrcc_cweight, N_det_generators, 1, iloc) + first_det_of_teeth(i) = iloc + stato -= comb_step + end do + first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 + first_det_of_teeth(1) = first_det_of_comb + if(first_det_of_teeth(1) /= first_det_of_comb) then + print *, 'Error in ', irp_here + stop "comb provider" + endif + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mrcc_weight_inv, (N_det_generators) ] + implicit none + BEGIN_DOC +! Inverse of mrcc_weight array + END_DOC + integer :: i + do i=1,N_det_generators + mrcc_weight_inv(i) = 1.d0/mrcc_weight(i) + enddo + +END_PROVIDER + + + + + + diff --git a/plugins/mrcepa0/run_mrcc_slave.irp.f b/plugins/mrcepa0/run_mrcc_slave.irp.f new file mode 100644 index 00000000..93b6e0f7 --- /dev/null +++ b/plugins/mrcepa0/run_mrcc_slave.irp.f @@ -0,0 +1,215 @@ + +BEGIN_PROVIDER [ integer, fragment_count ] + implicit none + BEGIN_DOC + ! Number of fragments for the deterministic part + END_DOC + fragment_count = 1 ! (elec_alpha_num-n_core_orb)**2 +END_PROVIDER + + +subroutine run_mrcc_slave(thread,iproc,energy) + use f77_zmq + use selection_types + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, task_id(1), ctask, ltask + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + !type(selection_buffer) :: buf + logical :: done + + double precision,allocatable :: mrcc_detail(:,:) + integer :: index + integer :: Nindex + + integer(bit_kind),allocatable :: abuf(:,:,:) + integer(bit_kind) :: mask(N_int,2), omask(N_int,2) + + double precision,allocatable :: delta_ij_loc(:,:,:) + double precision,allocatable :: delta_ii_loc(:,:) + double precision,allocatable :: delta_ij_s2_loc(:,:,:) + double precision,allocatable :: delta_ii_s2_loc(:,:) + integer :: h,p,n + logical :: ok + double precision :: contrib(N_states) + + allocate(delta_ij_loc(N_states,N_det_non_ref,N_det_ref) & + ,delta_ii_loc(N_states,N_det_ref) & + ,delta_ij_s2_loc(N_states,N_det_non_ref,N_det_ref) & + ,delta_ii_s2_loc(N_states, N_det_ref)) + + + allocate(abuf(N_int, 2, N_det_non_ref)) + + allocate(mrcc_detail(N_states, N_det_generators)) + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if + !buf%N = 0 + ctask = 1 + Nindex=1 + mrcc_detail = 0d0 + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) + + done = task_id(ctask) == 0 + if (done) then + ctask = ctask - 1 + else + integer :: i_generator, i_i_generator, subset + read (task,*) subset, index + +! if(buf%N == 0) then +! ! Only first time +! call create_selection_buffer(1, 2, buf) +! end if + do i_i_generator=1, Nindex + contrib = 0d0 + i_generator = index + !call select_connected(i_generator,energy,mrcc_detail(1, i_i_generator),buf,subset) + +!!!!!!!!!!!!!!!!!!!!!! + !if(mod(gen, 1000) == 0) print *, "mrcc ", gen, "/", N_det_generators + do h=1, hh_shortcut(0) + call apply_hole_local(psi_det_generators(1,1,i_generator), hh_exists(1, h), mask, ok, N_int) + if(.not. ok) cycle + omask = 0_bit_kind + if(hh_exists(1, h) /= 0) omask = mask + n = 1 + do p=hh_shortcut(h), hh_shortcut(h+1)-1 + call apply_particle_local(mask, pp_exists(1, p), abuf(1,1,n), ok, N_int) + if(ok) n = n + 1 + if(n > N_det_non_ref) stop "Buffer too small in MRCC..." + end do + n = n - 1 + + if(n /= 0) then + call mrcc_part_dress(delta_ij_loc, delta_ii_loc, delta_ij_s2_loc, delta_ii_s2_loc, & + i_generator,n,abuf,N_int,omask,1d0,contrib) + endif + end do +!!!!!!!!!!!!!!!!!!!!!! + !print *, "contrib", i_generator, contrib + mrcc_detail(:, i_i_generator) = contrib + enddo + endif + + if(done .or. (ctask == size(task_id)) ) then +! if(buf%N == 0 .and. ctask > 0) stop "uninitialized selection_buffer" + do i=1, ctask + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) + end do + if(ctask > 0) then + call push_mrcc_results(zmq_socket_push, Nindex, index, mrcc_detail, task_id(1), ctask) + mrcc_detail(:,:Nindex) = 0d0 +! buf%cur = 0 + end if + ctask = 0 + end if + + if(done) exit + ctask = ctask + 1 + end do + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) +! call delete_selection_buffer(buf) +end subroutine + + +subroutine push_mrcc_results(zmq_socket_push, N, index, mrcc_detail, task_id, ntask) + use f77_zmq + use selection_types + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(in) :: mrcc_detail(N_states, N_det_generators) + integer, intent(in) :: ntask, N, index, task_id(*) + integer :: rc + + + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, index, 4, ZMQ_SNDMORE) + if(rc /= 4*N) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, mrcc_detail, 8*N_states*N, ZMQ_SNDMORE) + if(rc /= 8*N_states*N) stop "push" + + rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, task_id, ntask*4, 0) + if(rc /= 4*ntask) stop "push" + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) +IRP_ENDIF + +end subroutine + + +subroutine pull_mrcc_results(zmq_socket_pull, N, index, mrcc_detail, task_id, ntask) + use f77_zmq + use selection_types + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) + integer, intent(out) :: index + integer, intent(out) :: N, ntask, task_id(*) + integer :: rc, rn, i + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, index, 4, 0) + if(rc /= 4*N) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, mrcc_detail, N_states*8*N, 0) + if(rc /= 8*N_states*N) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) + if(rc /= 4) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, ntask*4, 0) + if(rc /= 4*ntask) stop "pull" + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) +IRP_ENDIF + +end subroutine + + +BEGIN_PROVIDER [ double precision, mrcc_workload, (N_det_generators) ] + integer :: i + do i=1,N_det_generators + mrcc_workload(i) = dfloat(N_det_generators - i + 1)**2 + end do + mrcc_workload = mrcc_workload / sum(mrcc_workload) +END_PROVIDER + diff --git a/plugins/mrcepa0/selection_buffer.irp.f b/plugins/mrcepa0/selection_buffer.irp.f new file mode 100644 index 00000000..3deccb12 --- /dev/null +++ b/plugins/mrcepa0/selection_buffer.irp.f @@ -0,0 +1,133 @@ + +subroutine create_selection_buffer(N, siz, res) + use selection_types + implicit none + + integer, intent(in) :: N, siz + type(selection_buffer), intent(out) :: res + + allocate(res%det(N_int, 2, siz), res%val(siz)) + + res%val = 0d0 + res%det = 0_8 + res%N = N + res%mini = 0d0 + res%cur = 0 +end subroutine + +subroutine delete_selection_buffer(b) + use selection_types + implicit none + type(selection_buffer), intent(inout) :: b + if (associated(b%det)) then + deallocate(b%det) + endif + if (associated(b%val)) then + deallocate(b%val) + endif +end + + +subroutine add_to_selection_buffer(b, det, val) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer(bit_kind), intent(in) :: det(N_int, 2) + double precision, intent(in) :: val + integer :: i + + if(val <= b%mini) then + b%cur += 1 + b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) + b%val(b%cur) = val + if(b%cur == size(b%val)) then + call sort_selection_buffer(b) + end if + end if +end subroutine + +subroutine merge_selection_buffers(b1, b2) + use selection_types + implicit none + BEGIN_DOC +! Merges the selection buffers b1 and b2 into b2 + END_DOC + type(selection_buffer), intent(inout) :: b1 + type(selection_buffer), intent(inout) :: b2 + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i, i1, i2, k, nmwen + if (b1%cur == 0) return + do while (b1%val(b1%cur) > b2%mini) + b1%cur = b1%cur-1 + if (b1%cur == 0) then + return + endif + enddo + nmwen = min(b1%N, b1%cur+b2%cur) + allocate( val(size(b1%val)), detmp(N_int, 2, size(b1%det,3)) ) + i1=1 + i2=1 + do i=1,nmwen + if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then + exit + else if (i1 > b1%cur) then + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + else if (i2 > b2%cur) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + if (b1%val(i1) <= b2%val(i2)) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + endif + endif + enddo + deallocate(b2%det, b2%val) + b2%det => detmp + b2%val => val + b2%mini = min(b2%mini,b2%val(b2%N)) + b2%cur = nmwen +end + + +subroutine sort_selection_buffer(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + integer :: i, nmwen + logical, external :: detEq + if (b%cur == 0) return + nmwen = min(b%N, b%cur) + + allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) + do i=1,b%cur + iorder(i) = i + end do + call dsort(b%val, iorder, b%cur) + do i=1, nmwen + detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) + detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) + end do + deallocate(b%det,iorder) + b%det => detmp + b%mini = min(b%mini,b%val(b%N)) + b%cur = nmwen +end subroutine + From d84fbabb8dbae6182d3200495f33de3e105b0b5a Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 27 Oct 2017 11:14:22 +0200 Subject: [PATCH 07/17] mrcc_zmq --- plugins/mrcepa0/dressing.irp.f | 66 +++++++ plugins/mrcepa0/mrcc_slave.irp.f | 2 +- plugins/mrcepa0/mrcc_stoch.irp.f | 4 +- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 227 +++++++++++++++++----- plugins/mrcepa0/mrcepa0_general.irp.f | 4 +- plugins/mrcepa0/run_mrcc_slave.irp.f | 63 ++++-- plugins/mrcepa0/selection_buffer.irp.f | 133 ------------- 7 files changed, 293 insertions(+), 206 deletions(-) delete mode 100644 plugins/mrcepa0/selection_buffer.irp.f diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index bd080138..e853221f 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -393,6 +393,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen k_sd = idx_alpha(l_sd) call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) + !if(sij_cache(k_sd) /= 0D0) PRINT *, "SIJ ", sij_cache(k_sd) enddo ! |I> @@ -535,6 +536,56 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen end + BEGIN_PROVIDER [ double precision, mrcc_previous_E, (N_states) ] + implicit none + BEGIN_DOC + !energy difference between last two mrcc iterations + END_DOC + mrcc_previous_E(:) = ref_bitmask_energy + END_PROVIDER + + + BEGIN_PROVIDER [ double precision, delta_ij_mrcc_zmq, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_mrcc_zmq, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc_zmq, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc_zmq, (N_states, N_det_ref) ] + use bitmasks + implicit none + + integer :: i,j,k + + double precision, allocatable :: mrcc(:) + double precision :: E_CI_before, relative_error + double precision, save :: errr = 0d0 + + allocate(mrcc(N_states)) + + + delta_ij_mrcc_zmq = 0d0 + delta_ii_mrcc_zmq = 0d0 + delta_ij_s2_mrcc_zmq = 0d0 + delta_ii_s2_mrcc_zmq = 0d0 + + !call random_seed() + E_CI_before = mrcc_E0_denominator(1) + nuclear_repulsion + threshold_selectors = 1.d0 + threshold_generators = 1d0 + !errr = errr / 2d0 + if(errr /= 0d0) then + errr = errr / 4d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 + else + errr = 4d-4 + end if + relative_error = errr + print *, "RELATIVE ERROR", relative_error + call ZMQ_mrcc(E_CI_before, mrcc, delta_ij_mrcc_zmq, delta_ij_s2_mrcc_zmq, abs(relative_error)) + !errr = + mrcc_previous_E(:) = mrcc_E0_denominator(:) + do i=N_det_non_ref,1,-1 + delta_ii_mrcc_zmq(:,1) -= delta_ij_mrcc_zmq(:, i, 1) / psi_ref_coef(1,1) * psi_non_ref_coef(i, 1) + delta_ii_s2_mrcc_zmq(:,1) -= delta_ij_s2_mrcc_zmq(:, i, 1) / psi_ref_coef(1,1) * psi_non_ref_coef(i, 1) + end do +END_PROVIDER BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] @@ -559,6 +610,21 @@ end enddo end do end do + else if(mrmode == 5) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i)= delta_ii_mrcc_zmq(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc_zmq(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_ij_mrcc_zmq(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc_zmq(i_state,j,i) + enddo + end do + end do + print *, "De", delta_ij(1,:5,1) + print *, "Ds", delta_ij_s2(1,1000:1005,1) else if(mrmode == 3) then do i = 1, N_det_ref do i_state = 1, N_states diff --git a/plugins/mrcepa0/mrcc_slave.irp.f b/plugins/mrcepa0/mrcc_slave.irp.f index 95897570..83295985 100644 --- a/plugins/mrcepa0/mrcc_slave.irp.f +++ b/plugins/mrcepa0/mrcc_slave.irp.f @@ -3,7 +3,7 @@ program mrcc_slave BEGIN_DOC ! Helper program to compute the mrcc in distributed mode. END_DOC - + print *, "SLAVE" read_wf = .False. distributed_davidson = .False. SOFT_TOUCH read_wf distributed_davidson diff --git a/plugins/mrcepa0/mrcc_stoch.irp.f b/plugins/mrcepa0/mrcc_stoch.irp.f index ea97ff7c..7a11d292 100644 --- a/plugins/mrcepa0/mrcc_stoch.irp.f +++ b/plugins/mrcepa0/mrcc_stoch.irp.f @@ -20,11 +20,11 @@ subroutine run allocate (mrcc(N_states)) mrcc = 0.d0 - + !call random_seed() E_CI_before = mrcc_E0_denominator(1) + nuclear_repulsion threshold_selectors = 1.d0 threshold_generators = 1d0 - relative_error = 1.d-3 + relative_error = 5.d-2 call ZMQ_mrcc(E_CI_before, mrcc, relative_error) !print *, 'Final step' !print *, 'N_det = ', N_det diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index ff4a6916..e3b4772f 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -3,18 +3,20 @@ BEGIN_PROVIDER [ integer, fragment_first ] fragment_first = first_det_of_teeth(1) END_PROVIDER -subroutine ZMQ_mrcc(E, mrcc,relative_error) +subroutine ZMQ_mrcc(E, mrcc, delta, delta_s2, relative_error) + use dress_types use f77_zmq - use selection_types implicit none character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2 - type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(in) :: relative_error, E double precision, intent(out) :: mrcc(N_states) + double precision, intent(out) :: delta(N_states, N_det_non_ref) + double precision, intent(out) :: delta_s2(N_states, N_det_non_ref) + type(dress_buffer) :: db(2) double precision, allocatable :: mrcc_detail(:,:), comb(:) @@ -31,7 +33,11 @@ subroutine ZMQ_mrcc(E, mrcc,relative_error) ! !call ZMQ_selection(0, mrcc) ! return !else - + + + call init_dress_buffer(db(1)) + call init_dress_buffer(db(2)) + allocate(mrcc_detail(N_states, N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) sumabove = 0d0 sum2above = 0d0 @@ -55,7 +61,6 @@ subroutine ZMQ_mrcc(E, mrcc,relative_error) call new_parallel_job(zmq_to_qp_run_socket,'mrcc') call zmq_put_psi(zmq_to_qp_run_socket,1,mrcc_e0_denominator,size(mrcc_e0_denominator)) - call create_selection_buffer(1, 1*2, b) Ncomb=size(comb) call get_carlo_workbatch(computed, comb, Ncomb, tbc) @@ -68,7 +73,6 @@ subroutine ZMQ_mrcc(E, mrcc,relative_error) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer :: ipos ipos=1 - do i=1,tbc(0) if(tbc(i) > fragment_first) then write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) @@ -98,28 +102,36 @@ subroutine ZMQ_mrcc(E, mrcc,relative_error) !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) + call mrcc_collector(E, db, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) else call mrcc_slave_inproc(i) endif !$OMP END PARALLEL - call delete_selection_buffer(b) call end_parallel_job(zmq_to_qp_run_socket, 'mrcc') print *, '========== ================= ================= =================' !endif + call flush_dress_buffer(db(1), delta) + call flush_dress_buffer(db(2), delta_s2) + deallocate(db(1)%buf) + deallocate(db(2)%buf) end subroutine -subroutine do_carlo(tbc, Ncomb, comb, mrcc_detail, computed, sumabove, sum2above, Nabove) +subroutine do_carlo(tbc, Ncomb, comb, mrcc_detail, db, computed, sumabove, sum2above, sumin, isincomb, Nabove) + use dress_types + implicit none + integer, intent(in) :: tbc(0:size_tbc), Ncomb logical, intent(in) :: computed(N_det_generators) double precision, intent(in) :: comb(Ncomb), mrcc_detail(N_states, N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), sumin(comb_teeth), Nabove(comb_teeth) + double precision, intent(inout) :: isincomb(N_det_generators) + type(dress_buffer), intent(inout) :: db(2) + integer :: i, j, dets(comb_teeth) + double precision :: myVal, myVal2, myValcur mainLoop : do i=1,Ncomb call get_comb(comb(i), dets, comb_teeth) @@ -132,11 +144,18 @@ subroutine do_carlo(tbc, Ncomb, comb, mrcc_detail, computed, sumabove, sum2above myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 - myVal += mrcc_detail(1, dets(j)) * mrcc_weight_inv(dets(j)) * comb_step + isincomb(dets(j)) = 1d0 + myValcur = mrcc_detail(1, dets(j)) * mrcc_weight_inv(dets(j)) * comb_step + sumin(j) += myValcur**2 + myVal += myValcur sumabove(j) += myVal + call dress_buffer_drew(db(1), dets(j), mrcc_weight_inv(dets(j)) * comb_step) + call dress_buffer_drew(db(2), dets(j), mrcc_weight_inv(dets(j)) * comb_step) sum2above(j) += myVal*myVal Nabove(j) += 1 end do + db(1)%N += 1d0 + db(2)%N += 1d0 end do mainLoop end subroutine @@ -148,23 +167,24 @@ subroutine mrcc_slave_inproc(i) call run_mrcc_slave(1,i,mrcc_e0_denominator) end -subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) + +subroutine mrcc_collector(E, db, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) + use dress_types use f77_zmq - use selection_types use bitmasks implicit none integer, intent(in) :: Ncomb double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) + type(dress_buffer), intent(inout) :: db(2) double precision, intent(in) :: comb(Ncomb), relative_error, E logical, intent(inout) :: computed(N_det_generators) integer, intent(in) :: tbc(0:size_tbc) double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, intent(out) :: mrcc(N_states) + - - type(selection_buffer), intent(inout) :: b double precision, allocatable :: mrcc_mwen(:,:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -174,27 +194,37 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov integer :: msg_size, rc, more integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) integer(bit_kind), allocatable :: det(:,:,:) integer, allocatable :: task_id(:) integer :: Nindex - integer, allocatable :: index(:) + integer, allocatable :: ind(:) double precision, save :: time0 = -1.d0 double precision :: time, timeLast, Nabove_old double precision, external :: omp_get_wtime integer :: tooth, firstTBDcomb, orgTBDcomb integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) - integer :: ncomputed - - ncomputed = 0 + double precision :: ncomputed + integer :: total_computed + double precision :: sumin(comb_teeth) + double precision :: isincomb(N_det_generators) + + print *, "collecture" + + ncomputed = 0d0 + total_computed = 0 + sumin = 0d0 + isincomb = 0d0 + character*(512) :: task Nabove_old = -1.d0 allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & mrcc_mwen(N_states, N_det_generators) ) mrcc_mwen(1:N_states, 1:N_det_generators) =0.d0 + + do i=1,N_det_generators actually_computed(i) = computed(i) enddo @@ -215,7 +245,7 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1)) + allocate(task_id(N_det_generators), ind(db(1)%N_slot)) more = 1 if (time0 < 0.d0) then call wall_time(time0) @@ -224,22 +254,26 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov call get_first_tooth(actually_computed, tooth) Nabove_old = Nabove(tooth) + db(1)%free_under = first_det_of_teeth(1)-1 + db(2)%free_under = first_det_of_teeth(1)-1 pullLoop : do while (more == 1) - call pull_mrcc_results(zmq_socket_pull, Nindex, index, mrcc_mwen, task_id, ntask) + call pull_mrcc_results(zmq_socket_pull, Nindex, ind, mrcc_mwen, db, task_id, ntask) do i=1,Nindex - mrcc_detail(1:N_states, index(i)) += mrcc_mwen(1:N_states,i) - parts_to_get(index(i)) -= 1 - if(parts_to_get(index(i)) < 0) then - print *, i, index(i), parts_to_get(index(i)), Nindex + mrcc_detail(1:N_states, ind(i)) += mrcc_mwen(1:N_states,i) + + parts_to_get(ind(i)) -= 1 + if(parts_to_get(ind(i)) < 0) then + print *, i, ind(i), parts_to_get(ind(i)), Nindex print *, "PARTS ??" print *, parts_to_get stop "PARTS ??" end if - if(parts_to_get(index(i)) == 0) then - actually_computed(index(i)) = .true. - ncomputed += 1 + if(parts_to_get(ind(i)) == 0) then + !print *, "COMPUTED", ind(i) + actually_computed(ind(i)) = .true. + total_computed += 1 end if end do @@ -249,9 +283,13 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov endif call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do - + + call get_first_tooth(actually_computed, tooth) + db(1)%free_under = first_det_of_teeth(tooth)-1 + db(2)%free_under = first_det_of_teeth(tooth)-1 + time = omp_get_wtime() - + if(time - timeLast > 1d0 .or. more /= 1) then timeLast = time do i=1, first_det_of_teeth(1)-1 @@ -260,13 +298,13 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov end if end do - double precision :: E0, avg, eqt, prop - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), mrcc_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + double precision :: E0, fco, ffco, avg, tavg, mavg, var, su, su2, meqt, eqt, prop + if(firstTBDcomb <= size(comb)) then + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), mrcc_detail, db, actually_computed, sumabove, sum2above, sumin, isincomb, Nabove) + firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + end if if(Nabove(1) < 5d0) cycle - call get_first_tooth(actually_computed, tooth) - - E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) + E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) if (tooth <= comb_teeth) then prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) @@ -277,17 +315,82 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov eqt = 0.d0 endif call wall_time(time) - if (dabs(eqt/avg) < relative_error) then + !if (dabs(eqt/avg) < relative_error) then + if (dabs(eqt) < relative_error .or. total_computed == N_det_generators) then ! Termination - mrcc(1) = avg - print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - print *, avg + print *, "converged", Nabove(1), first_det_of_teeth(tooth) + !mrcc(1) = avg+E + print '(A7, G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', "GREPME", Nabove(tooth), E+avg, eqt, time-time0, '' call zmq_abort(zmq_to_qp_run_socket) else if (Nabove(tooth) > Nabove_old) then - print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - print *, avg + meqt = 0d0 + ncomputed = 0 + mavg = 0d0 + do i=tooth, comb_teeth + !do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 + ! if(actually_computed(j)) ncomputed(i) = ncomputed(i) + 1 + !end do + fco = 0d0 + ffco = 0d0 + do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 + fco += isincomb(j) * mrcc_weight(j) + ffco += mrcc_weight(j) + end do + ncomputed = sum(isincomb(first_det_of_teeth(i):first_det_of_teeth(i+1)-1)) + + !if(i /= comb_teeth) then + ! var = abs((sumin(i))/Nabove(i) - ((sumabove(i)-sumabove(i+1))/Nabove(i))**2) / (Nabove(i)-1) + !else + ! var = abs((sumin(i))/Nabove(i) - ((sumabove(i))/Nabove(i))**2) / (Nabove(i)-1) + !end if + n = first_det_of_teeth(i+1) - first_det_of_teeth(i) + + + !var = var * ((ffco-fco) /ffco) !((dfloat(n)-ncomputed) / dfloat(n))**2 + !eqt += var / (Nabove(i)-1) + + tavg = 0d0 + ncomputed = 0d0 + su = 0d0 + su2 = 0d0 + do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 + if(actually_computed(j)) then + tavg += (mrcc_detail(1, j))! * isincomb(j) + ncomputed += 1d0 !isincomb(j) + su2 += (mrcc_detail(1, j))**2 + end if + end do + if(ncomputed /= 0) then + tavg = tavg / ncomputed * dfloat(n) + su2 = su2 / ncomputed * dfloat(n) + var = (su2 - (tavg**2)) / ncomputed + else + print *, "ZERO NCOMPUTED" + tavg = 0d0 + su2 = 0d0 + var = 0d0 + end if + + !fco = ffco + !if(i /= comb_teeth) then + ! mavg += (tavg) + (((sumabove(i)-sumabove(i+1))/Nabove(i)) * (ffco-fco)) & + ! / ffco + !else + ! mavg += (tavg) + (((sumabove(i))/Nabove(i)) * (ffco-fco)) & + ! / ffco + !end if + + + var = var * ((dfloat(n)-ncomputed) / dfloat(n)) + meqt += var + mavg += tavg + end do + meqt = sqrt(abs(meqt)) Nabove_old = Nabove(tooth) + + print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' + !print *, "GREPME", avg, eqt, mavg+E0, meqt endif endif end if @@ -297,11 +400,10 @@ subroutine mrcc_collector(E, b, tbc, comb, Ncomb, computed, mrcc_detail, sumabov prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) E0 += mrcc_detail(1,first_det_of_teeth(tooth)) * prop - mrcc(1) = E0 + (sumabove(tooth) / Nabove(tooth)) - + mrcc(1) = E + E0 + (sumabove(tooth) / Nabove(tooth)) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) end subroutine integer function mrcc_find(v, w, sze, imin, imax) @@ -333,7 +435,7 @@ end function BEGIN_PROVIDER [ integer, comb_teeth ] implicit none - comb_teeth = 100 + comb_teeth = 10 END_PROVIDER @@ -483,7 +585,7 @@ end subroutine comb_step = 1d0/dfloat(comb_teeth) first_det_of_comb = 1 do i=1,N_det_generators - if(mrcc_weight(i)/norm_left < .5d0*comb_step) then + if(mrcc_weight(i)/norm_left < .25d0*comb_step) then first_det_of_comb = i exit end if @@ -492,6 +594,7 @@ end subroutine first_det_of_comb = max(2,first_det_of_comb) call write_int(6, first_det_of_comb-1, 'Size of deterministic set') + comb_step = (1d0 - mrcc_cweight(first_det_of_comb-1)) * comb_step stato = 1d0 - comb_step @@ -504,6 +607,30 @@ end subroutine end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 first_det_of_teeth(1) = first_det_of_comb + + !do i=1,comb_teeth + ! mrcc_weight(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = & + ! (mrcc_cweight(first_det_of_teeth(i+1)-1)-mrcc_cweight(first_det_of_teeth(i)-1)) / & + ! dfloat(first_det_of_teeth(i+1)-first_det_of_teeth(i)) + !end do + + !mrcc_cweight = 0d0 + !mrcc_cweight(N_det_generators) = mrcc_weight(N_det_generators) + !do i=N_det_generators-1,1,-1 + ! mrcc_cweight(i) = mrcc_weight(i) + mrcc_cweight(i+1) + !end do + + !do i=1,N_det_generators + ! mrcc_weight(i) = mrcc_weight(i) / mrcc_cweight(1) + ! mrcc_cweight(i) = mrcc_cweight(i) / mrcc_cweight(1) + ! enddo + + !do i=1,N_det_generators-1 + ! mrcc_cweight(i) = 1.d0 - mrcc_cweight(i+1) + !end do + !mrcc_cweight(N_det_generators) = 1.d0 + + if(first_det_of_teeth(1) /= first_det_of_comb) then print *, 'Error in ', irp_here stop "comb provider" diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f index 53fa74a1..4bb225a3 100644 --- a/plugins/mrcepa0/mrcepa0_general.irp.f +++ b/plugins/mrcepa0/mrcepa0_general.irp.f @@ -41,12 +41,12 @@ subroutine run(N_st,energy) print *, 'Iteration', iteration, '/', n_it_mrcc_max print *, '===============================================' print *, '' - E_old = sum(ci_energy_dressed(1:N_states)) + E_old = mrcc_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) do i=1,N_st call write_double(6,ci_energy_dressed(i),"Energy") enddo call diagonalize_ci_dressed(lambda) - E_new = sum(ci_energy_dressed(1:N_states)) + E_new = mrcc_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) ! if (.true.) then ! provide delta_ij_mrcc_pouet ! endif diff --git a/plugins/mrcepa0/run_mrcc_slave.irp.f b/plugins/mrcepa0/run_mrcc_slave.irp.f index 93b6e0f7..c7f61822 100644 --- a/plugins/mrcepa0/run_mrcc_slave.irp.f +++ b/plugins/mrcepa0/run_mrcc_slave.irp.f @@ -9,8 +9,8 @@ END_PROVIDER subroutine run_mrcc_slave(thread,iproc,energy) + use dress_types use f77_zmq - use selection_types implicit none double precision, intent(in) :: energy(N_states_diag) @@ -26,11 +26,10 @@ subroutine run_mrcc_slave(thread,iproc,energy) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - !type(selection_buffer) :: buf logical :: done double precision,allocatable :: mrcc_detail(:,:) - integer :: index + integer :: ind(1) integer :: Nindex integer(bit_kind),allocatable :: abuf(:,:,:) @@ -74,7 +73,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) ctask = ctask - 1 else integer :: i_generator, i_i_generator, subset - read (task,*) subset, index + read (task,*) subset, ind ! if(buf%N == 0) then ! ! Only first time @@ -82,7 +81,11 @@ subroutine run_mrcc_slave(thread,iproc,energy) ! end if do i_i_generator=1, Nindex contrib = 0d0 - i_generator = index + i_generator = ind(i_i_generator) + delta_ij_loc = 0d0 + delta_ii_loc = 0d0 + delta_ij_s2_loc = 0d0 + delta_ii_s2_loc = 0d0 !call select_connected(i_generator,energy,mrcc_detail(1, i_i_generator),buf,subset) !!!!!!!!!!!!!!!!!!!!!! @@ -108,6 +111,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) !!!!!!!!!!!!!!!!!!!!!! !print *, "contrib", i_generator, contrib mrcc_detail(:, i_i_generator) = contrib + if(Nindex /= 1) stop "run_mrcc_slave multiple dress not supported" enddo endif @@ -117,7 +121,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do if(ctask > 0) then - call push_mrcc_results(zmq_socket_push, Nindex, index, mrcc_detail, task_id(1), ctask) + call push_mrcc_results(zmq_socket_push, Nindex, ind, mrcc_detail, delta_ij_loc, delta_ij_s2_loc, task_id(1), ctask) mrcc_detail(:,:Nindex) = 0d0 ! buf%cur = 0 end if @@ -134,27 +138,34 @@ subroutine run_mrcc_slave(thread,iproc,energy) end subroutine -subroutine push_mrcc_results(zmq_socket_push, N, index, mrcc_detail, task_id, ntask) +subroutine push_mrcc_results(zmq_socket_push, N, ind, mrcc_detail, mrcc_dress, mrcc_s2_dress, task_id, ntask) use f77_zmq - use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: mrcc_detail(N_states, N_det_generators) - integer, intent(in) :: ntask, N, index, task_id(*) - integer :: rc - - + double precision, intent(in) :: mrcc_dress(N_states, N_det_non_ref, *) + double precision, intent(in) :: mrcc_s2_dress(N_states, N_det_non_ref, *) + integer, intent(in) :: ntask, N, ind(*), task_id(*) + integer :: rc, i + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, index, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, ind, 4*N, ZMQ_SNDMORE) if(rc /= 4*N) stop "push" rc = f77_zmq_send( zmq_socket_push, mrcc_detail, 8*N_states*N, ZMQ_SNDMORE) if(rc /= 8*N_states*N) stop "push" + rc = f77_zmq_send( zmq_socket_push, mrcc_dress, 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det_non_ref*N) stop "push" + + rc = f77_zmq_send( zmq_socket_push, mrcc_s2_dress, 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det_non_ref*N) stop "push" + + rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" @@ -171,24 +182,40 @@ IRP_ENDIF end subroutine -subroutine pull_mrcc_results(zmq_socket_pull, N, index, mrcc_detail, task_id, ntask) +subroutine pull_mrcc_results(zmq_socket_pull, N, ind, mrcc_detail, mrcc_dress, task_id, ntask) + use dress_types use f77_zmq - use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) - integer, intent(out) :: index + double precision, allocatable :: mrcc_dress_mwen(:,:) + type(dress_buffer), intent(inout) :: mrcc_dress(2) + integer, intent(out) :: ind(*) integer, intent(out) :: N, ntask, task_id(*) integer :: rc, rn, i + allocate(mrcc_dress_mwen(N_states, N_det_non_ref)) + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, index, 4, 0) + rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4*N) stop "pull" - + rc = f77_zmq_recv( zmq_socket_pull, mrcc_detail, N_states*8*N, 0) if(rc /= 8*N_states*N) stop "pull" + + do i=1,N + rc = f77_zmq_recv( zmq_socket_pull, mrcc_dress_mwen, N_states*8*N_det_non_ref, 0) + if(rc /= 8*N_states*N_det_non_ref) stop "pull" + call add_to_dress_buffer(mrcc_dress(1), ind(i), mrcc_dress_mwen) + end do + + do i=1,N + rc = f77_zmq_recv( zmq_socket_pull, mrcc_dress_mwen, N_states*8*N_det_non_ref, 0) + if(rc /= 8*N_states*N_det_non_ref) stop "pull" + call add_to_dress_buffer(mrcc_dress(2), ind(i), mrcc_dress_mwen) + end do rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) if(rc /= 4) stop "pull" diff --git a/plugins/mrcepa0/selection_buffer.irp.f b/plugins/mrcepa0/selection_buffer.irp.f deleted file mode 100644 index 3deccb12..00000000 --- a/plugins/mrcepa0/selection_buffer.irp.f +++ /dev/null @@ -1,133 +0,0 @@ - -subroutine create_selection_buffer(N, siz, res) - use selection_types - implicit none - - integer, intent(in) :: N, siz - type(selection_buffer), intent(out) :: res - - allocate(res%det(N_int, 2, siz), res%val(siz)) - - res%val = 0d0 - res%det = 0_8 - res%N = N - res%mini = 0d0 - res%cur = 0 -end subroutine - -subroutine delete_selection_buffer(b) - use selection_types - implicit none - type(selection_buffer), intent(inout) :: b - if (associated(b%det)) then - deallocate(b%det) - endif - if (associated(b%val)) then - deallocate(b%val) - endif -end - - -subroutine add_to_selection_buffer(b, det, val) - use selection_types - implicit none - - type(selection_buffer), intent(inout) :: b - integer(bit_kind), intent(in) :: det(N_int, 2) - double precision, intent(in) :: val - integer :: i - - if(val <= b%mini) then - b%cur += 1 - b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) - b%val(b%cur) = val - if(b%cur == size(b%val)) then - call sort_selection_buffer(b) - end if - end if -end subroutine - -subroutine merge_selection_buffers(b1, b2) - use selection_types - implicit none - BEGIN_DOC -! Merges the selection buffers b1 and b2 into b2 - END_DOC - type(selection_buffer), intent(inout) :: b1 - type(selection_buffer), intent(inout) :: b2 - integer(bit_kind), pointer :: detmp(:,:,:) - double precision, pointer :: val(:) - integer :: i, i1, i2, k, nmwen - if (b1%cur == 0) return - do while (b1%val(b1%cur) > b2%mini) - b1%cur = b1%cur-1 - if (b1%cur == 0) then - return - endif - enddo - nmwen = min(b1%N, b1%cur+b2%cur) - allocate( val(size(b1%val)), detmp(N_int, 2, size(b1%det,3)) ) - i1=1 - i2=1 - do i=1,nmwen - if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then - exit - else if (i1 > b1%cur) then - val(i) = b2%val(i2) - detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) - detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) - i2=i2+1 - else if (i2 > b2%cur) then - val(i) = b1%val(i1) - detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) - detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) - i1=i1+1 - else - if (b1%val(i1) <= b2%val(i2)) then - val(i) = b1%val(i1) - detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) - detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) - i1=i1+1 - else - val(i) = b2%val(i2) - detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) - detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) - i2=i2+1 - endif - endif - enddo - deallocate(b2%det, b2%val) - b2%det => detmp - b2%val => val - b2%mini = min(b2%mini,b2%val(b2%N)) - b2%cur = nmwen -end - - -subroutine sort_selection_buffer(b) - use selection_types - implicit none - - type(selection_buffer), intent(inout) :: b - integer, allocatable :: iorder(:) - integer(bit_kind), pointer :: detmp(:,:,:) - integer :: i, nmwen - logical, external :: detEq - if (b%cur == 0) return - nmwen = min(b%N, b%cur) - - allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) - do i=1,b%cur - iorder(i) = i - end do - call dsort(b%val, iorder, b%cur) - do i=1, nmwen - detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) - detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) - end do - deallocate(b%det,iorder) - b%det => detmp - b%mini = min(b%mini,b%val(b%N)) - b%cur = nmwen -end subroutine - From 19f36717759b2c1d6fb42216c3c12f058d63a952 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 6 Nov 2017 13:18:23 +0100 Subject: [PATCH 08/17] checkpointed mrcc_zmq --- plugins/mrcepa0/dressing.irp.f | 284 +++++++++- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 644 +++++++++------------- plugins/mrcepa0/run_mrcc_slave.irp.f | 56 +- 3 files changed, 575 insertions(+), 409 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index e853221f..937e5e2a 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -536,6 +536,288 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen end + +subroutine mrcc_part_dress_1c(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,coef,contrib) + use bitmasks + implicit none + + integer, intent(in) :: i_generator,n_selected, Nint + double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref) + double precision, intent(inout) :: delta_ii_(N_states) + double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref) + double precision, intent(inout) :: delta_ii_s2_(N_states) + + integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) + integer :: i,j,k,l,m + integer,allocatable :: idx_alpha(:), degree_alpha(:) + logical :: good, fullMatch + + integer(bit_kind),allocatable :: tq(:,:,:) + integer :: N_tq, c_ref ,degree1, degree2, degree + + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states), hka + double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) + double precision :: haj, phase, phase2 + double precision :: f(N_states), ci_inv(N_states) + integer :: exc(0:2,2,2) + integer :: h1,h2,p1,p2,s1,s2 + integer(bit_kind) :: tmp_det(Nint,2) + integer :: iint, ipos + integer :: i_state, k_sd, l_sd, i_I, i_alpha + + integer(bit_kind),allocatable :: miniList(:,:,:) + integer(bit_kind),intent(in) :: key_mask(Nint, 2) + integer,allocatable :: idx_miniList(:) + integer :: N_miniList, ni, leng + double precision, allocatable :: hij_cache(:), sij_cache(:) + + integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:) + integer, allocatable :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:) + integer :: mobiles(2), smallerlist + logical, external :: detEq, is_generable + !double precision, external :: get_dij, get_dij_index + double precision :: Delta_E_inv(N_states) + double precision, intent(in) :: coef + double precision, intent(inout) :: contrib(N_states) + double precision :: sdress, hdress + + if (perturbative_triples) then + PROVIDE one_anhil fock_virt_total fock_core_inactive_total one_creat + endif + + + leng = max(N_det_generators, N_det_non_ref) + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) + allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) + call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) + + allocate(ptr_microlist(0:mo_tot_num*2+1), & + N_microlist(0:mo_tot_num*2) ) + allocate( microlist(Nint,2,N_minilist*4), & + idx_microlist(N_minilist*4)) + + if(key_mask(1,1) /= 0) then + call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint) + call filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microlist,ptr_microlist,N_microlist,key_mask) + else + call filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_minilist) + end if + + + + deallocate(microlist, idx_microlist) + + allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref)) + + ! |I> + + ! |alpha> + + if(N_tq > 0) then + call create_minilist(key_mask, psi_non_ref, miniList, idx_minilist, N_det_non_ref, N_minilist, Nint) + if(N_minilist == 0) return + + + if(sum(abs(key_mask(1:N_int,1))) /= 0) then + allocate(microlist_zero(Nint,2,N_minilist), idx_microlist_zero(N_minilist)) + + allocate( microlist(Nint,2,N_minilist*4), & + idx_microlist(N_minilist*4)) + call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint) + + + do i=0,mo_tot_num*2 + do k=ptr_microlist(i),ptr_microlist(i+1)-1 + idx_microlist(k) = idx_minilist(idx_microlist(k)) + end do + end do + + do l=1,N_microlist(0) + do k=1,Nint + microlist_zero(k,1,l) = microlist(k,1,l) + microlist_zero(k,2,l) = microlist(k,2,l) + enddo + idx_microlist_zero(l) = idx_microlist(l) + enddo + end if + end if + + + do i_alpha=1,N_tq + if(key_mask(1,1) /= 0) then + call getMobiles(tq(1,1,i_alpha), key_mask, mobiles, Nint) + + if(N_microlist(mobiles(1)) < N_microlist(mobiles(2))) then + smallerlist = mobiles(1) + else + smallerlist = mobiles(2) + end if + + + do l=0,N_microlist(smallerlist)-1 + microlist_zero(:,:,ptr_microlist(1) + l) = microlist(:,:,ptr_microlist(smallerlist) + l) + idx_microlist_zero(ptr_microlist(1) + l) = idx_microlist(ptr_microlist(smallerlist) + l) + end do + + call get_excitation_degree_vector(microlist_zero,tq(1,1,i_alpha),degree_alpha,Nint,N_microlist(smallerlist)+N_microlist(0),idx_alpha) + do j=1,idx_alpha(0) + idx_alpha(j) = idx_microlist_zero(idx_alpha(j)) + end do + + else + call get_excitation_degree_vector(miniList,tq(1,1,i_alpha),degree_alpha,Nint,N_minilist,idx_alpha) + do j=1,idx_alpha(0) + idx_alpha(j) = idx_miniList(idx_alpha(j)) + end do + end if + + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) + call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) + !if(sij_cache(k_sd) /= 0D0) PRINT *, "SIJ ", sij_cache(k_sd) + enddo + + ! |I> + do i_I=1,N_det_ref + ! Find triples and quadruple grand parents + call get_excitation_degree(tq(1,1,i_alpha),psi_ref(1,1,i_I),degree1,Nint) + if (degree1 > 4) then + cycle + endif + + do i_state=1,N_states + dIa(i_state) = 0.d0 + enddo + + ! |alpha> + do k_sd=1,idx_alpha(0) + + call get_excitation_degree(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),degree,Nint) + if (degree > 2) then + cycle + endif + + ! + + ! |l> = Exc(k -> alpha) |I> + call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree2,phase,Nint) + call decode_exc(exc,degree2,h1,p1,h2,p2,s1,s2) + do k=1,N_int + tmp_det(k,1) = psi_ref(k,1,i_I) + tmp_det(k,2) = psi_ref(k,2,i_I) + enddo + logical :: ok + call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint) + + do i_state=1,N_states + dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) + enddo + + ! + do i_state=1,N_states + dka(i_state) = 0.d0 + enddo + + if (ok) then + do l_sd=k_sd+1,idx_alpha(0) + call get_excitation_degree(tmp_det,psi_non_ref(1,1,idx_alpha(l_sd)),degree,Nint) + if (degree == 0) then + call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),exc,degree,phase2,Nint) + do i_state=1,N_states + dka(i_state) = dij(i_I, idx_alpha(l_sd), i_state) * phase * phase2 + enddo + exit + endif + enddo + + else if (perturbative_triples) then + ! Linked + + hka = hij_cache(idx_alpha(k_sd)) + if (dabs(hka) > 1.d-12) then + call get_delta_e_dyall_general_mp(psi_ref(1,1,i_I),tq(1,1,i_alpha),Delta_E_inv) + + do i_state=1,N_states + ASSERT (Delta_E_inv(i_state) < 0.d0) + dka(i_state) = hka / Delta_E_inv(i_state) + enddo + endif + + endif + + if (perturbative_triples.and. (degree2 == 1) ) then + call i_h_j(psi_ref(1,1,i_I),tmp_det,Nint,hka) + hka = hij_cache(idx_alpha(k_sd)) - hka + if (dabs(hka) > 1.d-12) then + call get_delta_e_dyall_general_mp(psi_ref(1,1,i_I),tq(1,1,i_alpha),Delta_E_inv) + do i_state=1,N_states + ASSERT (Delta_E_inv(i_state) < 0.d0) + dka(i_state) = hka / Delta_E_inv(i_state) + enddo + endif + + endif + + do i_state=1,N_states + dIa(i_state) = dIa(i_state) + dIk(i_state) * dka(i_state) + enddo + enddo + + do i_state=1,N_states + ci_inv(i_state) = psi_ref_coef_inv(i_I,i_state) + enddo + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + hla = hij_cache(k_sd) + sla = sij_cache(k_sd) + do i_state=1,N_states + dIa_hla(i_state,k_sd) = dIa(i_state) * hla * coef + dIa_sla(i_state,k_sd) = dIa(i_state) * sla * coef + enddo + enddo + do i_state=1,N_states + if(dabs(psi_ref_coef(1,i_state)).ge.1.d-3)then + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + p1 = 1 + hdress = dIa_hla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + sdress = dIa_sla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + !$OMP ATOMIC + contrib(i_state) += hdress * psi_ref_coef(p1, i_state) * psi_non_ref_coef(k_sd, i_state) + !$OMP ATOMIC + delta_ij_(i_state,k_sd) += hdress + !$OMP ATOMIC + !delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ii_(i_state) -= hdress / psi_ref_coef(p1,i_state) * psi_non_ref_coef_transp(i_state,k_sd) + !$OMP ATOMIC + delta_ij_s2_(i_state,k_sd) += sdress + !$OMP ATOMIC + !delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ii_s2_(i_state) -= sdress / psi_ref_coef(p1,i_state) * psi_non_ref_coef_transp(i_state,k_sd) + enddo + else + !stop "dress with coef < 1d-3" + delta_ii_(i_state) = 0.d0 + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + p1 = 1 + hdress = dIa_hla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + sdress = dIa_sla(i_state,k_sd) * psi_ref_coef(i_I,i_state) / psi_ref_coef(p1,i_state) + !$OMP ATOMIC + delta_ij_(i_state,k_sd) = delta_ij_(i_state,k_sd) + 0.5d0*hdress + !$OMP ATOMIC + delta_ij_s2_(i_state,k_sd) = delta_ij_s2_(i_state,k_sd) + 0.5d0*sdress + enddo + endif + enddo + enddo + enddo + deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) + deallocate(miniList, idx_miniList) +end + + BEGIN_PROVIDER [ double precision, mrcc_previous_E, (N_states) ] implicit none BEGIN_DOC @@ -572,7 +854,7 @@ end threshold_generators = 1d0 !errr = errr / 2d0 if(errr /= 0d0) then - errr = errr / 4d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 + errr = errr / 2d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 else errr = 4d-4 end if diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index e3b4772f..769ab2e8 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -3,6 +3,7 @@ BEGIN_PROVIDER [ integer, fragment_first ] fragment_first = first_det_of_teeth(1) END_PROVIDER + subroutine ZMQ_mrcc(E, mrcc, delta, delta_s2, relative_error) use dress_types use f77_zmq @@ -16,147 +17,73 @@ subroutine ZMQ_mrcc(E, mrcc, delta, delta_s2, relative_error) double precision, intent(out) :: mrcc(N_states) double precision, intent(out) :: delta(N_states, N_det_non_ref) double precision, intent(out) :: delta_s2(N_states, N_det_non_ref) - type(dress_buffer) :: db(2) - double precision, allocatable :: mrcc_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, k, Ncomb, generator_per_task, i_generator_end - integer, external :: mrcc_find + integer :: i, j, k, Ncp - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, external :: omp_get_wtime double precision :: time + + - !if (N_det < 10) then - ! !call ZMQ_selection(0, mrcc) - ! return - !else + provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral mrcc_weight psi_selectors + + + + print *, '========== ================= ================= =================' + print *, ' Samples Energy Stat. Error Seconds ' + print *, '========== ================= ================= =================' + + call new_parallel_job(zmq_to_qp_run_socket,'mrcc') + call zmq_put_psi(zmq_to_qp_run_socket,1,mrcc_e0_denominator,size(mrcc_e0_denominator)) + +! call get_carlo_workbatch(Ncp, tbc, cp, cp_at, cp_N) + do i=1,comb_teeth + print *, "TOOTH", first_det_of_teeth(i+1) - first_det_of_teeth(i) + end do + !stop - call init_dress_buffer(db(1)) - call init_dress_buffer(db(2)) - - allocate(mrcc_detail(N_states, N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) - sumabove = 0d0 - sum2above = 0d0 - Nabove = 0d0 - - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral mrcc_weight psi_selectors - - computed = .false. - - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - mrcc_detail = 0d0 - generator_per_task = 1 - print *, '========== ================= ================= =================' - print *, ' Samples Energy Stat. Error Seconds ' - print *, '========== ================= ================= =================' - - call new_parallel_job(zmq_to_qp_run_socket,'mrcc') - call zmq_put_psi(zmq_to_qp_run_socket,1,mrcc_e0_denominator,size(mrcc_e0_denominator)) - - Ncomb=size(comb) - call get_carlo_workbatch(computed, comb, Ncomb, tbc) - - do i=1,comb_teeth - print *, "TOOTH", first_det_of_teeth(i+1) - first_det_of_teeth(i) - end do - !stop - - integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos - ipos=1 - do i=1,tbc(0) - if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer :: ipos + ipos=1 + do i=1,N_mrcc_jobs + if(mrcc_jobs(i) > fragment_first) then + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, mrcc_jobs(i) + ipos += 20 + if (ipos > 63980) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) + ipos=1 + endif + else + do j=1,fragment_count + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, mrcc_jobs(i) ipos += 20 if (ipos > 63980) then call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) ipos=1 endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) - ipos += 20 - if (ipos > 63980) then - call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) - ipos=1 - endif - end do - end if - end do - if (ipos > 1) then - call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) - endif - - call zmq_set_running(zmq_to_qp_run_socket) - - !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & - !$OMP PRIVATE(i) - i = omp_get_thread_num() - if (i==0) then - call mrcc_collector(E, db, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) - else - call mrcc_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'mrcc') - - print *, '========== ================= ================= =================' - - !endif - call flush_dress_buffer(db(1), delta) - call flush_dress_buffer(db(2), delta_s2) - deallocate(db(1)%buf) - deallocate(db(2)%buf) - -end subroutine - - -subroutine do_carlo(tbc, Ncomb, comb, mrcc_detail, db, computed, sumabove, sum2above, sumin, isincomb, Nabove) - use dress_types - implicit none - - integer, intent(in) :: tbc(0:size_tbc), Ncomb - logical, intent(in) :: computed(N_det_generators) - double precision, intent(in) :: comb(Ncomb), mrcc_detail(N_states, N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), sumin(comb_teeth), Nabove(comb_teeth) - double precision, intent(inout) :: isincomb(N_det_generators) - type(dress_buffer), intent(inout) :: db(2) - integer :: i, j, dets(comb_teeth) - double precision :: myVal, myVal2, myValcur - - mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets, comb_teeth) - do j=1,comb_teeth - if(.not.(computed(dets(j)))) then - exit mainLoop - end if - end do - - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - isincomb(dets(j)) = 1d0 - myValcur = mrcc_detail(1, dets(j)) * mrcc_weight_inv(dets(j)) * comb_step - sumin(j) += myValcur**2 - myVal += myValcur - sumabove(j) += myVal - call dress_buffer_drew(db(1), dets(j), mrcc_weight_inv(dets(j)) * comb_step) - call dress_buffer_drew(db(2), dets(j), mrcc_weight_inv(dets(j)) * comb_step) - sum2above(j) += myVal*myVal - Nabove(j) += 1 - end do - db(1)%N += 1d0 - db(2)%N += 1d0 - end do mainLoop + end do + end if + end do + if (ipos > 1) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) + endif + + call zmq_set_running(zmq_to_qp_run_socket) + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + call mrcc_collector(E, relative_error, delta, delta_s2, mrcc) + else + call mrcc_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'mrcc') + + print *, '========== ================= ================= =================' end subroutine @@ -168,66 +95,54 @@ subroutine mrcc_slave_inproc(i) end -subroutine mrcc_collector(E, db, tbc, comb, Ncomb, computed, mrcc_detail, sumabove, sum2above, Nabove, relative_error, mrcc) +subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) use dress_types use f77_zmq use bitmasks implicit none - integer, intent(in) :: Ncomb - double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) - type(dress_buffer), intent(inout) :: db(2) - double precision, intent(in) :: comb(Ncomb), relative_error, E - logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + double precision, intent(in) :: relative_error, E double precision, intent(out) :: mrcc(N_states) - + double precision, allocatable :: cp(:,:,:,:) - double precision, allocatable :: mrcc_mwen(:,:) + double precision, intent(out) :: delta(N_states, N_det_non_ref) + double precision, intent(out) :: delta_s2(N_states, N_det_non_ref) + double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: mrcc_detail(:,:) + double precision :: mrcc_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_pull_socket integer(ZMQ_PTR) :: zmq_socket_pull - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - integer(bit_kind), allocatable :: det(:,:,:) + integer :: more + integer :: i, j, k, i_state, N, ntask integer, allocatable :: task_id(:) integer :: Nindex integer, allocatable :: ind(:) double precision, save :: time0 = -1.d0 - double precision :: time, timeLast, Nabove_old + double precision :: time, timeLast, old_tooth double precision, external :: omp_get_wtime - integer :: tooth, firstTBDcomb, orgTBDcomb + integer :: cur_cp, old_cur_cp integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) - double precision :: ncomputed integer :: total_computed - double precision :: sumin(comb_teeth) - double precision :: isincomb(N_det_generators) - - print *, "collecture" - - ncomputed = 0d0 + allocate(delta_det(N_states, N_det_non_ref, 0:comb_teeth, 2)) + allocate(cp(N_states, N_det_non_ref, N_cp, 2), mrcc_detail(N_states, N_det_generators)) + allocate(delta_loc(N_states, N_det_non_ref, 2)) + mrcc_detail = 0d0 + delta_det = 0d0 + !mrcc_detail = mrcc_detail / 0d0 + cp = 0d0 total_computed = 0 - sumin = 0d0 - isincomb = 0d0 - character*(512) :: task - Nabove_old = -1.d0 - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & - mrcc_mwen(N_states, N_det_generators) ) - mrcc_mwen(1:N_states, 1:N_det_generators) =0.d0 - - - do i=1,N_det_generators - actually_computed(i) = computed(i) - enddo + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) + + mrcc_mwen =0.d0 parts_to_get(:) = 1 if(fragment_first > 0) then @@ -236,176 +151,118 @@ subroutine mrcc_collector(E, db, tbc, comb, Ncomb, computed, mrcc_detail, sumabo enddo endif - do i=1,tbc(0) - actually_computed(tbc(i)) = .false. - end do - - orgTBDcomb = int(Nabove(1)) - firstTBDcomb = 1 + actually_computed = .false. zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate(task_id(N_det_generators), ind(db(1)%N_slot)) + allocate(task_id(N_det_generators), ind(1)) more = 1 if (time0 < 0.d0) then call wall_time(time0) endif timeLast = time0 - - call get_first_tooth(actually_computed, tooth) - Nabove_old = Nabove(tooth) - db(1)%free_under = first_det_of_teeth(1)-1 - db(2)%free_under = first_det_of_teeth(1)-1 - + cur_cp = 0 + old_cur_cp = 0 pullLoop : do while (more == 1) + call pull_mrcc_results(zmq_socket_pull, Nindex, ind, mrcc_mwen, delta_loc, task_id, ntask) + + if(Nindex /= 1) stop "tried pull multiple Nindex" - call pull_mrcc_results(zmq_socket_pull, Nindex, ind, mrcc_mwen, db, task_id, ntask) do i=1,Nindex - mrcc_detail(1:N_states, ind(i)) += mrcc_mwen(1:N_states,i) + mrcc_detail(:, ind(i)) += mrcc_mwen(:) + do j=1,N_cp !! optimizable + if(cps(ind(i), j) > 0d0) then + if(tooth_of_det(ind(i)) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" + double precision :: fac + fac = cps(ind(i), j) / cps_N(j) * mrcc_weight_inv(ind(i)) * comb_step + do k=1,N_det_non_ref + do i_state=1,N_states + cp(i_state,k,j,1) += delta_loc(i_state,k,1) * fac + cp(i_state,k,j,2) += delta_loc(i_state,k,2) * fac + end do + end do + end if + end do + delta_det(:,:,tooth_of_det(ind(i)), 1) += delta_loc(:,:,1) + delta_det(:,:,tooth_of_det(ind(i)), 2) += delta_loc(:,:,2) parts_to_get(ind(i)) -= 1 - if(parts_to_get(ind(i)) < 0) then - print *, i, ind(i), parts_to_get(ind(i)), Nindex - print *, "PARTS ??" - print *, parts_to_get - stop "PARTS ??" - end if if(parts_to_get(ind(i)) == 0) then - !print *, "COMPUTED", ind(i) actually_computed(ind(i)) = .true. + !print *, "CONTRIB", ind(i), psi_non_ref_coef(ind(i),1), mrcc_detail(1, ind(i)) total_computed += 1 end if end do do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do - - call get_first_tooth(actually_computed, tooth) - db(1)%free_under = first_det_of_teeth(tooth)-1 - db(2)%free_under = first_det_of_teeth(tooth)-1 time = omp_get_wtime() + + if(time - timeLast > 1d0 .or. more /= 1) then timeLast = time - do i=1, first_det_of_teeth(1)-1 - if(.not.(actually_computed(i))) then - cycle pullLoop + cur_cp = N_cp + if(.not. actually_computed(1)) cycle pullLoop + + do i=2,N_det_generators + if(.not. actually_computed(i)) then + cur_cp = done_cp_at(i-1) + exit end if end do + if(cur_cp == 0) cycle pullLoop + + !!!!!!!!!!!! + double precision :: su, su2, eqt, avg, E0, val + su = 0d0 + su2 = 0d0 + + if(N_states > 1) stop "mrcc_stoch : N_states == 1" + do i=1, int(cps_N(cur_cp)) + !if(.not. actually_computed(i)) stop "not computed" + call get_comb_val(comb(i), mrcc_detail, cp_first_tooth(cur_cp), val) + !val = mrcc_detail(1, i) * mrcc_weight_inv(i) * comb_step + su += val ! cps(i, cur_cp) * val + su2 += val**2 ! cps(i, cur_cp) * val**2 + end do + avg = su / cps_N(cur_cp) + eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg**2) / cps_N(cur_cp) ) + E0 = sum(mrcc_detail(1, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) + - double precision :: E0, fco, ffco, avg, tavg, mavg, var, su, su2, meqt, eqt, prop - if(firstTBDcomb <= size(comb)) then - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), mrcc_detail, db, actually_computed, sumabove, sum2above, sumin, isincomb, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - end if - if(Nabove(1) < 5d0) cycle - E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) - if (tooth <= comb_teeth) then - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) - E0 += mrcc_detail(1,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - else - eqt = 0.d0 - endif call wall_time(time) - !if (dabs(eqt/avg) < relative_error) then - if (dabs(eqt) < relative_error .or. total_computed == N_det_generators) then + if (dabs(eqt) < relative_error .or. total_computed >= N_det_generators) then ! Termination - print *, "converged", Nabove(1), first_det_of_teeth(tooth) - !mrcc(1) = avg+E - print '(A7, G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', "GREPME", Nabove(tooth), E+avg, eqt, time-time0, '' + !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' + print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed call zmq_abort(zmq_to_qp_run_socket) else - if (Nabove(tooth) > Nabove_old) then - meqt = 0d0 - ncomputed = 0 - mavg = 0d0 - do i=tooth, comb_teeth - !do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 - ! if(actually_computed(j)) ncomputed(i) = ncomputed(i) + 1 - !end do - fco = 0d0 - ffco = 0d0 - do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 - fco += isincomb(j) * mrcc_weight(j) - ffco += mrcc_weight(j) - end do - ncomputed = sum(isincomb(first_det_of_teeth(i):first_det_of_teeth(i+1)-1)) - - !if(i /= comb_teeth) then - ! var = abs((sumin(i))/Nabove(i) - ((sumabove(i)-sumabove(i+1))/Nabove(i))**2) / (Nabove(i)-1) - !else - ! var = abs((sumin(i))/Nabove(i) - ((sumabove(i))/Nabove(i))**2) / (Nabove(i)-1) - !end if - n = first_det_of_teeth(i+1) - first_det_of_teeth(i) - - - !var = var * ((ffco-fco) /ffco) !((dfloat(n)-ncomputed) / dfloat(n))**2 - !eqt += var / (Nabove(i)-1) - - tavg = 0d0 - ncomputed = 0d0 - su = 0d0 - su2 = 0d0 - do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 - if(actually_computed(j)) then - tavg += (mrcc_detail(1, j))! * isincomb(j) - ncomputed += 1d0 !isincomb(j) - su2 += (mrcc_detail(1, j))**2 - end if - end do - if(ncomputed /= 0) then - tavg = tavg / ncomputed * dfloat(n) - su2 = su2 / ncomputed * dfloat(n) - var = (su2 - (tavg**2)) / ncomputed - else - print *, "ZERO NCOMPUTED" - tavg = 0d0 - su2 = 0d0 - var = 0d0 - end if - - !fco = ffco - !if(i /= comb_teeth) then - ! mavg += (tavg) + (((sumabove(i)-sumabove(i+1))/Nabove(i)) * (ffco-fco)) & - ! / ffco - !else - ! mavg += (tavg) + (((sumabove(i))/Nabove(i)) * (ffco-fco)) & - ! / ffco - !end if - - - var = var * ((dfloat(n)-ncomputed) / dfloat(n)) - meqt += var - mavg += tavg - end do - meqt = sqrt(abs(meqt)) - Nabove_old = Nabove(tooth) - - print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - !print *, "GREPME", avg, eqt, mavg+E0, meqt + if (cur_cp > old_cur_cp) then + old_cur_cp = cur_cp + print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed + !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' endif endif end if end do pullLoop - - E0 = sum(mrcc_detail(1,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - mrcc_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * mrcc_weight_inv(first_det_of_teeth(tooth)) - E0 += mrcc_detail(1,first_det_of_teeth(tooth)) * prop - mrcc(1) = E + E0 + (sumabove(tooth) / Nabove(tooth)) + + delta = cp(:,:,cur_cp,1) + delta_s2 = cp(:,:,cur_cp,2) + + do i=0, cp_first_tooth(cur_cp)-1 + delta += delta_det(:,:,i,1) + delta_s2 += delta_det(:,:,i,2) + end do + mrcc(1) = E call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) end subroutine + integer function mrcc_find(v, w, sze, imin, imax) implicit none integer, intent(in) :: sze, imin, imax @@ -433,81 +290,127 @@ integer function mrcc_find(v, w, sze, imin, imax) end function -BEGIN_PROVIDER [ integer, comb_teeth ] + BEGIN_PROVIDER [ integer, comb_per_cp ] +&BEGIN_PROVIDER [ integer, comb_teeth ] +&BEGIN_PROVIDER [ integer, N_cps_max ] implicit none - comb_teeth = 10 + comb_teeth = 16 + comb_per_cp = 64 + N_cps_max = N_det_generators / comb_per_cp + 1 END_PROVIDER - -subroutine get_first_tooth(computed, first_teeth) + BEGIN_PROVIDER [ integer, N_cp ] +&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] +&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] +&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] +&BEGIN_PROVIDER [ integer, N_mrcc_jobs ] +&BEGIN_PROVIDER [ integer, mrcc_jobs, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] +! subroutine get_carlo_workbatch(Ncp, tbc, cps, done_cp_at) implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i, first_det - - first_det = 1 - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(.not.(computed(i))) then - first_det = i - exit - end if + logical, allocatable :: computed(:) + integer :: i, j, last_full, dets(comb_teeth) + integer :: k, l, cur_cp, under_det(comb_teeth+1) + + allocate(computed(N_det_generators)) + cps = 0d0 + cur_cp = 1 + done_cp_at = 0 + + computed = .false. + + N_mrcc_jobs = first_det_of_comb - 1 + do i=1, N_mrcc_jobs + mrcc_jobs(i) = i + computed(i) = .true. end do - do i=comb_teeth, 1, -1 - if(first_det_of_teeth(i) < first_det) then - first_teeth = i - exit - end if - end do - -end subroutine - - -BEGIN_PROVIDER [ integer, size_tbc ] - implicit none - BEGIN_DOC -! Size of the tbc array - END_DOC - size_tbc = (comb_teeth+1)*N_det_generators + fragment_count*fragment_first -END_PROVIDER - -subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) - implicit none - integer, intent(inout) :: Ncomb - double precision, intent(out) :: comb(Ncomb) - integer, intent(inout) :: tbc(0:size_tbc) - logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - integer :: icount, n - integer :: k, l l=first_det_of_comb call RANDOM_NUMBER(comb) - do i=1,size(comb) + do i=1,N_det_generators comb(i) = comb(i) * comb_step !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - Ncomb = i - if (tbc(0) == N_det_generators) return + call add_comb(comb(i), computed, cps(1, cur_cp), N_mrcc_jobs, mrcc_jobs) + + if(mod(i, comb_per_cp) == 0 .or. N_mrcc_jobs == N_det_generators) then + cps(:, cur_cp+1) = cps(:, cur_cp) + !cps(:, cur_cp) = cps(:, cur_cp) / dfloat(i) + + done_cp_at(N_mrcc_jobs) = cur_cp + cps_N(cur_cp) = dfloat(i) + cur_cp += 1 + if (N_mrcc_jobs == N_det_generators) exit + end if do while (computed(l)) l=l+1 enddo - k=tbc(0)+1 - tbc(k) = l + k=N_mrcc_jobs+1 + mrcc_jobs(k) = l computed(l) = .True. - tbc(0) = k + N_mrcc_jobs = k enddo + N_cp = cur_cp + + if(N_mrcc_jobs /= N_det_generators) then + stop "carlo workcarlo_workbatch" + end if + + cur_cp = 0 + do i=1,N_mrcc_jobs + if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) + done_cp_at(i) = cur_cp + end do + + under_det = 0 + cp_first_tooth = 0 + do i=1,N_mrcc_jobs + do j=comb_teeth+1,1,-1 + if(mrcc_jobs(i) <= first_det_of_teeth(j)) then + under_det(j) = under_det(j) + 1 + if(under_det(j) == first_det_of_teeth(j)) then + do l=done_cp_at(i)+1, N_cp + cps(:first_det_of_teeth(j)-1, l) = 0d0 + cp_first_tooth(l) = j + end do + end if + else + exit + end if + end do + end do +! end subroutine +END_PROVIDER + + +subroutine get_comb_val(stato, detail, first, val) + implicit none + integer, intent(in) :: first + double precision, intent(in) :: stato, detail(N_states, N_det_generators) + double precision, intent(out) :: val + double precision :: curs + integer :: j, k + integer, external :: mrcc_find + + curs = 1d0 - stato + val = 0d0 + + do j = comb_teeth, 1, -1 + !DIR$ FORCEINLINE + k = mrcc_find(curs, mrcc_cweight,size(mrcc_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) + if(k < first_det_of_teeth(first)) exit + val += detail(1, k) * mrcc_weight_inv(k) * comb_step + curs -= comb_step + end do end subroutine - -subroutine get_comb(stato, dets, ct) +subroutine get_comb(stato, dets) implicit none - integer, intent(in) :: ct double precision, intent(in) :: stato - integer, intent(out) :: dets(ct) + integer, intent(out) :: dets(comb_teeth) double precision :: curs integer :: j integer, external :: mrcc_find @@ -521,37 +424,41 @@ subroutine get_comb(stato, dets, ct) end subroutine -subroutine add_comb(comb, computed, tbc, stbc, ct) +subroutine add_comb(com, computed, cp, N, tbc) implicit none - integer, intent(in) :: stbc, ct - double precision, intent(in) :: comb + double precision, intent(in) :: com + integer, intent(inout) :: N + double precision, intent(inout) :: cp(N_det_non_ref) logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(0:stbc) - integer :: i, k, l, dets(ct) + integer, intent(inout) :: tbc(N_det_generators) + integer :: i, k, l, dets(comb_teeth) !DIR$ FORCEINLINE - call get_comb(comb, dets, ct) + call get_comb(com, dets) - k=tbc(0)+1 - do i = 1, ct + k=N+1 + do i = 1, comb_teeth l = dets(i) + cp(l) += 1d0 ! mrcc_weight_inv(l) * comb_step if(.not.(computed(l))) then tbc(k) = l k = k+1 computed(l) = .true. end if end do - tbc(0) = k-1 + N = k-1 end subroutine BEGIN_PROVIDER [ double precision, mrcc_weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, mrcc_weight_inv, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, mrcc_cweight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, mrcc_cweight_cache, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_step ] &BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] &BEGIN_PROVIDER [ integer, first_det_of_comb ] +&BEGIN_PROVIDER [ integer, tooth_of_det, (N_det_generators) ] implicit none integer :: i double precision :: norm_left, stato @@ -608,46 +515,20 @@ end subroutine first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 first_det_of_teeth(1) = first_det_of_comb - !do i=1,comb_teeth - ! mrcc_weight(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = & - ! (mrcc_cweight(first_det_of_teeth(i+1)-1)-mrcc_cweight(first_det_of_teeth(i)-1)) / & - ! dfloat(first_det_of_teeth(i+1)-first_det_of_teeth(i)) - !end do - - !mrcc_cweight = 0d0 - !mrcc_cweight(N_det_generators) = mrcc_weight(N_det_generators) - !do i=N_det_generators-1,1,-1 - ! mrcc_cweight(i) = mrcc_weight(i) + mrcc_cweight(i+1) - !end do - - !do i=1,N_det_generators - ! mrcc_weight(i) = mrcc_weight(i) / mrcc_cweight(1) - ! mrcc_cweight(i) = mrcc_cweight(i) / mrcc_cweight(1) - ! enddo - - !do i=1,N_det_generators-1 - ! mrcc_cweight(i) = 1.d0 - mrcc_cweight(i+1) - !end do - !mrcc_cweight(N_det_generators) = 1.d0 - if(first_det_of_teeth(1) /= first_det_of_comb) then print *, 'Error in ', irp_here stop "comb provider" endif -END_PROVIDER - -BEGIN_PROVIDER [ double precision, mrcc_weight_inv, (N_det_generators) ] - implicit none - BEGIN_DOC -! Inverse of mrcc_weight array - END_DOC - integer :: i do i=1,N_det_generators mrcc_weight_inv(i) = 1.d0/mrcc_weight(i) enddo + tooth_of_det(:first_det_of_teeth(1)-1) = 0 + do i=1,comb_teeth + tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i + end do END_PROVIDER @@ -655,3 +536,4 @@ END_PROVIDER + diff --git a/plugins/mrcepa0/run_mrcc_slave.irp.f b/plugins/mrcepa0/run_mrcc_slave.irp.f index c7f61822..cf87a7ea 100644 --- a/plugins/mrcepa0/run_mrcc_slave.irp.f +++ b/plugins/mrcepa0/run_mrcc_slave.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ integer, fragment_count ] BEGIN_DOC ! Number of fragments for the deterministic part END_DOC - fragment_count = 1 ! (elec_alpha_num-n_core_orb)**2 + fragment_count = 1 !! (elec_alpha_num-n_core_orb)**2 END_PROVIDER @@ -37,16 +37,16 @@ subroutine run_mrcc_slave(thread,iproc,energy) double precision,allocatable :: delta_ij_loc(:,:,:) double precision,allocatable :: delta_ii_loc(:,:) - double precision,allocatable :: delta_ij_s2_loc(:,:,:) - double precision,allocatable :: delta_ii_s2_loc(:,:) + !double precision,allocatable :: delta_ij_s2_loc(:,:,:) + !double precision,allocatable :: delta_ii_s2_loc(:,:) integer :: h,p,n logical :: ok double precision :: contrib(N_states) - allocate(delta_ij_loc(N_states,N_det_non_ref,N_det_ref) & - ,delta_ii_loc(N_states,N_det_ref) & - ,delta_ij_s2_loc(N_states,N_det_non_ref,N_det_ref) & - ,delta_ii_s2_loc(N_states, N_det_ref)) + allocate(delta_ij_loc(N_states,N_det_non_ref,2) & + ,delta_ii_loc(N_states,2))! & + !,delta_ij_s2_loc(N_states,N_det_non_ref,N_det_ref) & + !,delta_ii_s2_loc(N_states, N_det_ref)) allocate(abuf(N_int, 2, N_det_non_ref)) @@ -84,8 +84,8 @@ subroutine run_mrcc_slave(thread,iproc,energy) i_generator = ind(i_i_generator) delta_ij_loc = 0d0 delta_ii_loc = 0d0 - delta_ij_s2_loc = 0d0 - delta_ii_s2_loc = 0d0 + !delta_ij_s2_loc = 0d0 + !delta_ii_s2_loc = 0d0 !call select_connected(i_generator,energy,mrcc_detail(1, i_i_generator),buf,subset) !!!!!!!!!!!!!!!!!!!!!! @@ -104,7 +104,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) n = n - 1 if(n /= 0) then - call mrcc_part_dress(delta_ij_loc, delta_ii_loc, delta_ij_s2_loc, delta_ii_s2_loc, & + call mrcc_part_dress_1c(delta_ij_loc(1,1,1), delta_ii_loc(1,1), delta_ij_loc(1,1,2), delta_ii_loc(1,2), & i_generator,n,abuf,N_int,omask,1d0,contrib) endif end do @@ -121,7 +121,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do if(ctask > 0) then - call push_mrcc_results(zmq_socket_push, Nindex, ind, mrcc_detail, delta_ij_loc, delta_ij_s2_loc, task_id(1), ctask) + call push_mrcc_results(zmq_socket_push, Nindex, ind, mrcc_detail, delta_ij_loc(1,1,1), task_id(1), ctask) mrcc_detail(:,:Nindex) = 0d0 ! buf%cur = 0 end if @@ -138,17 +138,18 @@ subroutine run_mrcc_slave(thread,iproc,energy) end subroutine -subroutine push_mrcc_results(zmq_socket_push, N, ind, mrcc_detail, mrcc_dress, mrcc_s2_dress, task_id, ntask) +subroutine push_mrcc_results(zmq_socket_push, N, ind, mrcc_detail, delta_loc, task_id, ntask) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: mrcc_detail(N_states, N_det_generators) - double precision, intent(in) :: mrcc_dress(N_states, N_det_non_ref, *) - double precision, intent(in) :: mrcc_s2_dress(N_states, N_det_non_ref, *) + double precision, intent(in) :: delta_loc(N_states, N_det_non_ref, 2) integer, intent(in) :: ntask, N, ind(*), task_id(*) integer :: rc, i + if(N/=1) stop "mrcc_stoch, tried to push N>1" + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" @@ -159,10 +160,10 @@ subroutine push_mrcc_results(zmq_socket_push, N, ind, mrcc_detail, mrcc_dress, m rc = f77_zmq_send( zmq_socket_push, mrcc_detail, 8*N_states*N, ZMQ_SNDMORE) if(rc /= 8*N_states*N) stop "push" - rc = f77_zmq_send( zmq_socket_push, mrcc_dress, 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) if(rc /= 8*N_states*N_det_non_ref*N) stop "push" - rc = f77_zmq_send( zmq_socket_push, mrcc_s2_dress, 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det_non_ref*N, ZMQ_SNDMORE) if(rc /= 8*N_states*N_det_non_ref*N) stop "push" @@ -182,14 +183,14 @@ IRP_ENDIF end subroutine -subroutine pull_mrcc_results(zmq_socket_pull, N, ind, mrcc_detail, mrcc_dress, task_id, ntask) +subroutine pull_mrcc_results(zmq_socket_pull, N, ind, mrcc_detail, delta_loc, task_id, ntask) use dress_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: mrcc_detail(N_states, N_det_generators) + double precision, intent(inout) :: mrcc_detail(N_states) + double precision, intent(inout) :: delta_loc(N_states, N_det_non_ref, 2) double precision, allocatable :: mrcc_dress_mwen(:,:) - type(dress_buffer), intent(inout) :: mrcc_dress(2) integer, intent(out) :: ind(*) integer, intent(out) :: N, ntask, task_id(*) integer :: rc, rn, i @@ -198,6 +199,7 @@ subroutine pull_mrcc_results(zmq_socket_pull, N, ind, mrcc_detail, mrcc_dress, t rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "pull" + if(N /= 1) stop "mrcc : pull with N>1 not supported" rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4*N) stop "pull" @@ -205,17 +207,17 @@ subroutine pull_mrcc_results(zmq_socket_pull, N, ind, mrcc_detail, mrcc_dress, t rc = f77_zmq_recv( zmq_socket_pull, mrcc_detail, N_states*8*N, 0) if(rc /= 8*N_states*N) stop "pull" - do i=1,N - rc = f77_zmq_recv( zmq_socket_pull, mrcc_dress_mwen, N_states*8*N_det_non_ref, 0) + !do i=1,N + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det_non_ref, 0) if(rc /= 8*N_states*N_det_non_ref) stop "pull" - call add_to_dress_buffer(mrcc_dress(1), ind(i), mrcc_dress_mwen) - end do + !call add_to_dress_buffer(mrcc_dress(1), ind(i), mrcc_dress_mwen) + !end do - do i=1,N - rc = f77_zmq_recv( zmq_socket_pull, mrcc_dress_mwen, N_states*8*N_det_non_ref, 0) + !do i=1,N + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det_non_ref, 0) if(rc /= 8*N_states*N_det_non_ref) stop "pull" - call add_to_dress_buffer(mrcc_dress(2), ind(i), mrcc_dress_mwen) - end do + !call add_to_dress_buffer(mrcc_dress(2), ind(i), mrcc_dress_mwen) + !end do rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) if(rc /= 4) stop "pull" From db5d32906276f65de3ba644d5fabe85522612725 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 7 Nov 2017 11:46:39 +0100 Subject: [PATCH 09/17] corrected small bias - better balanced checkpoints --- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 90 +++++++++++++++++------ 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index 769ab2e8..d6c98cb7 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -130,7 +130,7 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) logical, allocatable :: actually_computed(:) integer :: total_computed - allocate(delta_det(N_states, N_det_non_ref, 0:comb_teeth, 2)) + allocate(delta_det(N_states, N_det_non_ref, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det_non_ref, N_cp, 2), mrcc_detail(N_states, N_det_generators)) allocate(delta_loc(N_states, N_det_non_ref, 2)) mrcc_detail = 0d0 @@ -174,6 +174,8 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) if(cps(ind(i), j) > 0d0) then if(tooth_of_det(ind(i)) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" double precision :: fac + integer :: toothMwen + logical :: fracted fac = cps(ind(i), j) / cps_N(j) * mrcc_weight_inv(ind(i)) * comb_step do k=1,N_det_non_ref do i_state=1,N_states @@ -183,8 +185,19 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) end do end if end do - delta_det(:,:,tooth_of_det(ind(i)), 1) += delta_loc(:,:,1) - delta_det(:,:,tooth_of_det(ind(i)), 2) += delta_loc(:,:,2) + toothMwen = tooth_of_det(ind(i)) + fracted = (toothMwen /= 0) + if(fracted) fracted = (ind(i) == first_det_of_teeth(toothMwen)) + + if(fracted) then + delta_det(:,:,toothMwen-1, 1) += delta_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen, 1) += delta_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen, 2) += delta_loc(:,:,2) * (fractage(toothMwen)) + else + delta_det(:,:,toothMwen, 1) += delta_loc(:,:,1) + delta_det(:,:,toothMwen, 2) += delta_loc(:,:,2) + end if parts_to_get(ind(i)) -= 1 if(parts_to_get(ind(i)) == 0) then @@ -223,7 +236,8 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) if(N_states > 1) stop "mrcc_stoch : N_states == 1" do i=1, int(cps_N(cur_cp)) !if(.not. actually_computed(i)) stop "not computed" - call get_comb_val(comb(i), mrcc_detail, cp_first_tooth(cur_cp), val) + !call get_comb_val(comb(i), mrcc_detail, cp_first_tooth(cur_cp), val) + call get_comb_val(comb(i), mrcc_detail, cur_cp, val) !val = mrcc_detail(1, i) * mrcc_weight_inv(i) * comb_step su += val ! cps(i, cur_cp) * val su2 += val**2 ! cps(i, cur_cp) * val**2 @@ -231,10 +245,11 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) avg = su / cps_N(cur_cp) eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg**2) / cps_N(cur_cp) ) E0 = sum(mrcc_detail(1, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) - - + if(cp_first_tooth(cur_cp) <= comb_teeth) then + E0 = E0 + mrcc_detail(1, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) + end if call wall_time(time) - if (dabs(eqt) < relative_error .or. total_computed >= N_det_generators) then + if ((dabs(eqt) < relative_error*0d0 .and. cps_N(cur_cp) >= 30) .or. total_computed >= N_det_generators) then ! Termination !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed @@ -252,7 +267,7 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) delta = cp(:,:,cur_cp,1) delta_s2 = cp(:,:,cur_cp,2) - do i=0, cp_first_tooth(cur_cp)-1 + do i=cp_first_tooth(cur_cp)-1,0,-1 delta += delta_det(:,:,i,1) delta_s2 += delta_det(:,:,i,2) end do @@ -290,13 +305,16 @@ integer function mrcc_find(v, w, sze, imin, imax) end function - BEGIN_PROVIDER [ integer, comb_per_cp ] + BEGIN_PROVIDER [ integer, gen_per_cp ] &BEGIN_PROVIDER [ integer, comb_teeth ] &BEGIN_PROVIDER [ integer, N_cps_max ] implicit none comb_teeth = 16 - comb_per_cp = 64 - N_cps_max = N_det_generators / comb_per_cp + 1 + N_cps_max = 100 + !comb_per_cp = 64 + gen_per_cp = (N_det_generators / N_cps_max) + 1 + N_cps_max += 1 + !N_cps_max = N_det_generators / comb_per_cp + 1 END_PROVIDER @@ -334,13 +352,16 @@ END_PROVIDER !DIR$ FORCEINLINE call add_comb(comb(i), computed, cps(1, cur_cp), N_mrcc_jobs, mrcc_jobs) - if(mod(i, comb_per_cp) == 0 .or. N_mrcc_jobs == N_det_generators) then - cps(:, cur_cp+1) = cps(:, cur_cp) - !cps(:, cur_cp) = cps(:, cur_cp) / dfloat(i) - + if(N_mrcc_jobs / gen_per_cp > (cur_cp-1) .or. N_mrcc_jobs == N_det_generators) then + !if(mod(i, comb_per_cp) == 0 .or. N_mrcc_jobs == N_det_generators) then done_cp_at(N_mrcc_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) - cur_cp += 1 + if(N_mrcc_jobs /= N_det_generators) then + cps(:, cur_cp+1) = cps(:, cur_cp) + cur_cp += 1 + end if + !cps(:, cur_cp) = cps(:, cur_cp) / dfloat(i) + if (N_mrcc_jobs == N_det_generators) exit end if do while (computed(l)) @@ -352,8 +373,8 @@ END_PROVIDER N_mrcc_jobs = k enddo N_cp = cur_cp - - if(N_mrcc_jobs /= N_det_generators) then + if(N_mrcc_jobs /= N_det_generators .or. N_cp > N_cps_max) then + print *, N_mrcc_jobs, N_det_generators, N_cp, N_cps_max stop "carlo workcarlo_workbatch" end if @@ -370,24 +391,30 @@ END_PROVIDER do j=comb_teeth+1,1,-1 if(mrcc_jobs(i) <= first_det_of_teeth(j)) then under_det(j) = under_det(j) + 1 - if(under_det(j) == first_det_of_teeth(j)) then + if(under_det(j) == first_det_of_teeth(j))then do l=done_cp_at(i)+1, N_cp cps(:first_det_of_teeth(j)-1, l) = 0d0 cp_first_tooth(l) = j end do + cps(first_det_of_teeth(j), done_cp_at(i)+1) = & + cps(first_det_of_teeth(j), done_cp_at(i)+1) * fractage(j) end if else exit end if end do end do + cps(:, N_cp) = 0d0 + cp_first_tooth(N_cp) = comb_teeth+1 + ! end subroutine END_PROVIDER -subroutine get_comb_val(stato, detail, first, val) +subroutine get_comb_val(stato, detail, cur_cp, val) implicit none - integer, intent(in) :: first + integer, intent(in) :: cur_cp + integer :: first double precision, intent(in) :: stato, detail(N_states, N_det_generators) double precision, intent(out) :: val double precision :: curs @@ -396,12 +423,18 @@ subroutine get_comb_val(stato, detail, first, val) curs = 1d0 - stato val = 0d0 + first = cp_first_tooth(cur_cp) - do j = comb_teeth, 1, -1 + do j = comb_teeth, first, -1 !DIR$ FORCEINLINE k = mrcc_find(curs, mrcc_cweight,size(mrcc_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - if(k < first_det_of_teeth(first)) exit - val += detail(1, k) * mrcc_weight_inv(k) * comb_step + !if(k < first_det_of_teeth(first)) exit + if(k == first_det_of_teeth(first)) then + val += detail(1, k) * mrcc_weight_inv(k) * comb_step * fractage(first) + else + val += detail(1, k) * mrcc_weight_inv(k) * comb_step + end if + curs -= comb_step end do end subroutine @@ -455,6 +488,7 @@ end subroutine &BEGIN_PROVIDER [ double precision, mrcc_weight_inv, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, mrcc_cweight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, mrcc_cweight_cache, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, fractage, (comb_teeth) ] &BEGIN_PROVIDER [ double precision, comb_step ] &BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] &BEGIN_PROVIDER [ integer, first_det_of_comb ] @@ -510,6 +544,7 @@ end subroutine integer :: iloc iloc = mrcc_find(stato, mrcc_cweight, N_det_generators, 1, iloc) first_det_of_teeth(i) = iloc + fractage(i) = (mrcc_cweight(iloc) - stato) / mrcc_weight(iloc) stato -= comb_step end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 @@ -529,6 +564,13 @@ end subroutine do i=1,comb_teeth tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i end do + + !double precision :: cur + !fractage = 1d0 + !do i=1,comb_teeth-1 + ! cur = 1d0 - dfloat(i)*comb_step + + !end do END_PROVIDER From 159b4ce4987297099ff93fea716a73ac12a2b3e3 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 7 Nov 2017 15:04:12 +0100 Subject: [PATCH 10/17] checkpoints were triggered at wrong time --- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index d6c98cb7..7aa54c36 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -218,10 +218,11 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) if(time - timeLast > 1d0 .or. more /= 1) then timeLast = time cur_cp = N_cp - if(.not. actually_computed(1)) cycle pullLoop + if(.not. actually_computed(mrcc_jobs(1))) cycle pullLoop do i=2,N_det_generators - if(.not. actually_computed(i)) then + if(.not. actually_computed(mrcc_jobs(i))) then + print *, "first not comp", i cur_cp = done_cp_at(i-1) exit end if @@ -249,7 +250,8 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) E0 = E0 + mrcc_detail(1, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if call wall_time(time) - if ((dabs(eqt) < relative_error*0d0 .and. cps_N(cur_cp) >= 30) .or. total_computed >= N_det_generators) then + !if ((dabs(eqt) < relative_error*0d0 .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then + if(cur_cp > 10) then ! Termination !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed @@ -331,8 +333,11 @@ END_PROVIDER logical, allocatable :: computed(:) integer :: i, j, last_full, dets(comb_teeth) integer :: k, l, cur_cp, under_det(comb_teeth+1) - + integer, allocatable :: iorder(:), first_cp(:) + + allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) allocate(computed(N_det_generators)) + first_cp = 1 cps = 0d0 cur_cp = 1 done_cp_at = 0 @@ -354,6 +359,7 @@ END_PROVIDER if(N_mrcc_jobs / gen_per_cp > (cur_cp-1) .or. N_mrcc_jobs == N_det_generators) then !if(mod(i, comb_per_cp) == 0 .or. N_mrcc_jobs == N_det_generators) then + first_cp(cur_cp+1) = N_mrcc_jobs done_cp_at(N_mrcc_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) if(N_mrcc_jobs /= N_det_generators) then @@ -407,6 +413,10 @@ END_PROVIDER cps(:, N_cp) = 0d0 cp_first_tooth(N_cp) = comb_teeth+1 + iorder = -1132154665 + do i=1,N_cp-1 + call isort(mrcc_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) + end do ! end subroutine END_PROVIDER From e1fb8eded24a4cc4e9ab822373c33d4f79ff028f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 20 Nov 2017 10:56:04 +0100 Subject: [PATCH 11/17] hardcoded and arbitrary target absolute error for mrcc_zmq --- plugins/mrcepa0/mrcc_stoch_routines.irp.f | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index 7aa54c36..8418caec 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -250,8 +250,7 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) E0 = E0 + mrcc_detail(1, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if call wall_time(time) - !if ((dabs(eqt) < relative_error*0d0 .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then - if(cur_cp > 10) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then ! Termination !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed From aec9872ccba9e91edffb3cd96605333e8887e880 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 21 Nov 2017 14:55:31 +0100 Subject: [PATCH 12/17] removed delta_ij_cancel --- plugins/mrcepa0/dressing.irp.f | 28 +++++++++++------------ plugins/mrcepa0/mrcc_stoch_routines.irp.f | 18 +++++++++++++-- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index b40d4bd2..d8828637 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -930,7 +930,7 @@ end if(errr /= 0d0) then errr = errr / 2d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 else - errr = 4d-4 + errr = 1d-4 end if relative_error = errr print *, "RELATIVE ERROR", relative_error @@ -979,8 +979,6 @@ END_PROVIDER enddo end do end do - print *, "De", delta_ij(1,:5,1) - print *, "Ds", delta_ij_s2(1,1000:1005,1) else if(mrmode == 3) then do i = 1, N_det_ref do i_state = 1, N_states @@ -1052,18 +1050,18 @@ END_PROVIDER stop "invalid mrmode" end if - if(mrmode == 2 .or. mrmode == 3) then - do i = 1, N_det_ref - do i_state = 1, N_states - delta_ii(i_state,i) += delta_ii_cancel(i_state,i) - enddo - do j = 1, N_det_non_ref - do i_state = 1, N_states - delta_ij(i_state,j,i) += delta_ij_cancel(i_state,j,i) - enddo - end do - end do - end if + !if(mrmode == 2 .or. mrmode == 3) then + ! do i = 1, N_det_ref + ! do i_state = 1, N_states + ! delta_ii(i_state,i) += delta_ii_cancel(i_state,i) + ! enddo + ! do j = 1, N_det_non_ref + ! do i_state = 1, N_states + ! delta_ij(i_state,j,i) += delta_ij_cancel(i_state,j,i) + ! enddo + ! end do + ! end do + !end if END_PROVIDER diff --git a/plugins/mrcepa0/mrcc_stoch_routines.irp.f b/plugins/mrcepa0/mrcc_stoch_routines.irp.f index 8418caec..0e2d9bb5 100644 --- a/plugins/mrcepa0/mrcc_stoch_routines.irp.f +++ b/plugins/mrcepa0/mrcc_stoch_routines.irp.f @@ -264,7 +264,18 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) endif end if end do pullLoop - + + if(total_computed == N_det_generators) then + print *, "TOTALLY COMPUTED" + delta = 0d0 + delta_s2 = 0d0 + do i=comb_teeth+1,0,-1 + delta += delta_det(:,:,i,1) + delta_s2 += delta_det(:,:,i,2) + end do + else + + delta = cp(:,:,cur_cp,1) delta_s2 = cp(:,:,cur_cp,2) @@ -272,6 +283,9 @@ subroutine mrcc_collector(E, relative_error, delta, delta_s2, mrcc) delta += delta_det(:,:,i,1) delta_s2 += delta_det(:,:,i,2) end do + + end if + mrcc(1) = E call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) @@ -311,7 +325,7 @@ end function &BEGIN_PROVIDER [ integer, N_cps_max ] implicit none comb_teeth = 16 - N_cps_max = 100 + N_cps_max = 32 !comb_per_cp = 64 gen_per_cp = (N_det_generators / N_cps_max) + 1 N_cps_max += 1 From efd37a5adc258dab81ae04a3f7b2f5a73294cf76 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 21 Nov 2017 15:10:52 +0100 Subject: [PATCH 13/17] removed useless parameter coef in mrcc_part_dress_1c --- plugins/mrcepa0/dressing.irp.f | 7 +++---- plugins/mrcepa0/run_mrcc_slave.irp.f | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index d8828637..620605ff 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -611,7 +611,7 @@ end -subroutine mrcc_part_dress_1c(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,coef,contrib) +subroutine mrcc_part_dress_1c(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask,contrib) use bitmasks implicit none @@ -651,7 +651,6 @@ subroutine mrcc_part_dress_1c(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_ logical, external :: detEq, is_generable !double precision, external :: get_dij, get_dij_index double precision :: Delta_E_inv(N_states) - double precision, intent(in) :: coef double precision, intent(inout) :: contrib(N_states) double precision :: sdress, hdress @@ -846,8 +845,8 @@ subroutine mrcc_part_dress_1c(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_ hla = hij_cache(k_sd) sla = sij_cache(k_sd) do i_state=1,N_states - dIa_hla(i_state,k_sd) = dIa(i_state) * hla * coef - dIa_sla(i_state,k_sd) = dIa(i_state) * sla * coef + dIa_hla(i_state,k_sd) = dIa(i_state) * hla + dIa_sla(i_state,k_sd) = dIa(i_state) * sla enddo enddo do i_state=1,N_states diff --git a/plugins/mrcepa0/run_mrcc_slave.irp.f b/plugins/mrcepa0/run_mrcc_slave.irp.f index cf87a7ea..b6088424 100644 --- a/plugins/mrcepa0/run_mrcc_slave.irp.f +++ b/plugins/mrcepa0/run_mrcc_slave.irp.f @@ -105,7 +105,7 @@ subroutine run_mrcc_slave(thread,iproc,energy) if(n /= 0) then call mrcc_part_dress_1c(delta_ij_loc(1,1,1), delta_ii_loc(1,1), delta_ij_loc(1,1,2), delta_ii_loc(1,2), & - i_generator,n,abuf,N_int,omask,1d0,contrib) + i_generator,n,abuf,N_int,omask,contrib) endif end do !!!!!!!!!!!!!!!!!!!!!! From ad58d8c55d67eb5326227d80d0d16e5687672ca9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Dec 2017 14:12:39 +0100 Subject: [PATCH 14/17] Removed ezfio_interface file --- plugins/mrcepa0/ezfio_interface.irp.f | 80 --------------------------- 1 file changed, 80 deletions(-) delete mode 100644 plugins/mrcepa0/ezfio_interface.irp.f diff --git a/plugins/mrcepa0/ezfio_interface.irp.f b/plugins/mrcepa0/ezfio_interface.irp.f deleted file mode 100644 index 3627abe6..00000000 --- a/plugins/mrcepa0/ezfio_interface.irp.f +++ /dev/null @@ -1,80 +0,0 @@ -! DO NOT MODIFY BY HAND -! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /home/garniron/quantum_package/src/mrcepa0/EZFIO.cfg - - -BEGIN_PROVIDER [ logical, perturbative_triples ] - implicit none - BEGIN_DOC -! Compute perturbative contribution of the Triples - END_DOC - - logical :: has - PROVIDE ezfio_filename - - call ezfio_has_mrcepa0_perturbative_triples(has) - if (has) then - call ezfio_get_mrcepa0_perturbative_triples(perturbative_triples) - else - print *, 'mrcepa0/perturbative_triples not found in EZFIO file' - stop 1 - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] - implicit none - BEGIN_DOC -! Threshold on the convergence of the dressed CI energy - END_DOC - - logical :: has - PROVIDE ezfio_filename - - call ezfio_has_mrcepa0_thresh_dressed_ci(has) - if (has) then - call ezfio_get_mrcepa0_thresh_dressed_ci(thresh_dressed_ci) - else - print *, 'mrcepa0/thresh_dressed_ci not found in EZFIO file' - stop 1 - endif - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_it_max_dressed_ci ] - implicit none - BEGIN_DOC -! Maximum number of dressed CI iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - - call ezfio_has_mrcepa0_n_it_max_dressed_ci(has) - if (has) then - call ezfio_get_mrcepa0_n_it_max_dressed_ci(n_it_max_dressed_ci) - else - print *, 'mrcepa0/n_it_max_dressed_ci not found in EZFIO file' - stop 1 - endif - -END_PROVIDER - -BEGIN_PROVIDER [ integer, lambda_type ] - implicit none - BEGIN_DOC -! lambda type - END_DOC - - logical :: has - PROVIDE ezfio_filename - - call ezfio_has_mrcepa0_lambda_type(has) - if (has) then - call ezfio_get_mrcepa0_lambda_type(lambda_type) - else - print *, 'mrcepa0/lambda_type not found in EZFIO file' - stop 1 - endif - -END_PROVIDER From f93cfb8a6f86e1e81cef088f2f45028ac910c233 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Dec 2017 16:17:54 +0100 Subject: [PATCH 15/17] Revert 4idx --- plugins/Properties/print_hcc.irp.f | 13 ++++++++++++- src/Integrals_Bielec/mo_bi_integrals.irp.f | 14 +++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/plugins/Properties/print_hcc.irp.f b/plugins/Properties/print_hcc.irp.f index 45bca5e6..7f74f996 100644 --- a/plugins/Properties/print_hcc.irp.f +++ b/plugins/Properties/print_hcc.irp.f @@ -2,5 +2,16 @@ program print_hcc_main implicit none read_wf = .True. touch read_wf - call print_hcc +! call print_hcc + call routine end + + +subroutine routine + implicit none + integer :: i + do i = 1, mo_tot_num + write(*,'(1000(F16.10,X))')one_body_dm_mo_beta(i,:,1) + enddo +end + diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 3fa01292..d7b0fa5b 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -124,16 +124,16 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] else ! call add_integrals_to_map(full_ijkl_bitmask_4) - call four_index_transform_zmq(ao_integrals_map,mo_integrals_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! -! call four_index_transform_block(ao_integrals_map,mo_integrals_map, & +! call four_index_transform_zmq(ao_integrals_map,mo_integrals_map, & ! mo_coef, size(mo_coef,1), & ! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & ! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! + + call four_index_transform_block(ao_integrals_map,mo_integrals_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) + ! call four_index_transform(ao_integrals_map,mo_integrals_map, & ! mo_coef, size(mo_coef,1), & ! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & From 660e87bb30e25c249b8d41de0f4d5b74ff40b592 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Dec 2017 17:02:26 +0100 Subject: [PATCH 16/17] Fixed one-body DM --- src/Determinants/density_matrix.irp.f | 87 ++++++++++++++++++--------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/src/Determinants/density_matrix.irp.f b/src/Determinants/density_matrix.irp.f index b948182d..86c7fcd4 100644 --- a/src/Determinants/density_matrix.irp.f +++ b/src/Determinants/density_matrix.irp.f @@ -99,7 +99,7 @@ END_PROVIDER ! Alpha and beta one-body density matrix for each state END_DOC - integer :: j,k,l,m + integer :: j,k,l,m,k_a,k_b integer :: occ(N_int*bit_kind_size,2) double precision :: ck, cl, ckl double precision :: phase @@ -114,7 +114,7 @@ END_PROVIDER one_body_dm_mo_alpha = 0.d0 one_body_dm_mo_beta = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(j,k,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, & + !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, & !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num,& !$OMP elec_beta_num,one_body_dm_mo_alpha,one_body_dm_mo_beta,N_det,& @@ -124,28 +124,31 @@ END_PROVIDER !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values) allocate(tmp_a(mo_tot_num,mo_tot_num,N_states), tmp_b(mo_tot_num,mo_tot_num,N_states) ) tmp_a = 0.d0 - tmp_b = 0.d0 - !$OMP DO SCHEDULE(guided) - do k=1,N_det - krow = psi_bilinear_matrix_rows(k) - kcol = psi_bilinear_matrix_columns(k) - tmp_det(:,1) = psi_det_alpha_unique(:,krow) - tmp_det(:,2) = psi_det_beta_unique (:,kcol) + !$OMP DO SCHEDULE(dynamic,64) + do k_a=1,N_det + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) do m=1,N_states - ck = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_values(k,m) + ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,m) do l=1,elec_alpha_num j = occ(l,1) tmp_a(j,j,m) += ck enddo - do l=1,elec_beta_num - j = occ(l,2) - tmp_b(j,j,m) += ck - enddo enddo - if (k == N_det) cycle - l = k+1 + if (k_a == N_det) cycle + l = k_a+1 lrow = psi_bilinear_matrix_rows(l) lcol = psi_bilinear_matrix_columns(l) ! Fix beta determinant, loop over alphas @@ -157,7 +160,7 @@ END_PROVIDER call get_mono_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int) call decode_exc_spin(exc,h1,p1,h2,p2) do m=1,N_states - ckl = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_values(l,m) * phase + ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,m) * phase tmp_a(h1,p1,m) += ckl tmp_a(p1,h1,m) += ckl enddo @@ -168,20 +171,52 @@ END_PROVIDER lcol = psi_bilinear_matrix_columns(l) enddo - l = psi_bilinear_matrix_order_reverse(k) - if (l == N_det) cycle - l = l+1 - ! Fix alpha determinant, loop over betas + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + one_body_dm_mo_alpha(:,:,:) = one_body_dm_mo_alpha(:,:,:) + tmp_a(:,:,:) + !$OMP END CRITICAL + deallocate(tmp_a) + + tmp_b = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_b=1,N_det + krow = psi_bilinear_matrix_transp_rows(k_b) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_transp_columns(k_b) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,m) + do l=1,elec_beta_num + j = occ(l,2) + tmp_b(j,j,m) += ck + enddo + enddo + + if (k_b == N_det) cycle + l = k_b+1 lrow = psi_bilinear_matrix_transp_rows(l) lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix beta determinant, loop over alphas do while ( lrow == krow ) - tmp_det2(:) = psi_det_beta_unique (:, lcol) + tmp_det2(:) = psi_det_beta_unique(:, lcol) call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int) if (degree == 1) then + exc = 0 call get_mono_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int) call decode_exc_spin(exc,h1,p1,h2,p2) do m=1,N_states - ckl = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_transp_values(l,m) * phase + ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,m) * phase tmp_b(h1,p1,m) += ckl tmp_b(p1,h1,m) += ckl enddo @@ -195,12 +230,10 @@ END_PROVIDER enddo !$OMP END DO NOWAIT !$OMP CRITICAL - one_body_dm_mo_alpha(:,:,:) = one_body_dm_mo_alpha(:,:,:) + tmp_a(:,:,:) - !$OMP END CRITICAL - !$OMP CRITICAL one_body_dm_mo_beta(:,:,:) = one_body_dm_mo_beta(:,:,:) + tmp_b(:,:,:) !$OMP END CRITICAL - deallocate(tmp_a,tmp_b) + + deallocate(tmp_b) !$OMP END PARALLEL END_PROVIDER From 61d1a2b551f6436f45420379e32e850f712c32b0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Dec 2017 17:07:28 +0100 Subject: [PATCH 17/17] Fixed old FCI: --- plugins/Full_CI/H_apply.irp.f | 4 ---- 1 file changed, 4 deletions(-) diff --git a/plugins/Full_CI/H_apply.irp.f b/plugins/Full_CI/H_apply.irp.f index 4f63e29e..a37e2165 100644 --- a/plugins/Full_CI/H_apply.irp.f +++ b/plugins/Full_CI/H_apply.irp.f @@ -5,24 +5,20 @@ from generate_h_apply import * s = H_apply("FCI") s.set_selection_pt2("epstein_nesbet_2x2") #s.set_selection_pt2("qdpt") -s.unset_skip() print s s = H_apply("FCI_PT2") s.set_perturbation("epstein_nesbet_2x2") #s.set_perturbation("qdpt") -s.unset_skip() s.unset_openmp() print s s = H_apply("FCI_no_selection") s.set_selection_pt2("dummy") -s.unset_skip() print s s = H_apply("FCI_mono") s.set_selection_pt2("epstein_nesbet_2x2") -s.unset_skip() s.unset_double_excitations() s.unset_openmp() print s