From ea99cc29b3b9f82d46c1cf4b809b5fe269b59c63 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 20 Feb 2018 17:34:51 +0100 Subject: [PATCH] working and less slow mrcc_sto --- plugins/dress_zmq/alpha_factory.irp.f | 17 +-- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- plugins/mrcc_sto/mrcc_sto.irp.f | 151 +++++++++++------------- 3 files changed, 79 insertions(+), 91 deletions(-) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 7f2952b7..99ae53fa 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -2,11 +2,12 @@ use bitmasks -subroutine alpha_callback(delta_ij_loc, i_generator, subset) +subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) use bitmasks implicit none integer, intent(in) :: i_generator, subset double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) + integer, intent(in) :: iproc integer :: k,l @@ -14,12 +15,12 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset) do l=1,N_generators_bitmask - call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset) + call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset,iproc) enddo end subroutine -subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset) +subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, iproc) use bitmasks implicit none BEGIN_DOC @@ -28,6 +29,8 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) integer, intent(in) :: i_generator, subset, bitmask_index + integer, intent(in) :: iproc + integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,n integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) @@ -356,7 +359,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index !print *, "IND1", indexes(1,:) !print *, "IND2", indexes_end(1,:) !stop - call alpha_callback_mask(delta_ij_loc, sp, mask, bannedOrb, banned, indexes, indexes_end, abuf, siz) + call alpha_callback_mask(delta_ij_loc, sp, mask, bannedOrb, banned, indexes, indexes_end, abuf, siz, iproc) !call dress_with_alpha_buffer(delta_ij_loc, minilist, interesting(0), abuf, n) end if @@ -368,12 +371,12 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index end subroutine -subroutine alpha_callback_mask(delta_ij_loc, sp, mask, bannedOrb, banned, indexes, indexes_end, abuf, siz) +subroutine alpha_callback_mask(delta_ij_loc, sp, mask, bannedOrb, banned, indexes, indexes_end, abuf, siz, iproc) use bitmasks implicit none double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) - integer, intent(in) :: sp, indexes(0:mo_tot_num, 0:mo_tot_num), siz + integer, intent(in) :: sp, indexes(0:mo_tot_num, 0:mo_tot_num), siz, iproc integer, intent(in) :: indexes_end(0:mo_tot_num, 0:mo_tot_num), abuf(*) logical, intent(in) :: bannedOrb(mo_tot_num,2), banned(mo_tot_num, mo_tot_num) integer(bit_kind), intent(in) :: mask(N_int, 2) @@ -456,7 +459,7 @@ subroutine alpha_callback_mask(delta_ij_loc, sp, mask, bannedOrb, banned, indexe call apply_particles(mask, s1, i, s2, j, alpha, ok, N_int) !if(.not. ok) stop "non existing alpha......" !print *, "willcall", st4-1, size(labuf) - call dress_with_alpha_buffer(delta_ij_loc, labuf, st4-1, alpha) + call dress_with_alpha_buffer(delta_ij_loc, labuf, st4-1, alpha, iproc) !call dress_with_alpha_buffer(delta_ij_loc, abuf, siz, alpha, 1) end if end do diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index b40de73d..08d8af3d 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -54,7 +54,7 @@ subroutine run_dress_slave(thread,iproc,energy) if(task_id /= 0) then read (task,*) subset, i_generator delta_ij_loc = 0d0 - call alpha_callback(delta_ij_loc, i_generator, subset) + call alpha_callback(delta_ij_loc, i_generator, subset, iproc) !!! SET DRESSING COLUMN? !do i=1,N_det diff --git a/plugins/mrcc_sto/mrcc_sto.irp.f b/plugins/mrcc_sto/mrcc_sto.irp.f index cde31adb..ea02d85f 100644 --- a/plugins/mrcc_sto/mrcc_sto.irp.f +++ b/plugins/mrcc_sto/mrcc_sto.irp.f @@ -24,7 +24,17 @@ end END_PROVIDER -subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) + BEGIN_PROVIDER [ double precision, hij_cache_, (N_det,Nproc) ] +&BEGIN_PROVIDER [ double precision, sij_cache_, (N_det,Nproc) ] +&BEGIN_PROVIDER [ double precision, dIa_hla_, (N_states,N_det,Nproc) ] +&BEGIN_PROVIDER [ double precision, dIa_sla_, (N_states,N_det,Nproc) ] +&BEGIN_PROVIDER [ integer, idx_alpha_, (0:N_det,Nproc) ] + BEGIN_DOC + ! temporay arrays for dress_with_alpha_buffer. Avoids realocation. +END_DOC +END_PROVIDER + +subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha, iproc) use bitmasks implicit none BEGIN_DOC @@ -35,31 +45,29 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) !alpha : alpha determinant END_DOC integer(bit_kind), intent(in) :: alpha(N_int,2) - integer,intent(in) :: minilist(n_minilist), n_minilist + integer,intent(in) :: minilist(n_minilist), n_minilist, iproc double precision, intent(inout) :: delta_ij_loc(N_states,N_det,2) integer :: i,j,k,l,m integer :: degree1, degree2, degree - integer, allocatable :: idx_alpha(:) double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states), hka - double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) double precision :: phase, phase2 - double precision :: ci_inv(N_states) integer :: exc(0:2,2,2) integer :: h1,h2,p1,p2,s1,s2 integer(bit_kind) :: tmp_det(N_int,2), ctrl integer :: i_state, k_sd, l_sd, m_sd, ll_sd, i_I - double precision, allocatable :: hij_cache(:), sij_cache(:) double precision :: Delta_E_inv(N_states) double precision :: sdress, hdress - double precision :: c0(N_states) logical :: ok, ok2 integer :: old_ninc double precision :: shdress + PROVIDE mo_class + + if(n_minilist == 1) return shdress = 0d0 @@ -74,53 +82,45 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) if(degree1 <= 2) return end do - allocate(hij_cache(N_det), sij_cache(N_det)) - allocate (dIa_hla(N_states,N_det), dIa_sla(N_states,N_det)) - allocate (idx_alpha(0:n_minilist)) - do i_state=1,N_states - c0(i_state) = 1.d0/psi_coef(dressed_column_idx(i_state),i_state) - enddo ll_sd = 0 do l_sd=1,n_minilist ok = .true. k_sd = minilist(l_sd) !if(idx_non_ref_rev(k_sd) == 0) cycle - do i_I=1,N_det_ref - call get_excitation_degree(psi_det_sorted(1,1,k_sd),psi_ref(1,1,i_I),degree1,N_int) - if(degree1 == 0) then - ok = .false. - exit - end if - end do - - !if(ok) then - ! call get_excitation(psi_det_sorted(1,1,k_sd),alpha,exc,degree1,phase,N_int) - ! if(degree1 == 0 .or. degree1 > 2) stop "minilist error" - ! call decode_exc(exc,degree1,h1,p1,h2,p2,s1,s2) - ! - ! if(h1 > 10 .or. p1 < 7 .or. p1 == 8 .or. p1 == 9) ok = .false. - ! if(ok .and. degree1 == 2) then - ! if(h2 > 10 .or. p2 < 7 .or. p2 == 8 .or. p2 == 9) ok = .false. + !do i_I=1,N_det_ref + ! call get_excitation_degree(psi_det_sorted(1,1,k_sd),psi_ref(1,1,i_I),degree1,N_int) + ! if(degree1 == 0) then + ! ok = .false. + ! exit ! end if - ! !if(degree1 == 0 .or. degree1 > 2) stop "minilist error" - ! !iand(xor(psi_det_sorted(i,2,k_sd), alpha(i,2)), alpha(i,2)) - !end if + !end do + if(idx_non_ref_from_sorted(k_sd) == 0) ok = .false. + + !if(ok) then + ! call get_excitation(psi_det_sorted(1,1,k_sd),alpha,exc,degree1,phase,N_int) + ! if(degree1 == 0 .or. degree1 > 2) stop "minilist error" + ! call decode_exc(exc,degree1,h1,p1,h2,p2,s1,s2) + ! + ! ok = (mo_class(h1)(1:1) == 'A' .or. mo_class(h1)(1:1) == 'I') .and. & + ! (mo_class(p1)(1:1) == 'A' .or. mo_class(p1)(1:1) == 'V') + ! if(ok .and. degree1 == 2) then + ! ok = (mo_class(h2)(1:1) == 'A' .or. mo_class(h2)(1:1) == 'I') .and. & + ! (mo_class(p2)(1:1) == 'A' .or. mo_class(p2)(1:1) == 'V') + ! end if + !end if - !if( xor(ok, idx_non_ref_from_sorted(k_sd) > 0)) stop "BUGUE" if(ok) then ll_sd += 1 - idx_alpha(ll_sd) = k_sd -! call i_h_j(alpha,psi_non_ref(1,1,idx_alpha(l_sd)),N_int,hij_cache(k_sd)) -! call get_s2(alpha,psi_non_ref(1,1,idx_alpha(l_sd)),N_int,sij_cache(k_sd)) - call i_h_j(alpha,psi_det_sorted(1,1,k_sd),N_int,hij_cache(k_sd)) - call get_s2(alpha,psi_det_sorted(1,1,k_sd),N_int,sij_cache(k_sd)) + idx_alpha_(ll_sd,iproc) = k_sd + call i_h_j(alpha,psi_det_sorted(1,1,k_sd),N_int,hij_cache_(k_sd,iproc)) + call get_s2(alpha,psi_det_sorted(1,1,k_sd),N_int,sij_cache_(k_sd,iproc)) end if enddo if(ll_sd <= 1) return - idx_alpha(0) = ll_sd + idx_alpha_(0,iproc) = ll_sd do i_I=1,N_det_ref @@ -133,18 +133,14 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) dIa(i_state) = 0.d0 enddo - do k_sd=1,idx_alpha(0) - !print *, "idx ", k_sd - !print *, "idx2", idx_alpha(k_sd) - !print *, "ref - !if(idx_non_ref_rev(idx_alpha(k_sd)) == 0) cycle - call get_excitation_degree(psi_ref(1,1,i_I),psi_det_sorted(1,1,idx_alpha(k_sd)),degree,N_int) + do k_sd=1,idx_alpha_(0,iproc) + call get_excitation_degree(psi_ref(1,1,i_I),psi_det_sorted(1,1,idx_alpha_(k_sd,iproc)),degree,N_int) ! print *, "diden" if (degree > 2) then cycle endif - call get_excitation(psi_det_sorted(1,1,idx_alpha(k_sd)),alpha,exc,degree2,phase,N_int) + call get_excitation(psi_det_sorted(1,1,idx_alpha_(k_sd,iproc)),alpha,exc,degree2,phase,N_int) !print *, "DEG", degree2 call decode_exc(exc,degree2,h1,p1,h2,p2,s1,s2) do k=1,N_int @@ -155,8 +151,11 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) ok2 = .false. do i_state=1,N_states - dIK(i_state) = dij(i_I, idx_non_ref_from_sorted(idx_alpha(k_sd)), i_state) - if(dIK(i_state) /= 0d0) ok2 = .true. + dIK(i_state) = dij(i_I, idx_non_ref_from_sorted(idx_alpha_(k_sd,iproc)), i_state) + if(dIK(i_state) /= 0d0) then + ok2 = .true. + exit + endif enddo if(.not. ok2) cycle @@ -166,18 +165,18 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) enddo if (ok) then - do l_sd=k_sd+1,idx_alpha(0) - call get_excitation_degree(tmp_det,psi_det_sorted(1,1,idx_alpha(l_sd)),degree,N_int) + do l_sd=k_sd+1,idx_alpha_(0,iproc) + call get_excitation_degree(tmp_det,psi_det_sorted(1,1,idx_alpha_(l_sd,iproc)),degree,N_int) if (degree == 0) then - call get_excitation(psi_ref(1,1,i_I),psi_det_sorted(1,1,idx_alpha(l_sd)),exc,degree,phase2,N_int) + call get_excitation(psi_ref(1,1,i_I),psi_det_sorted(1,1,idx_alpha_(l_sd,iproc)),exc,degree,phase2,N_int) do i_state=1,N_states - dka(i_state) = dij(i_I, idx_non_ref_from_sorted(idx_alpha(l_sd)), i_state) * phase * phase2 + dka(i_state) = dij(i_I, idx_non_ref_from_sorted(idx_alpha_(l_sd,iproc)), i_state) * phase * phase2 enddo exit endif enddo else if (perturbative_triples) then - hka = hij_cache(idx_alpha(k_sd)) + hka = hij_cache_(idx_alpha_(k_sd,iproc),iproc) if (dabs(hka) > 1.d-12) then call get_delta_e_dyall_general_mp(psi_ref(1,1,i_I),alpha,Delta_E_inv) @@ -190,7 +189,7 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) if (perturbative_triples.and. (degree2 == 1) ) then call i_h_j(psi_ref(1,1,i_I),tmp_det,N_int,hka) - hka = hij_cache(idx_alpha(k_sd)) - hka + hka = hij_cache_(idx_alpha_(k_sd,iproc),iproc) - hka if (dabs(hka) > 1.d-12) then call get_delta_e_dyall_general_mp(psi_ref(1,1,i_I),alpha,Delta_E_inv) do i_state=1,N_states @@ -210,33 +209,23 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) enddo if(.not. ok2) cycle - do i_state=1,N_states - ci_inv(i_state) = psi_ref_coef_inv(i_I,i_state) - enddo - do l_sd=1,idx_alpha(0) - k_sd = idx_alpha(l_sd) - hla = hij_cache(k_sd) - sla = sij_cache(k_sd) + do l_sd=1,idx_alpha_(0,iproc) + k_sd = idx_alpha_(l_sd,iproc) + hla = hij_cache_(k_sd,iproc) + sla = sij_cache_(k_sd,iproc) do i_state=1,N_states - dIa_hla(i_state,k_sd) = dIa(i_state) * hla - dIa_sla(i_state,k_sd) = dIa(i_state) * sla - enddo - enddo - do i_state=1,N_states - do l_sd=1,idx_alpha(0) - !print *, "DRES" - !print *, i_state, idx_alpha(l_sd) - k_sd = idx_alpha(l_sd) + ! dIa_hla_(i_state,k_sd,iproc) = dIa(i_state) * hla + ! dIa_sla_(i_state,k_sd,iproc) = dIa(i_state) * sla + ! enddo + ! enddo + ! do l_sd=1,idx_alpha_(0,iproc) + ! do i_state=1,N_states + k_sd = idx_alpha_(l_sd,iproc) m_sd = psi_from_sorted(k_sd) - hdress = dIa_hla(i_state,k_sd) * psi_ref_coef(i_I,i_state)! * c0(i_state) - sdress = dIa_sla(i_state,k_sd) * psi_ref_coef(i_I,i_state)! * c0(i_state) - !!$OMP ATOMIC - !shdress += 1d0 - nalp += 1 - if(hdress /= 0d0) then - ninc = ninc + 1 - !print *, "grepme2", hdress, shdress - end if + hdress = dIa(i_state) * hla * psi_ref_coef(i_I,i_state) + sdress = dIa(i_state) * sla * psi_ref_coef(i_I,i_state) + ! hdress = dIa_hla_(i_state,k_sd,iproc) * psi_ref_coef(i_I,i_state) + ! sdress = dIa_sla_(i_state,k_sd,iproc) * psi_ref_coef(i_I,i_state) !$OMP ATOMIC delta_ij_loc(i_state,m_sd,1) += hdress !$OMP ATOMIC @@ -246,10 +235,6 @@ subroutine dress_with_alpha_buffer(delta_ij_loc, minilist, n_minilist, alpha) enddo enddo - !if(ninc /= old_ninc) then - ! nalp = nalp + 1 - ! !print "(A8,I20,I20,E15.5)", "grepme", alpha(1,1), alpha(1,2), shdress - !end if end subroutine