From 38c6fc7bb8c28426f0b8550d63ccf58263a59e07 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 18 Nov 2016 19:17:34 +0100 Subject: [PATCH] Implemented dressed S2 matrix --- plugins/MRCC_Utils/davidson.irp.f | 95 ++++++++++++++++++++-------- plugins/MRCC_Utils/mrcc_utils.irp.f | 2 +- plugins/mrcepa0/dressing.irp.f | 81 +++++++++++++++++------- plugins/mrcepa0/dressing_slave.irp.f | 87 ++++++++++++++++--------- plugins/mrcepa0/mrcc.irp.f | 2 +- 5 files changed, 186 insertions(+), 81 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index d1b82dfc..608b427b 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -715,6 +715,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz double precision :: r1, r2 do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 do i=1,sze call random_number(r1) call random_number(r2) @@ -762,6 +763,44 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz 1.d0, U, size(U,1), S, size(S,1), & 0.d0, s_, size(s_,1)) + ! Diagonalize S^2 + ! --------------- + call lapack_diag(s2,y,s_,size(s_,1),shift2) + +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + ! Diagonalize h ! ------------- call lapack_diag(lambda,y,h,size(h,1),shift2) @@ -784,7 +823,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz if (s2_eig) then logical :: state_ok(N_st_diag*davidson_sze_max) do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.5d0) + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.3d0) enddo else state_ok(k) = .True. @@ -803,22 +842,11 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz endif enddo endif - ! Randomize components with bad - if (.not. state_ok(k)) then - do i=1,shift2 - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - y(i,k) = r1*dcos(r2) - lambda(k) = 1.d0 - enddo - endif enddo -! ! Compute overlap with U_in -! ! ------------------------- -! + ! Compute overlap with U_in + ! ------------------------- + ! integer :: coord(2), order(N_st_diag) ! overlap = -1.d0 ! do k=1,shift2 @@ -865,21 +893,30 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz ! ----------------------- do k=1,N_st_diag + if (state_ok(k)) then do i=1,sze U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & )/max(H_jj(i) - lambda (k),1.d-2) enddo -! else -! ! Randomize components with bad -! do i=1,sze -! call random_number(r1) -! call random_number(r2) -! r1 = dsqrt(-2.d0*dlog(r1)) -! r2 = dtwo_pi*r2 -! U(i,shift2+k) = r1*dcos(r2) -! enddo -! endif + else + ! Randomize components with bad + do i=1,sze-2,2 + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + U(i+1,shift2+k) = r1*dsin(r2) + enddo + do i=sze-2+1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + enddo + endif if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) @@ -914,8 +951,8 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz energies(k) = lambda(k) enddo - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & - U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) enddo @@ -995,7 +1032,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, & - !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in) + !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 @@ -1080,6 +1117,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i do istate=1,N_st vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j) vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i) + st (istate,i) = st (istate,i) + delta_ij_s2(istate_in,jj,ii)*ut(istate,j) + st (istate,j) = st (istate,j) + delta_ij_s2(istate_in,jj,ii)*ut(istate,i) enddo enddo enddo diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index b1c68ef7..f28ccf25 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -761,7 +761,7 @@ END_PROVIDER print *, "res ", k, res end if - if(res < 1d-6) exit + if(res < 1d-9) exit end do norm = 0.d0 diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 9f041cd3..46c56d9d 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -4,6 +4,8 @@ use bitmasks BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ] use bitmasks implicit none integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc @@ -14,11 +16,13 @@ use bitmasks delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 + delta_ij_s2_mrcc = 0d0 + delta_ii_s2_mrcc = 0d0 print *, "Dij", dij(1,1,1) provide hh_shortcut psi_det_size! lambda_mrcc !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & - !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) & + !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators allocate(buf(N_int, 2, N_det_non_ref)) @@ -37,7 +41,9 @@ use bitmasks end do n = n - 1 - if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask) + if(n /= 0) then + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask) + endif end do deallocate(buf) @@ -52,13 +58,15 @@ END_PROVIDER ! end subroutine -subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffer,Nint,key_mask) +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask) use bitmasks implicit none integer, intent(in) :: i_generator,n_selected, Nint double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision, intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref) integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) integer :: i,j,k,l,m @@ -68,8 +76,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),allocatable :: tq(:,:,:) integer :: N_tq, c_ref ,degree - double precision :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states) - double precision, allocatable :: dIa_hla(:,:) + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states) + double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) double precision :: haj, phase, phase2 double precision :: f(N_states), ci_inv(N_states) integer :: exc(0:2,2,2) @@ -82,7 +90,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer,allocatable :: idx_miniList(:) integer :: N_miniList, ni, leng - double precision, allocatable :: hij_cache(:) + double precision, allocatable :: hij_cache(:), sij_cache(:) integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:) integer, allocatable :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:) @@ -92,7 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe leng = max(N_det_generators, N_det_non_ref) - allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref)) + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint) call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) @@ -117,7 +125,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe deallocate(microlist, idx_microlist) - allocate (dIa_hla(N_states,N_det_non_ref)) + allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref)) ! |I> @@ -185,6 +193,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) + call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) enddo ! |I> do i_I=1,N_det_ref @@ -282,9 +291,11 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) hla = hij_cache(k_sd) + sla = sij_cache(k_sd) ! call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla) do i_state=1,N_states dIa_hla(i_state,k_sd) = dIa(i_state) * hla + dIa_sla(i_state,k_sd) = dIa(i_state) * sla enddo enddo call omp_set_lock( psi_ref_lock(i_I) ) @@ -294,19 +305,22 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe k_sd = idx_alpha(l_sd) delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) + delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) enddo else delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) enddo endif enddo call omp_unset_lock( psi_ref_lock(i_I) ) enddo enddo - deallocate (dIa_hla,hij_cache) + deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) deallocate(miniList, idx_miniList) end @@ -315,6 +329,8 @@ end BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ] use bitmasks implicit none integer :: i, j, i_state @@ -325,10 +341,12 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i) enddo end do end do @@ -343,10 +361,12 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_old(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i) enddo end do end do @@ -354,10 +374,12 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) + delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) + delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state) enddo end do end do @@ -547,28 +569,32 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ] use bitmasks implicit none integer :: i,j,k - double precision :: Hjk, Hki, Hij + double precision :: Sjk,Hjk, Hki, Hij !double precision, external :: get_dij integer i_state, degree provide lambda_mrcc dIj do i_state = 1, N_states - !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref,dij) + !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij) do i=1,N_det_ref do j=1,i call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int) delta_cas(i,j,i_state) = 0d0 + delta_cas_s2(i,j,i_state) = 0d0 do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) + call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk) delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) - !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int) + delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) end do delta_cas(j,i,i_state) = delta_cas(i,j,i_state) + delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state) end do end do !$OMP END PARALLEL DO @@ -649,6 +675,8 @@ end function BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ] &BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ] use bitmasks implicit none @@ -656,7 +684,7 @@ end function integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) - double precision :: contrib, contrib2, HIIi, HJk, wall + double precision :: contrib, contrib2, contrib_s2, contrib2_s2, HIIi, HJk, wall integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) integer(bit_kind),allocatable :: sortRef(:,:,:) @@ -681,14 +709,16 @@ end function ! To provide everything contrib = dij(1, 1, 1) - do i_state = 1, N_states - delta_mrcepa0_ii(:,:) = 0d0 - delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii(:,:) = 0d0 + delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii_s2(:,:) = 0d0 + delta_mrcepa0_ij_s2(:,:,:) = 0d0 - !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii) & - !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2) & + do i_state = 1, N_states + !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2) & + !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) & !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & - !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) & + !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) & !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) do blok=1,cepa0_shortcut(0) do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 @@ -731,16 +761,21 @@ end function ! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) !$OMP ATOMIC delta_mrcepa0_ii(J,i_state) -= contrib2 + delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 else contrib = contrib * 0.5d0 + contrib_s2 = contrib_s2 * 0.5d0 end if !$OMP ATOMIC delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib + delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2 end do kloop end do @@ -751,7 +786,7 @@ end function deallocate(idx_sorted_bit) call wall_time(wall) print *, "cepa0", wall, notf - !stop + END_PROVIDER @@ -870,12 +905,14 @@ subroutine set_det_bit(det, p, s) end subroutine -BEGIN_PROVIDER [ double precision, h_, (N_det_ref,N_det_non_ref) ] + BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ] +&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ] implicit none integer :: i,j do i=1,N_det_ref do j=1,N_det_non_ref - call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_(i,j)) + call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j)) + call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j)) end do end do END_PROVIDER diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index f1d6f029..ae76597c 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -37,7 +37,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) @@ -47,8 +47,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) - double precision :: contrib, wall, iwall - double precision, allocatable :: dleat(:,:,:) + double precision :: contrib, contrib_s2, wall, iwall + double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp @@ -63,6 +63,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) + allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) allocate(komon(0:N_det_non_ref)) do @@ -74,10 +75,14 @@ subroutine mrsc2_dressing_slave(thread,iproc) cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state) end do !delta = 0.d0 + !delta_s2 = 0.d0 n = 0 delta(:,0,:) = 0d0 delta(:,:nlink(J),1) = 0d0 delta(:,:nlink(i_I),2) = 0d0 + delta_s2(:,0,:) = 0d0 + delta_s2(:,:nlink(J),1) = 0d0 + delta_s2(:,:nlink(i_I),2) = 0d0 komon(0) = 0 komoned = .false. @@ -121,8 +126,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) end if i = det_cepa0_idx(linked(m, i_I)) - if(h_(J,i) == 0.d0) cycle - if(h_(i_I,i) == 0.d0) cycle + if(h_cache(J,i) == 0.d0) cycle + if(h_cache(i_I,i) == 0.d0) cycle !ok = .false. !do i_state=1, N_states @@ -144,10 +149,13 @@ subroutine mrsc2_dressing_slave(thread,iproc) ! if(I_i == J) phase_Ii = phase_Ji do i_state = 1,N_states - dkI = h_(J,i) * dij(i_I, i, i_state)!get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,i), N_int) - !dkI = h_(J,i) * h_(i_I,i) * lambda_mrcc(i_state, i) + dkI = h_cache(J,i) * dij(i_I, i, i_state) dleat(i_state, kn, 1) = dkI dleat(i_state, kn, 2) = dkI + + dkI = s2_cache(J,i) * dij(i_I, i, i_state) + dleat_s2(i_state, kn, 1) = dkI + dleat_s2(i_state, kn, 2) = dkI end do end do @@ -173,26 +181,32 @@ subroutine mrsc2_dressing_slave(thread,iproc) !if(lambda_mrcc(i_state, i) == 0d0) cycle - !contrib = h_(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al + !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) + contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) delta(i_state,ll,1) += contrib + delta_s2(i_state,ll,1) += contrib_s2 if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state) + delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state) endif if(I_i == J) cycle - !contrib = h_(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al + !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al contrib = dij(J, l, i_state) * dleat(i_state, m, 1) + contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) delta(i_state,kk,2) += contrib + delta_s2(i_state,kk,2) += contrib_s2 if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state) + delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state) end if enddo !i_state end do ! while end do ! kk - call push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) ! end if @@ -208,7 +222,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) end -subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) +subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -218,6 +232,7 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) integer, intent(in) :: i_I, J integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(in) :: task_id integer :: rc , i_state, i, kk, li integer,allocatable :: idx(:,:) @@ -278,6 +293,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' stop 'error' endif + + rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) ! delta_s2(1,0,1) = delta_I delta_s2(1,0,2) = delta_J + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then @@ -305,7 +326,7 @@ end -subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) +subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -315,6 +336,7 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(out) :: i_I, J, n(2) double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(out) :: task_id integer :: rc , i, kk integer,intent(inout) :: idx(N_det_non_ref,2) @@ -346,9 +368,15 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) stop 'error' endif + rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then - print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta, n(kk)*4, ZMQ_SNDMORE)' + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)' stop 'error' endif end if @@ -372,7 +400,7 @@ end -subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) +subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_) use f77_zmq implicit none BEGIN_DOC @@ -381,11 +409,13 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) double precision,intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision,intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision,intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision,intent(inout) :: delta_ii_s2_(N_states,N_det_ref) ! integer :: j,l integer :: rc - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -401,49 +431,46 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) delta_ii_(:,:) = 0d0 delta_ij_(:,:,:) = 0d0 + delta_ii_s2_(:,:) = 0d0 + delta_ij_s2_(:,:,:) = 0d0 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate ( delta(N_states,0:N_det_non_ref,2) ) + allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) allocate(idx(N_det_non_ref,2)) more = 1 do while (more == 1) - call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) + call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) do l=1, n(1) do i_state=1,N_states delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1) + delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1) end do end do do l=1, n(2) do i_state=1,N_states - delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2) + delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2) end do end do -! -! do l=1,nlink(J) -! do i_state=1,N_states -! delta_ij_(i_state,det_cepa0_idx(linked(l,J)),i_I) += delta(i_state,l,1) -! delta_ij_(i_state,det_cepa0_idx(linked(l,i_I)),j) += delta(i_state,l,2) -! end do -! end do -! if(n(1) /= 0) then do i_state=1,N_states delta_ii_(i_state,i_I) += delta(i_state,0,1) + delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1) end do end if if(n(2) /= 0) then do i_state=1,N_states delta_ii_(i_state,J) += delta(i_state,0,2) + delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2) end do end if @@ -454,7 +481,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) enddo - deallocate( delta ) + deallocate( delta, delta_s2 ) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) @@ -466,6 +493,8 @@ end BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ] implicit none integer :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2 @@ -574,10 +603,10 @@ end ! rc = pthread_create(collector_thread, mrsc2_dressing_collector) print *, nzer, ntot, float(nzer) / float(ntot) provide nproc - !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old) PRIVATE(i) NUM_THREADS(nproc+1) + !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() if (i==0) then - call mrsc2_dressing_collector(delta_ii_old,delta_ij_old) + call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) else call mrsc2_dressing_slave_inproc(i) endif diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f index a28d4be3..a5614942 100644 --- a/plugins/mrcepa0/mrcc.irp.f +++ b/plugins/mrcepa0/mrcc.irp.f @@ -16,7 +16,7 @@ program mrsc2sub psi_coef(i,j) = CI_eigenvectors(i,j) enddo enddo - TOUCH psi_coef + SOFT_TOUCH psi_coef endif call run(N_states,energy) if(do_pt2_end)then