From b780a6540abaa08142be8d8912131a50bb41c6ca Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 14 Oct 2016 12:40:29 +0200 Subject: [PATCH 1/2] bugs in mrcepa0_general and mrcc_utils --- plugins/MRCC_Utils/mrcc_utils.irp.f | 5 +++-- plugins/mrcepa0/mrcepa0_general.irp.f | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 79249051..81893781 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -644,7 +644,7 @@ END_PROVIDER AtA_ind = 0 AtA_val = 0d0 x = 0d0 - A_val_mwen = 0d0 + !A_val_mwen = 0d0 N_col = 0 col_shortcut = 0 @@ -699,7 +699,8 @@ END_PROVIDER !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) allocate(A_val_mwen(nex), A_ind_mwen(nex)) - A_ind_mwen = 0 + !A_ind_mwen = 0 + !A_val_mwen = 0d0 !$OMP DO schedule(dynamic, 100) do at_row = 1, nex wk = 0 diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f index 1e7ad68d..63f03360 100644 --- a/plugins/mrcepa0/mrcepa0_general.irp.f +++ b/plugins/mrcepa0/mrcepa0_general.irp.f @@ -28,7 +28,7 @@ subroutine run(N_st,energy) enddo SOFT_TOUCH psi_coef ci_energy_dressed call write_double(6,ci_energy_dressed(1),"Final MRCC energy") - call ezfio_set_mrcc_cassd_energy(ci_energy_dressed(1)) + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) call save_wavefunction energy(:) = ci_energy_dressed(:) else From 1f4cd4c318847520bf7fd84c7e545c5cadabf870 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 17 Oct 2016 14:40:09 +0200 Subject: [PATCH 2/2] optimized calculation of inactive amplitudes --- plugins/MRCC_Utils/mrcc_utils.irp.f | 500 +++++++++++++++++----------- 1 file changed, 301 insertions(+), 199 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 81893781..14885153 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -616,204 +616,301 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] &BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] - implicit none - logical :: ok - integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row - integer, external :: searchDet, unsortedSearchDet - integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) - integer :: N, INFO, AtA_size, r1, r2 - double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) - double precision :: t, norm, cx, res - integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) - double precision :: phase - - - - nex = hh_shortcut(hh_shortcut(0)+1)-1 - print *, "TI", nex, N_det_non_ref - allocate(A_ind(0:N_det_ref+1, nex), A_val(N_det_ref+1, nex)) - allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! - allocate(x(nex), AtB(nex)) - allocate(N_col(nex), col_shortcut(nex)) - allocate(x_new(nex)) - - do s = 1, N_states - - A_val = 0d0 - A_ind = 0 - AtA_ind = 0 - AtA_val = 0d0 - x = 0d0 - !A_val_mwen = 0d0 - N_col = 0 - col_shortcut = 0 - - !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& - !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& - !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk) - allocate(lref(N_det_non_ref)) - !$OMP DO schedule(static,10) - do hh = 1, hh_shortcut(0) - do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 - lref = 0 - do II = 1, N_det_ref - call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind /= -1) then - call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) - if (phase > 0.d0) then - lref(psi_non_ref_sorted_idx(ind)) = II - else - lref(psi_non_ref_sorted_idx(ind)) = -II - endif - end if - end do - wk = 0 - do i=1, N_det_non_ref - if(lref(i) > 0) then - wk += 1 - A_val(wk, pp) = psi_ref_coef(lref(i), s) - A_ind(wk, pp) = i - else if(lref(i) < 0) then - wk += 1 - A_val(wk, pp) = -psi_ref_coef(-lref(i), s) - A_ind(wk, pp) = i - end if - end do - A_ind(0,pp) = wk - end do - end do - !$OMP END DO - deallocate(lref) - !$OMP END PARALLEL - print *, 'Done building A_val, A_ind' - - AtB = 0d0 - AtA_size = 0 - col_shortcut = 0 - N_col = 0 - !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& - !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& - !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) - allocate(A_val_mwen(nex), A_ind_mwen(nex)) - !A_ind_mwen = 0 - !A_val_mwen = 0d0 - !$OMP DO schedule(dynamic, 100) - do at_row = 1, nex - wk = 0 - if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex - do i=1,A_ind(0,at_row) - AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) - end do + implicit none + logical :: ok + integer :: i, j, k, s, II, pp, ppp, hh, ind, wk, nex, a_col, at_row + integer, external :: searchDet, unsortedSearchDet + integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) + integer :: N, INFO, AtA_size, r1, r2 + double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) + double precision :: t, norm, cx, res + integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) + double precision :: phase + + + integer, allocatable :: pathTo(:), active_hh_idx(:), active_pp_idx(:) + logical, allocatable :: active(:) + double precision, allocatable :: rho_mrcc_init(:,:) + integer :: nactive + + nex = hh_shortcut(hh_shortcut(0)+1)-1 + print *, "TI", nex, N_det_non_ref + + allocate(pathTo(N_det_non_ref), active(nex)) + allocate(active_pp_idx(nex), active_hh_idx(nex)) + allocate(rho_mrcc_init(N_det_non_ref, N_states)) + + pathTo = 0 + active = .false. + nactive = 0 + + + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + do II = 1, N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind == -1) cycle + ind = psi_non_ref_sorted_idx(ind) + if(pathTo(ind) == 0) then + pathTo(ind) = pp + else + active(pp) = .true. + active(pathTo(ind)) = .true. + end if + end do + end do + end do + + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + if(active(pp)) then + nactive = nactive + 1 + active_hh_idx(nactive) = hh + active_pp_idx(nactive) = pp + end if + end do + end do + + print *, nactive, "inact/", size(active) + + allocate(A_ind(0:N_det_ref+1, nactive), A_val(N_det_ref+1, nactive)) + allocate(AtA_ind(N_det_ref * nactive), AtA_val(N_det_ref * nactive)) + allocate(x(nex), AtB(nex)) + allocate(N_col(nactive), col_shortcut(nactive)) + allocate(x_new(nex)) + - do a_col = 1, nex - t = 0d0 - r1 = 1 - r2 = 1 - do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) - if(A_ind(r1, at_row) > A_ind(r2, a_col)) then - r2 = r2+1 - else if(A_ind(r1, at_row) < A_ind(r2, a_col)) then - r1 = r1+1 - else - t = t - A_val(r1, at_row) * A_val(r2, a_col) - r1 = r1+1 - r2 = r2+1 - end if - end do - - if(a_col == at_row) then - t = t + 1.d0 - end if - if(t /= 0.d0) then - wk += 1 - A_ind_mwen(wk) = a_col - A_val_mwen(wk) = t - end if - end do - - if(wk /= 0) then - !$OMP CRITICAL - col_shortcut(at_row) = AtA_size+1 - N_col(at_row) = wk - if (AtA_size+wk > size(AtA_ind,1)) then - print *, AtA_size+wk , size(AtA_ind,1) - stop 'too small' - endif - do i=1,wk - AtA_ind(AtA_size+i) = A_ind_mwen(i) - AtA_val(AtA_size+i) = A_val_mwen(i) - enddo - AtA_size += wk - !$OMP END CRITICAL - end if - end do - !$OMP END DO NOWAIT - deallocate (A_ind_mwen, A_val_mwen) - !$OMP END PARALLEL - - if(AtA_size > size(AtA_val)) stop "SIZA" - print *, "ATA SIZE", ata_size - do i=1,nex - x(i) = AtB(i) - enddo - - double precision :: factor, resold - factor = 1.d0 - resold = huge(1.d0) - do k=0,100000 - !$OMP PARALLEL default(shared) private(cx, i, j, a_col) - - !$OMP DO - do i=1,N_det_non_ref - rho_mrcc(i,s) = 0.d0 - enddo - !$OMP END DO - - !$OMP DO - do a_col = 1, nex - cx = 0d0 - do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 - cx = cx + x(AtA_ind(i)) * AtA_val(i) - end do - x_new(a_col) = AtB(a_col) + cx * factor - end do - !$OMP END DO - - !$OMP END PARALLEL - - res = 0.d0 - do a_col=1,nex - res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) - end do - - if (res < resold) then - do a_col=1,nex - do j=1,N_det_non_ref - i = A_ind(j,a_col) - if (i==0) exit - rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col) - enddo - X(a_col) = X_new(a_col) - end do -! factor = 1.d0 - else - factor = -factor * 0.5d0 - endif - resold = res + + do s = 1, N_states + + A_val = 0d0 + A_ind = 0 + AtA_ind = 0 + AtB = 0d0 + AtA_val = 0d0 + x = 0d0 + N_col = 0 + col_shortcut = 0 + + !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& + !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& + !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)& + !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh) + allocate(lref(N_det_non_ref)) + !$OMP DO schedule(static,10) + do ppp=1,nactive + pp = active_pp_idx(ppp) + hh = active_hh_idx(ppp) + lref = 0 + do II = 1, N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind /= -1) then + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + if (phase > 0.d0) then + lref(psi_non_ref_sorted_idx(ind)) = II + else + lref(psi_non_ref_sorted_idx(ind)) = -II + endif + end if + end do + wk = 0 + do i=1, N_det_non_ref + if(lref(i) > 0) then + wk += 1 + A_val(wk, ppp) = psi_ref_coef(lref(i), s) + A_ind(wk, ppp) = i + else if(lref(i) < 0) then + wk += 1 + A_val(wk, ppp) = -psi_ref_coef(-lref(i), s) + A_ind(wk, ppp) = i + end if + end do + A_ind(0,ppp) = wk + end do + !$OMP END DO + deallocate(lref) + !$OMP END PARALLEL - if(mod(k, 100) == 0) then - print *, "res", k, res - end if - - if(res < 1d-8) exit - end do - ! rho_mrcc now contains A.X - norm = 0.d0 + print *, 'Done building A_val, A_ind' + + AtA_size = 0 + col_shortcut = 0 + N_col = 0 + integer :: a_coll, at_roww + + + !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& + !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& + !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx) + allocate(A_val_mwen(nex), A_ind_mwen(nex)) + + !$OMP DO schedule(dynamic, 100) + do at_roww = 1, nactive ! nex + at_row = active_pp_idx(at_roww) + wk = 0 + if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", nex + do i=1,A_ind(0,at_roww) + j = active_pp_idx(i) + AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww) + end do + + do a_coll = 1, nactive + a_col = active_pp_idx(a_coll) + t = 0d0 + r1 = 1 + r2 = 1 + do while ((A_ind(r1, at_roww) /= 0).and.(A_ind(r2, a_coll) /= 0)) + if(A_ind(r1, at_roww) > A_ind(r2, a_coll)) then + r2 = r2+1 + else if(A_ind(r1, at_roww) < A_ind(r2, a_coll)) then + r1 = r1+1 + else + t = t - A_val(r1, at_roww) * A_val(r2, a_coll) + r1 = r1+1 + r2 = r2+1 + end if + end do + + if(a_col == at_row) then + t = t + 1.d0 + end if + if(t /= 0.d0) then + wk += 1 + A_ind_mwen(wk) = a_col + A_val_mwen(wk) = t + end if + end do + + if(wk /= 0) then + !$OMP CRITICAL + col_shortcut(at_roww) = AtA_size+1 + N_col(at_roww) = wk + if (AtA_size+wk > size(AtA_ind,1)) then + print *, AtA_size+wk , size(AtA_ind,1) + stop 'too small' + endif + do i=1,wk + AtA_ind(AtA_size+i) = A_ind_mwen(i) + AtA_val(AtA_size+i) = A_val_mwen(i) + enddo + AtA_size += wk + !$OMP END CRITICAL + end if + end do + !$OMP END DO NOWAIT + deallocate (A_ind_mwen, A_val_mwen) + !$OMP END PARALLEL + + print *, "ATA SIZE", ata_size + x = 0d0 + + + do a_coll = 1, nactive + a_col = active_pp_idx(a_coll) + X(a_col) = AtB(a_col) + end do + + rho_mrcc_init = 0d0 + + allocate(lref(N_det_ref)) + !$OMP PARALLEL DO default(shared) schedule(static, 1) & + !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase) + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + if(active(pp)) cycle + lref = 0 + do II=1,N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind == -1) cycle + ind = psi_non_ref_sorted_idx(ind) + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + X(pp) += psi_ref_coef(II,s)**2 + AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase + lref(II) = ind + if(phase < 0d0) lref(II) = -ind + end do + X(pp) = AtB(pp) / X(pp) + do II=1,N_det_ref + if(lref(II) > 0) then + rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp) + else if(lref(II) < 0) then + rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp) + end if + end do + end do + end do + !$OMP END PARALLEL DO + + x_new = x + + double precision :: factor, resold + factor = 1.d0 + resold = huge(1.d0) + do k=0,100000 + !$OMP PARALLEL default(shared) private(cx, i, j, a_col, a_coll) + + !$OMP DO + do i=1,N_det_non_ref + rho_mrcc(i,s) = rho_mrcc_init(i,s) ! 0d0 + enddo + !$OMP END DO + + !$OMP DO + do a_coll = 1, nactive !: nex + a_col = active_pp_idx(a_coll) + cx = 0d0 + do i=col_shortcut(a_coll), col_shortcut(a_coll) + N_col(a_coll) - 1 + cx = cx + x(AtA_ind(i)) * AtA_val(i) + end do + x_new(a_col) = AtB(a_col) + cx * factor + end do + !$OMP END DO + + !$OMP END PARALLEL + + res = 0.d0 + + + if (res < resold) then + do a_coll=1,nactive ! nex + a_col = active_pp_idx(a_coll) + do j=1,N_det_non_ref + i = A_ind(j,a_coll) + if (i==0) exit + rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_coll) * X_new(a_col) + enddo + res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) + X(a_col) = X_new(a_col) + end do + factor = 1.d0 + else + factor = -factor * 0.5d0 + endif + resold = res + + if(mod(k, 5) == 0) then + print *, "res ", k, res + end if + + if(res < 1d-12) exit + end do + + + + norm = 0.d0 do i=1,N_det_non_ref norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) enddo @@ -826,7 +923,7 @@ END_PROVIDER print *, k, "res : ", res, "norm : ", sqrt(norm) - dIj_unique(:size(X), s) = X(:) + !dIj_unique(:size(X), s) = X(:) norm = 0.d0 double precision :: f @@ -871,12 +968,14 @@ END_PROVIDER enddo ! rho_mrcc now contains the product of the scaling factors and the ! normalization constant - - end do - + + dIj_unique(:size(X), s) = X(:) + end do END_PROVIDER + + BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] integer :: s,i,j double precision, external :: get_dij_index @@ -1142,3 +1241,6 @@ subroutine apply_particle_local(det, exc, res, ok, Nint) ok = .true. end subroutine + + +