diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 315006ff..85fd40e8 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -225,6 +225,7 @@ logical function is_generable(det1, det2, Nint) integer, external :: searchExc logical, external :: excEq double precision :: phase + integer*2 :: tmp_array(4) is_generable = .false. call get_excitation(det1, det2, exc, degree, phase, Nint) @@ -247,19 +248,20 @@ logical function is_generable(det1, det2, Nint) end if if(h1 + (s1-1)*mo_tot_num < h2 + (s2-1)*mo_tot_num) then - f = searchExc(hh_exists(1,1), (/s1, h1, s2, h2/), hh_shortcut(0)) + tmp_array = (/s1, h1, s2, h2/) else - f = searchExc(hh_exists(1,1), (/s2, h2, s1, h1/), hh_shortcut(0)) + tmp_array = (/s2, h2, s1, h1/) end if - if(f == -1) return - + f = searchExc(hh_exists(1,1), tmp_array, hh_shortcut(0)) + if(p1 + (s1-1)*mo_tot_num < p2 + (s2-1)*mo_tot_num) then - f = searchExc(pp_exists(1,hh_shortcut(f)), (/s1, p1, s2, p2/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s1, p1, s2, p2/) else - f = searchExc(pp_exists(1,hh_shortcut(f)), (/s2, p2, s1, p1/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s2, p2, s1, p1/) end if - - if(f /= -1) is_generable = .true. + f = searchExc(pp_exists(1,hh_shortcut(f)), tmp_array, hh_shortcut(f+1)-hh_shortcut(f)) + + is_generable = (f /= -1) end function @@ -542,187 +544,189 @@ END_PROVIDER END_PROVIDER -BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] - implicit none - logical :: ok - integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row - integer, external :: searchDet, unsortedSearchDet - integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) - integer :: N, INFO, AtA_size, r1, r2 - double precision , allocatable:: B(:), AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) - double precision :: t, norm, cx - integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) - - - - nex = hh_shortcut(hh_shortcut(0)+1)-1 - print *, "TI", nex, N_det_non_ref - allocate(A_ind(N_det_ref+1, nex), A_val(N_det_ref+1, nex)) - allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! - allocate(x(nex), AtB(nex)) - allocate(A_val_mwen(nex), A_ind_mwen(nex)) - allocate(N_col(nex), col_shortcut(nex), B(N_det_non_ref)) - allocate (x_new(nex)) - - do s = 1, N_states - - A_val = 0d0 - A_ind = 0 - AtA_ind = 0 - AtA_val = 0d0 - x = 0d0 - AtB = 0d0 - A_val_mwen = 0d0 - A_ind_mwen = 0 - N_col = 0 - col_shortcut = 0 - B = 0d0 - x_new = 0d0 - - !$OMP PARALLEL DO schedule(static,10) default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind) & - !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref) & - !$OMP private(lref, pp, II, ok, myMask, myDet, ind, wk) - do hh = 1, hh_shortcut(0) - do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 - allocate(lref(N_det_non_ref)) - lref = 0 - do II = 1, N_det_ref - call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind /= -1) then - lref(psi_non_ref_sorted_idx(ind)) = II - end if - end do - wk = 0 - do i=1, N_det_non_ref - if(lref(i) /= 0) then - wk += 1 - A_val(wk, pp) = psi_ref_coef(lref(i), s) - A_ind(wk, pp) = i - end if - end do - deallocate(lref) - end do - end do - !$OMP END PARALLEL DO - - AtB = 0d0 - AtA_size = 0 - wk = 0 - col_shortcut = 0 - N_col = 0 - !$OMP PARALLEL DO schedule(dynamic, 100) default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref) & - !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen) & - !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) - do at_row = 1, nex - wk = 0 - if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex - do i=1,N_det_ref - if(A_ind(i, at_row) == 0) exit - AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) - end do - do a_col = 1, nex - t = 0d0 - r1 = 1 - r2 = 1 - do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) - if(A_ind(r1, at_row) < A_ind(r2, a_col)) then - r1 += 1 - else if(A_ind(r1, at_row) > A_ind(r2, a_col)) then - r2 += 1 - else - t = t - A_val(r1, at_row) * A_val(r2, a_col) - r1 += 1 - r2 += 1 - end if - end do - - if(a_col == at_row) then - t = (t + 1d0) - end if - if(t /= 0d0) then - wk += 1 - A_ind_mwen(wk) = a_col - A_val_mwen(wk) = t - end if - end do - - if(wk /= 0) then - !$OMP CRITICAL - col_shortcut(at_row) = AtA_size+1 - N_col(at_row) = wk - AtA_ind(AtA_size+1:AtA_size+wk) = A_ind_mwen(:wk) - AtA_val(AtA_size+1:AtA_size+wk) = A_val_mwen(:wk) - AtA_size += wk - !$OMP END CRITICAL - end if - end do - - x = AtB - if(AtA_size > size(AtA_val)) stop "SIZA" - print *, "ATA SIZE", ata_size - integer :: iproc, omp_get_thread_num - iproc = omp_get_thread_num() - do i=1,nex - x_new(i) = 0.D0 - enddo - - do k=0,100000 - !$OMP PARALLEL DO default(shared) - do i=1,nex - x_new(i) = AtB(i) - enddo - - !$OMP PARALLEL DO default(shared) private(cx, i) - do a_col = 1, nex - cx = 0d0 - do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 - cx += x(AtA_ind(i)) * AtA_val(i) + BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] +&BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] + implicit none + logical :: ok + integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row + integer, external :: searchDet, unsortedSearchDet + integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) + integer :: N, INFO, AtA_size, r1, r2 + double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) + double precision :: t, norm, cx, res + integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) + + + + nex = hh_shortcut(hh_shortcut(0)+1)-1 + print *, "TI", nex, N_det_non_ref + allocate(A_ind(N_det_ref+1, nex), A_val(N_det_ref+1, nex)) + allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! + allocate(x(nex), AtB(nex)) + allocate(A_val_mwen(nex), A_ind_mwen(nex)) + allocate(N_col(nex), col_shortcut(nex)) + allocate (x_new(nex)) + + do s = 1, N_states + + A_val = 0d0 + A_ind = 0 + AtA_ind = 0 + AtA_val = 0d0 + x = 0d0 + A_val_mwen = 0d0 + A_ind_mwen = 0 + N_col = 0 + col_shortcut = 0 + + !$OMP PARALLEL DO schedule(static,10) default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& + !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& + !$OMP private(lref, pp, II, ok, myMask, myDet, ind, wk) + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + allocate(lref(N_det_non_ref)) + lref = 0 + do II = 1, N_det_ref + call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind /= -1) then + lref(psi_non_ref_sorted_idx(ind)) = II + end if + end do + wk = 0 + do i=1, N_det_non_ref + if(lref(i) /= 0) then + wk += 1 + A_val(wk, pp) = psi_ref_coef(lref(i), s) + A_ind(wk, pp) = i + end if + end do + deallocate(lref) + end do end do - x_new(a_col) += cx + !$OMP END PARALLEL DO + + AtB = 0d0 + AtA_size = 0 + wk = 0 + col_shortcut = 0 + N_col = 0 + !$OMP PARALLEL DO schedule(dynamic, 100) default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& + !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& + !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) + do at_row = 1, nex + wk = 0 + if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex + do i=1,N_det_ref + if(A_ind(i, at_row) == 0) exit + AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) + end do + do a_col = 1, nex + t = 0d0 + r1 = 1 + r2 = 1 + do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) + if(A_ind(r1, at_row) < A_ind(r2, a_col)) then + r1 += 1 + else if(A_ind(r1, at_row) > A_ind(r2, a_col)) then + r2 += 1 + else + t = t - A_val(r1, at_row) * A_val(r2, a_col) + r1 += 1 + r2 += 1 + end if + end do + + if(a_col == at_row) then + t = (t + 1d0) + end if + if(t /= 0d0) then + wk += 1 + A_ind_mwen(wk) = a_col + A_val_mwen(wk) = t + end if + end do + + if(wk /= 0) then + !$OMP CRITICAL + col_shortcut(at_row) = AtA_size+1 + N_col(at_row) = wk + AtA_ind(AtA_size+1:AtA_size+wk) = A_ind_mwen(:wk) + AtA_val(AtA_size+1:AtA_size+wk) = A_val_mwen(:wk) + AtA_size += wk + !$OMP END CRITICAL + end if + end do + + if(AtA_size > size(AtA_val)) stop "SIZA" + print *, "ATA SIZE", ata_size + integer :: iproc, omp_get_thread_num + iproc = omp_get_thread_num() + do i=1,nex + x(i) = AtB(i) + enddo + + do k=0,100000 + !$OMP PARALLEL default(shared) private(cx, i, j, a_col) + + !$OMP DO + do i=1,N_det_non_ref + rho_mrcc(i,s) = 0.d0 + enddo + !$OMP END DO + + !$OMP DO + do a_col = 1, nex + cx = 0d0 + do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 + cx = cx + x(AtA_ind(i)) * AtA_val(i) + end do + x_new(a_col) = AtB(a_col) + cx + end do + !$OMP END DO + + !$OMP END PARALLEL + + res = 0.d0 + do a_col=1,nex + do j=1,N_det_non_ref + i = A_ind(j,a_col) + if (i==0) exit + rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col) + enddo + res = res + (X_new(a_col) - X(a_col))**2 + X(a_col) = X_new(a_col) + end do + + if(mod(k, 100) == 0) then + print *, "residu ", k, res + end if + + if(res < 1d-16) exit + end do + + norm = 0.d0 + do i=1,N_det_non_ref + norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) + enddo + do i=1,N_det_ref + norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) + enddo + + print *, k, "res : ", res, "norm : ", sqrt(norm) + + dIj_unique(:size(X), s) = X(:) + + do i=1,N_det_non_ref + rho_mrcc(i,s) = psi_non_ref_coef(i,s) / rho_mrcc(i,s) + enddo + end do - !$OMP END PARALLEL DO - double precision :: norm_cas - norm_cas = 0d0 - do i = 1, N_det_ref - norm_cas += psi_ref_coef(i,s)**2 - end do - - norm = 0d0 - t = 0d0 - - do j=1, size(X) - t = t + X_new(j) * X_new(j) - end do - - - t = (1d0 - norm_cas ) / t - x_new = x_new * sqrt(t) - - do j=1, size(X) - norm += (X_new(j) - X(j))**2 - x(j) = x_new(j) - end do - - - if(mod(k, 100) == 0) then - print *, "residu ", k, norm, "norm t", sqrt(t) - end if - - if(norm < 1d-16) exit - end do - print *, "CONVERGENCE : ", norm - - dIj_unique(:size(X), s) = X(:) - - - end do - - - print *, "done" + + print *, "done" END_PROVIDER @@ -767,6 +771,7 @@ double precision function get_dij(det1, det2, s, Nint) integer, external :: searchExc logical, external :: excEq double precision :: phase + integer*2 :: tmp_array(4) get_dij = 0d0 call get_excitation(det1, det2, exc, degree, phase, Nint) @@ -787,18 +792,21 @@ double precision function get_dij(det1, det2, s, Nint) end if if(h1 + (s1-1)*mo_tot_num < h2 + (s2-1)*mo_tot_num) then - f = searchExc(hh_exists(1,1), (/s1, h1, s2, h2/), hh_shortcut(0)) + tmp_array = (/s1, h1, s2, h2/) else - f = searchExc(hh_exists(1,1), (/s2, h2, s1, h1/), hh_shortcut(0)) + tmp_array = (/s2, h2, s1, h1/) end if + f = searchExc(hh_exists(1,1), tmp_array, hh_shortcut(0)) + if(f == -1) return if(p1 + (s1-1)*mo_tot_num < p2 + (s2-1)*mo_tot_num) then - t = searchExc(pp_exists(1,hh_shortcut(f)), (/s1, p1, s2, p2/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s1, p1, s2, p2/) else - t = searchExc(pp_exists(1,hh_shortcut(f)), (/s2, p2, s1, p1/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s2, p2, s1, p1/) end if - + t = searchExc(pp_exists(1,hh_shortcut(f)), tmp_array, hh_shortcut(f+1)-hh_shortcut(f)) + if(t /= -1) then get_dij = dIj_unique(t - 1 + hh_shortcut(f), s) end if