From 6333429d200be8cdb92678864912f2f0df6249e5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 May 2021 00:55:28 +0200 Subject: [PATCH] Reduced memory --- devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f | 181 ++++++++---------- 1 file changed, 78 insertions(+), 103 deletions(-) diff --git a/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f b/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f index c5addb1..34411d3 100644 --- a/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f +++ b/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f @@ -120,7 +120,7 @@ subroutine run it_svd_max = 100 E_prev = 0.d0 - allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique)) + allocate(H(n_selected,n_selected,n_det_alpha_unique,n_det_beta_unique)) allocate(H_diag(n_det_alpha_unique,n_det_beta_unique)) allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) ) @@ -149,7 +149,7 @@ subroutine run enddo enddo E0 = E0 + nuclear_repulsion - print *,' E0 =', E0 +! print *,' E0 =', E0 double precision, allocatable :: eigval0(:) double precision, allocatable :: eigvec0(:,:,:) @@ -170,7 +170,7 @@ subroutine run ! print *, ' --- Diag post-SVD --- ' call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2) - print *, 'eig =', eigval0(1) + nuclear_repulsion +! print *, 'eig =', eigval0(1) + nuclear_repulsion deallocate(H_tmp, eigval0) ! print *, ' --- SVD --- ' @@ -186,7 +186,7 @@ subroutine run E0 = 0.d0 norm = 0.d0 do j = 1, n_det_beta_unique - do i = 1, n_det_beta_unique + do i = 1, n_selected E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) enddo norm = norm + Dref(j)*Dref(j) @@ -244,7 +244,7 @@ subroutine run print '(I5, 3(3X, F20.10))', it_svd, E0, E0 + Ept2, tol_energy E_prev = E0 - print *, ' --- SVD --- ' +! print *, ' --- SVD --- ' call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref) end do @@ -334,101 +334,87 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected) implicit none - integer, intent(in) :: n_selected - double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) - double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) - double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique) - double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique) + integer, intent(in) :: n_selected + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_selected,n_selected, n_det_alpha_unique, n_det_beta_unique) + double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique) - integer(bit_kind) :: det1(N_int,2), det2(N_int,2) - integer :: i, j, k, l, degree - integer :: ii0, jj0, ii, jj, n, m, np, mp - integer :: nn0, mm0, na, nb, mm, ind_gs - integer :: p,q,r,s - double precision :: h12, x + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, na, nb, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + double precision, allocatable :: tmp3(:,:,:) + double precision, allocatable :: tmp1(:,:), tmp0(:,:) + double precision :: c_tmp - double precision, allocatable :: H0(:,:,:,:) - double precision, allocatable :: H1(:,:,:,:) na = n_det_alpha_unique nb = n_det_beta_unique - allocate( H0(na,nb,na,nb) ) - det1(:,1) = psi_det_alpha_unique(:,1) det2(:,1) = psi_det_alpha_unique(:,1) det1(:,2) = psi_det_beta_unique(:,1) det2(:,2) = psi_det_beta_unique(:,1) call i_H_j(det1, det2, N_int, h12) - H0 = 0.d0 call wall_time(t0) - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) & - !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & - !$OMP N_int,Uref,Vref,H0,H1,H) + tmp3 = 0.d0 - !$OMP DO - do l = 1, nb - det2(:,2) = psi_det_beta_unique(:,l) - do j = 1, nb - det1(:,2) = psi_det_beta_unique(:,j) - call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) - if (degree > 2) cycle - do k = 1, na - det2(:,1) = psi_det_alpha_unique(:,k) - do i = 1, na - det1(:,1) = psi_det_alpha_unique(:,i) - call get_excitation_degree(det1,det2,degree,N_int) - if ( degree > 2) cycle - call i_H_j(det1, det2, N_int, h12) - H0(i,j,k,l) = h12 - enddo - enddo - enddo - enddo - !$OMP END DO + allocate( H0(na,nb,n_selected,n_selected) ) + allocate (tmp3(nb,nb,nb)) + H0 = 0.d0 - !$OMP END PARALLEL - call wall_time(t1) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0)& + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,& + !$OMP N_int,tmp3,Uref,Vref,H_diag,H0,n_selected) - double precision :: H0_d(n_det_alpha_unique,n_det_beta_unique) - double precision :: H1_d(n_det_alpha_unique,n_det_beta_unique) - double precision :: tmp3(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique) - double precision, allocatable :: tmp1(:,:), tmp0(:,:) + allocate(tmp1(na,na), tmp0(na,na)) - tmp3 = 0.d0 + do i=1,na + do m=1,na + tmp1(m,i) = Uref(i,m) + enddo + enddo - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,k,l,m,det1,det2,degree,h12,tmp1,tmp0)& - !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,& - !$OMP N_int,tmp3,Uref,Vref,H_diag) + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) - allocate(tmp1(na,na), tmp0(na,na)) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) - do i=1,na - do m=1,na - tmp1(m,i) = Uref(i,m) - enddo - enddo + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle - !$OMP DO - do l = 1, nb - det2(:,2) = psi_det_beta_unique(:,l) - do j = 1, nb - det1(:,2) = psi_det_beta_unique(:,j) - call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) - if (degree > 2) cycle - do k = 1, na - det2(:,1) = psi_det_alpha_unique(:,k) - do i = 1, na - det1(:,1) = psi_det_alpha_unique(:,i) - call get_excitation_degree(det1,det2,degree,N_int) - if ( degree > 2) cycle - call i_H_j(det1, det2, N_int, h12) - do m=1,nb + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + + call i_H_j(det1, det2, N_int, h12) + + do m=1,nb tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k) enddo + + do n=1,n_selected + c_tmp = h12 * Vref(j,n) + do m=1,n_selected + H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i) + enddo + enddo + enddo enddo enddo @@ -447,7 +433,7 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected) Vref, size(Vref,1), & 0.d0, tmp0, size(tmp0,1)) - do n=1,na + do n=1,nb H_diag(m,n) = 0.d0 do j=1,nb H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n) @@ -458,40 +444,29 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected) deallocate(tmp1, tmp0) !$OMP END PARALLEL + call wall_time(t1) - ! (i,j,k,l) -> (j,k,l,p) - allocate( H1(nb,na,nb,na) ) - call DGEMM('T','N', nb * na * nb, na, na, & - 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) - deallocate( H0 ) - ! (j,k,l,p) -> (k,l,p,q) - allocate( H0(na,nb,na,nb) ) - call DGEMM('T','N', na * nb * na, nb, nb, & - 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3)) - deallocate( H1 ) - - ! (k,l,p,q) -> (l,p,q,r) - allocate( H1(nb,na,nb,na) ) - call DGEMM('T','N', nb * na * nb, na, na, & - 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + allocate( H1(nb,n_selected,n_selected,na) ) + call DGEMM('T','N', nb * n_selected * n_selected, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) deallocate( H0 ) ! (l,p,q,r) -> (p,q,r,s) - call DGEMM('T','N', na * nb * na, nb, nb, & - 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + call DGEMM('T','N', n_selected * n_selected * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) - do j=1,n_selected - do i=1,n_selected - print *, H_diag(i,j), H(i,j,i,j) - enddo - enddo +! do j=1,n_selected +! do i=1,n_selected +! print *, H_diag(i,j), H(i,j,i,j) +! enddo +! enddo deallocate(H1) call wall_time(t2) - print *, 't=', t1-t0, t2-t1 - double precision :: t0, t1, t2 - stop +! print *, 't=', t1-t0, t2-t1 + double precision :: t0, t1, t2 +! stop end