Reduced memory

2024-12-22 04:13:40 +01:00 · 2021-05-21 00:55:28 +02:00 · 2021-05-21 00:55:28 +02:00 · 6333429d20
commit 6333429d20
parent 693b81265e
1 changed files with 78 additions and 103 deletions
--- a/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f
@ -120,7 +120,7 @@ subroutine run
  it_svd_max = 100
  E_prev = 0.d0

-  allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique))
+  allocate(H(n_selected,n_selected,n_det_alpha_unique,n_det_beta_unique))
  allocate(H_diag(n_det_alpha_unique,n_det_beta_unique))
  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) )
@ -149,7 +149,7 @@ subroutine run
      enddo
    enddo
    E0 = E0 + nuclear_repulsion
-    print *,' E0   =', E0
+!    print *,' E0   =', E0

    double precision, allocatable :: eigval0(:)
    double precision, allocatable :: eigvec0(:,:,:)
@ -170,7 +170,7 @@ subroutine run

 !    print *,  ' --- Diag post-SVD --- '
    call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2)
-    print *, 'eig =', eigval0(1) + nuclear_repulsion
+!    print *, 'eig =', eigval0(1) + nuclear_repulsion
    deallocate(H_tmp, eigval0)

 !    print *,  ' --- SVD --- '
@ -186,7 +186,7 @@ subroutine run
    E0 = 0.d0
    norm = 0.d0
    do j = 1, n_det_beta_unique
-      do i = 1, n_det_beta_unique
+      do i = 1, n_selected
         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
      enddo
      norm = norm + Dref(j)*Dref(j)
@ -244,7 +244,7 @@ subroutine run
    print '(I5, 3(3X, F20.10))',  it_svd, E0, E0 + Ept2, tol_energy
    E_prev = E0

-    print *,  ' --- SVD --- '
+!    print *,  ' --- SVD --- '
    call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)

  end do
@ -334,101 +334,87 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected)

  implicit none

-  integer, intent(in)  :: n_selected
-  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
-  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
-  double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)
-  double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique)
+  integer, intent(in)            :: n_selected
+  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out)  :: H(n_selected,n_selected, n_det_alpha_unique, n_det_beta_unique)
+  double precision, intent(out)  :: H_diag(n_det_alpha_unique,n_det_beta_unique)

-  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
-  integer                       :: i, j, k, l, degree
-  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
-  integer                       :: nn0, mm0, na, nb, mm, ind_gs
-  integer                       :: p,q,r,s
-  double precision              :: h12, x
+  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
+  integer                        :: i, j, k, l, degree
+  integer                        :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                        :: nn0, mm0, na, nb, mm, ind_gs
+  integer                        :: p,q,r,s
+  double precision               :: h12, x
+
+  double precision, allocatable  :: H0(:,:,:,:)
+  double precision, allocatable  :: H1(:,:,:,:)
+  double precision, allocatable  :: tmp3(:,:,:)
+  double precision, allocatable  :: tmp1(:,:), tmp0(:,:)
+  double precision :: c_tmp

-  double precision, allocatable :: H0(:,:,:,:)
-  double precision, allocatable :: H1(:,:,:,:)

  na = n_det_alpha_unique
  nb = n_det_beta_unique

-  allocate( H0(na,nb,na,nb) )
-
  det1(:,1) = psi_det_alpha_unique(:,1)
  det2(:,1) = psi_det_alpha_unique(:,1)
  det1(:,2) = psi_det_beta_unique(:,1)
  det2(:,2) = psi_det_beta_unique(:,1)
  call i_H_j(det1, det2, N_int, h12)

-  H0 = 0.d0
  call wall_time(t0)
- !$OMP PARALLEL DEFAULT(NONE)                                                                         &
- !$OMP          PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12)  &
- !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
- !$OMP                 N_int,Uref,Vref,H0,H1,H)
+  tmp3 = 0.d0

- !$OMP DO
- do l = 1, nb
-   det2(:,2) = psi_det_beta_unique(:,l)
-   do j = 1, nb
-     det1(:,2) = psi_det_beta_unique(:,j)
-     call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
-     if (degree > 2) cycle
-     do k = 1, na
-       det2(:,1) = psi_det_alpha_unique(:,k)
-       do i = 1, na
-         det1(:,1) = psi_det_alpha_unique(:,i)
-         call get_excitation_degree(det1,det2,degree,N_int)
-         if ( degree > 2) cycle
-         call i_H_j(det1, det2, N_int, h12)
-         H0(i,j,k,l) = h12
-       enddo
-     enddo
-   enddo
- enddo
- !$OMP END DO
+  allocate( H0(na,nb,n_selected,n_selected) )
+  allocate (tmp3(nb,nb,nb))
+  H0 = 0.d0

- !$OMP END PARALLEL
- call wall_time(t1)
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0)&
+      !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,&
+      !$OMP                 N_int,tmp3,Uref,Vref,H_diag,H0,n_selected)

- double precision               :: H0_d(n_det_alpha_unique,n_det_beta_unique)
- double precision               :: H1_d(n_det_alpha_unique,n_det_beta_unique)
- double precision               :: tmp3(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique)
- double precision, allocatable  :: tmp1(:,:), tmp0(:,:)
+  allocate(tmp1(na,na), tmp0(na,na))

- tmp3 = 0.d0
+  do i=1,na
+    do m=1,na
+      tmp1(m,i) = Uref(i,m)
+    enddo
+  enddo

- !$OMP PARALLEL DEFAULT(NONE)                                        &
-     !$OMP          PRIVATE(i,j,k,l,m,det1,det2,degree,h12,tmp1,tmp0)&
-     !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,&
-     !$OMP                 N_int,tmp3,Uref,Vref,H_diag)
+  !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)

- allocate(tmp1(na,na), tmp0(na,na))
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)

- do i=1,na
-   do m=1,na
-     tmp1(m,i) = Uref(i,m)
-   enddo
- enddo
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if (degree > 2) cycle

- !$OMP DO
- do l = 1, nb
-   det2(:,2) = psi_det_beta_unique(:,l)
-   do j = 1, nb
-     det1(:,2) = psi_det_beta_unique(:,j)
-     call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
-     if (degree > 2) cycle
-     do k = 1, na
-       det2(:,1) = psi_det_alpha_unique(:,k)
-       do i = 1, na
-         det1(:,1) = psi_det_alpha_unique(:,i)
-         call get_excitation_degree(det1,det2,degree,N_int)
-         if ( degree > 2) cycle
-         call i_H_j(det1, det2, N_int, h12)
-         do m=1,nb
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if ( degree > 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+
+          do m=1,nb
            tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k)
          enddo
+
+          do n=1,n_selected
+            c_tmp = h12 * Vref(j,n)
+            do m=1,n_selected
+              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
+            enddo
+          enddo
+
        enddo
      enddo
    enddo
@ -447,7 +433,7 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected)
        Vref, size(Vref,1),                                          &
        0.d0, tmp0, size(tmp0,1))

-    do n=1,na
+    do n=1,nb
      H_diag(m,n) = 0.d0
      do j=1,nb
        H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n)
@ -458,40 +444,29 @@ subroutine const_H_uv(Uref, Vref, H, H_diag, n_selected)
  deallocate(tmp1, tmp0)
  !$OMP END PARALLEL

+  call wall_time(t1)

-  ! (i,j,k,l) -> (j,k,l,p)
-  allocate( H1(nb,na,nb,na) )
-  call DGEMM('T','N', nb * na * nb, na, na, &
-     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
-  deallocate( H0 )

-  ! (j,k,l,p) -> (k,l,p,q)
-  allocate( H0(na,nb,na,nb) )
-  call DGEMM('T','N', na * nb * na, nb, nb, &
-     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))
-  deallocate( H1 )
-
-  ! (k,l,p,q) -> (l,p,q,r)
-  allocate( H1(nb,na,nb,na) )
-  call DGEMM('T','N', nb * na * nb, na, na, &
-     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+  allocate( H1(nb,n_selected,n_selected,na) )
+  call DGEMM('T','N', nb * n_selected * n_selected, na, na,          &
+      1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
  deallocate( H0 )

  ! (l,p,q,r) -> (p,q,r,s)
-  call DGEMM('T','N',  na * nb * na, nb, nb, &
-     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+  call DGEMM('T','N',  n_selected * n_selected * na, nb, nb,         &
+      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))

-  do j=1,n_selected
-    do i=1,n_selected
-      print *, H_diag(i,j), H(i,j,i,j)
-    enddo
-  enddo
+!  do j=1,n_selected
+!    do i=1,n_selected
+!      print *, H_diag(i,j), H(i,j,i,j)
+!    enddo
+!  enddo
  deallocate(H1)

  call wall_time(t2)
-  print *, 't=', t1-t0, t2-t1
-  double precision :: t0, t1, t2
-  stop
+!  print *, 't=', t1-t0, t2-t1
+  double precision               :: t0, t1, t2
+!  stop
 end