qp_plugins_scemama/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f

program buildpsi_diagSVDit_Anthony_v1

  implicit none

  BEGIN_DOC
  ! perturbative approach to build psi_postsvd
  END_DOC

  read_wf = .True.
  TOUCH read_wf

  PROVIDE N_int

  call run()
end


subroutine run

  USE OMP_LIB

  implicit none

  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
  integer                       :: degree, i_state
  double precision              :: h12

  integer                       :: i, j, k, l, ii, jj, na, nb

  double precision              :: norm_psi, inv_sqrt_norm_psi
  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)

  double precision              :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy
  double precision              :: ctmp, htmp, Ept2
  double precision              :: E0_postsvd, overlap_postsvd, E_prev
  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
  double precision              :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi

  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:)
  double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)

  integer                       :: n_TSVD, n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max
  integer                       :: n_selected2
  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)

  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
  real(kind=8)                  :: W_tot_time, W_tot_time_it
  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it
  real(kind=8)                  :: speedup, speedup_it
  integer                       :: nb_taches

  !$OMP PARALLEL
  nb_taches = OMP_GET_NUM_THREADS()
  !$OMP END PARALLEL

  call CPU_TIME(CPU_tbeg)
  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)

  i_state = 1

  det1(:,1) = psi_det_alpha_unique(:,1)
  det2(:,1) = psi_det_alpha_unique(:,1)
  det1(:,2) = psi_det_beta_unique(:,1)
  det2(:,2) = psi_det_beta_unique(:,1)
  call i_H_j(det1, det2, N_int, h12)

  ! ---------------------------------------------------------------------------------------
  !                      construct the initial CISD matrix

  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
  print *, ' N det    :', N_det
  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'

  norm_psi = 0.d0
  do k = 1, N_det
    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
                        * psi_bilinear_matrix_values(k,i_state)
  enddo
  print *, ' initial norm = ', norm_psi

  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
  Aref(:,:) = 0.d0
  do k = 1, N_det
    i         = psi_bilinear_matrix_rows(k)
    j         = psi_bilinear_matrix_columns(k)
    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
  enddo

  ! ---------------------------------------------------------------------------------------


  ! ---------------------------------------------------------------------------------------
  !                                  perform a Full SVD

  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
  !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
  allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )
  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
       , n_det_alpha_unique, n_det_beta_unique)

  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '

  do l = 1, n_det_beta_unique
    do i = 1, n_det_beta_unique
      Vref(i,l) = Vtref(l,i)
    enddo
  enddo
  deallocate( Vtref )

  ! Truncated rank
  n_TSVD      = 20
  n_selected  = n_TSVD
  call write_int(6,n_TSVD, 'Rank of psi')


  !________________________________________________________________________________________________________
  !
  !                                   loop over SVD iterations
  !________________________________________________________________________________________________________

  tol_energy = 1.d0
  it_svd     = 0
  it_svd_max = 100
  E_prev     = 0.d0

  allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique))
  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )

    call CPU_TIME(CPU_tbeg_it)
    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir)

    it_svd = it_svd + 1
    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
    print*, '                                                                         '
    print*, '                                                                         '
    print*, '                                                                         '
    print*, ' iteration', it_svd

    double precision :: norm
    norm = 0.d0
    do j = 1, n_selected
      norm = norm + Dref(j)*Dref(j)
    enddo
    Dref = Dref / dsqrt(norm)

    call const_H_uv(Uref, Vref, H)

    E0 = 0.d0
    do j = 1, n_selected
      do i = 1, n_selected
         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
      enddo
    enddo
    double precision :: E0_av, E0_ap, E0pt2
    E0_av = E0 + nuclear_repulsion
    print *,' E0 (avant SVD) =', E0_av
    print *,  ''

    double precision, allocatable :: eigval0(:)
    double precision, allocatable :: eigvec0(:,:,:)
    double precision, allocatable :: H_tmp(:,:,:,:)

    allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) )
    do l=1,n_selected
     do k=1,n_selected
      do j=1,n_selected
       do i=1,n_selected
        H_tmp(i,j,k,l) = H(i,j,k,l)
       enddo
      enddo
     enddo
    enddo
    allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2))
    eigvec0 = 0.d0

    call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2)
    E0_postsvd = eigval0(1) + nuclear_repulsion
    print*, ' postsvd energy  = ', E0_postsvd
    deallocate(H_tmp, eigval0)

    Dref = 0.d0
    call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref)
    deallocate(eigvec0)

    print *,  ' --- Compute H --- '
    call const_H_uv(Uref, Vref, H)

    E0 = 0.d0
    norm = 0.d0
    do j = 1, n_det_beta_unique
      do i = 1, n_det_beta_unique
         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
      enddo
      norm = norm + Dref(j)*Dref(j)
    enddo
    E0_ap = E0 + nuclear_repulsion
    print *,' E0 (apres SVD) =', E0_ap

    psi_postsvd = 0.d0
    do i=1,n_selected
      psi_postsvd(i,i) = Dref(i)
    enddo

    E0   = E0_ap
    Ept2 = 0.d0
    do j=1,n_selected
      do i=n_selected+1,n_det_alpha_unique
        ctmp = 0.d0
        do l=1,n_selected
          do k=1,n_selected
            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
          enddo
        enddo
        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
      enddo
    enddo
    do j=n_selected+1,n_det_beta_unique
      do i=1,n_selected
        ctmp = 0.d0
        do l=1,n_selected
          do k=1,n_selected
            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
          enddo
        enddo
        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
      enddo
    enddo
    do j=n_selected+1,n_det_beta_unique
      do i=n_selected+1,n_det_alpha_unique
        ctmp = 0.d0
        do l=1,n_selected
          do k=1,n_selected
            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
          enddo
        enddo
        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
      enddo
    enddo
    E0pt2 = E0_ap + ept2
    print *, ' perturb energy = ', E0pt2, ept2

    tol_energy = dabs(E_prev - E0_ap)
    E_prev = E0_ap

    call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)

    write(44,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0_ap+Ept2


    call CPU_TIME(CPU_tend_it)
    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
    speedup_it      = CPU_tot_time_it / W_tot_time_it
    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it


  end do


  deallocate( Uref, Vref, Dref )


  call CPU_TIME(CPU_tend)
  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
  CPU_tot_time = CPU_tend - CPU_tbeg
  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
  speedup      = CPU_tot_time / W_tot_time
  print *,' ___________________________________________________________________'
  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
  print *,' ___________________________________________________________________'


end


subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref)

  USE OMP_LIB

  integer, intent(in)             :: n_selected
  double precision, intent(in)    :: psi_postsvd(n_selected,n_selected)
  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))

  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
  double precision                :: err0, err_norm, err_tmp, norm_tmp
  double precision                :: overlapU_mat, overlapV_mat, overlapU, overlapV
  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)

  mm = n_det_alpha_unique
  nn = n_det_beta_unique

  allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )

  U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected)
  V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected)
  S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected)

  ! first compute S_mat x transpose(V_svd)
  allocate( SxVt(n_selected,nn) )
  call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0     &
             , S_mat     , size(S_mat,1)     &
             , V_svd     , size(V_svd,1)     &
             , 0.d0, SxVt, size(SxVt ,1)     )
  deallocate(S_mat)

  ! then compute U_svd x SxVt
  allocate( A_newsvd(mm,nn) )
  call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0            &
             , U_svd         , size(U_svd   ,1)     &
             , SxVt          , size(SxVt    ,1)     &
             , 0.d0, A_newsvd, size(A_newsvd,1)     )
  deallocate( SxVt )

  ! perform new SVD
  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
  call svd_s( A_newsvd, size(A_newsvd,1), &
              U_newsvd, size(U_newsvd,1), &
              D_newsvd, &
              Vt_newsvd, size(Vt_newsvd,1), &
              mm, nn)
  deallocate(A_newsvd)

  allocate( V_newsvd(nn,nn) )
  do l = 1, nn
    do j = 1, nn
      V_newsvd(j,l) = Vt_newsvd(l,j)
    enddo
  enddo
  deallocate(Vt_newsvd)


  !do l = 1, n_selected
  !  Dref(l)   = D_newsvd(l)
  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
  !enddo
  Dref(1:n_selected) = D_newsvd(1:n_selected)
  Uref(1:mm,1:mm)    = U_newsvd(1:mm,1:mm)
  Vref(1:nn,1:nn)    = V_newsvd(1:nn,1:nn)


  deallocate(U_newsvd)
  deallocate(V_newsvd)
  deallocate(D_newsvd)

end subroutine perform_newpostSVD


subroutine const_H_uv(Uref, Vref, H)

  USE OMP_LIB

  implicit none

  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
  double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)

  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
  integer                       :: i, j, k, l, degree
  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
  integer                       :: nn0, mm0, na, nb, mm, ind_gs
  integer                       :: p,q,r,s
  double precision              :: h12, x

  double precision, allocatable :: H0(:,:,:,:)
  double precision, allocatable :: H1(:,:,:,:)

  na = n_det_alpha_unique
  nb = n_det_beta_unique

  allocate( H0(na,nb,na,nb) )
  allocate( H1(nb,na,nb,na) )

  H0 = 0.d0
  call wall_time(t0)
 !$OMP PARALLEL DEFAULT(NONE)                                                                         &
 !$OMP          PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12)  &
 !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
 !$OMP                 N_int,Uref,Vref,H0,H1,H)

 !$OMP DO
 do l = 1, nb
   det2(:,2) = psi_det_beta_unique(:,l)
   do j = 1, nb
     det1(:,2) = psi_det_beta_unique(:,j)
     call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
     if (degree > 2) cycle
     do k = 1, na
       det2(:,1) = psi_det_alpha_unique(:,k)
       do i = 1, na
         det1(:,1) = psi_det_alpha_unique(:,i)
         call get_excitation_degree(det1,det2,degree,N_int)
         if ( degree > 2) cycle
         call i_H_j(det1, det2, N_int, h12)
         H0(i,j,k,l) = h12
       enddo
     enddo
   enddo
 enddo
 !$OMP END DO

 !$OMP END PARALLEL

  call wall_time(t1)
  ! (i,j,k,l) -> (j,k,l,p)
  call DGEMM('T','N', nb * na * nb, na, na, &
     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))

  ! (j,k,l,p) -> (k,l,p,q)
  call DGEMM('T','N', na * nb * na, nb, nb, &
     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))

  ! (k,l,p,q) -> (l,p,q,r)
  call DGEMM('T','N', nb * na * nb, na, na, &
     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))

  ! (l,p,q,r) -> (p,q,r,s)
  call DGEMM('T','N',  na * nb * na, nb, nb, &
     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
  call wall_time(t2)
  print *, t1-t0, t2-t1
  double precision :: t0, t1, t2

  deallocate(H1,H0)

end
svd save 2021-07-28 17:19:18 +02:00			`program buildpsi_diagSVDit_Anthony_v1`

			`implicit none`

			`BEGIN_DOC`
			`! perturbative approach to build psi_postsvd`
			`END_DOC`

			`read_wf = .True.`
			`TOUCH read_wf`

			`PROVIDE N_int`

			`call run()`
			`end`


			`subroutine run`

			`USE OMP_LIB`

			`implicit none`

			`integer(bit_kind) :: det1(N_int,2), det2(N_int,2)`
			`integer :: degree, i_state`
			`double precision :: h12`

			`integer :: i, j, k, l, ii, jj, na, nb`

			`double precision :: norm_psi, inv_sqrt_norm_psi`
			`double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)`

			`double precision :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy`
			`double precision :: ctmp, htmp, Ept2`
			`double precision :: E0_postsvd, overlap_postsvd, E_prev`
			`double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi`
			`double precision :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi`

			`double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:)`
			`double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)`

			`integer :: n_TSVD, n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max`
			`integer :: n_selected2`
			`integer, allocatable :: numalpha_selected(:), numbeta_selected(:)`
			`integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:)`
			`integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:)`

			`integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir`
			`real(kind=8) :: W_tot_time, W_tot_time_it`
			`real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it`
			`real(kind=8) :: CPU_tot_time, CPU_tot_time_it`
			`real(kind=8) :: speedup, speedup_it`
			`integer :: nb_taches`

			`!$OMP PARALLEL`
			`nb_taches = OMP_GET_NUM_THREADS()`
			`!$OMP END PARALLEL`

			`call CPU_TIME(CPU_tbeg)`
			`call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)`

			`i_state = 1`

			`det1(:,1) = psi_det_alpha_unique(:,1)`
			`det2(:,1) = psi_det_alpha_unique(:,1)`
			`det1(:,2) = psi_det_beta_unique(:,1)`
			`det2(:,2) = psi_det_beta_unique(:,1)`
			`call i_H_j(det1, det2, N_int, h12)`

			`! ---------------------------------------------------------------------------------------`
			`! construct the initial CISD matrix`

			`print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'`
			`print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique`
			`print *, ' N det :', N_det`
			`print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'`

			`norm_psi = 0.d0`
			`do k = 1, N_det`
			`norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &`
			`* psi_bilinear_matrix_values(k,i_state)`
			`enddo`
			`print *, ' initial norm = ', norm_psi`

			`allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )`
			`Aref(:,:) = 0.d0`
			`do k = 1, N_det`
			`i = psi_bilinear_matrix_rows(k)`
			`j = psi_bilinear_matrix_columns(k)`
			`Aref(i,j) = psi_bilinear_matrix_values(k,i_state)`
			`enddo`

			`! ---------------------------------------------------------------------------------------`


			`! ---------------------------------------------------------------------------------------`
			`! perform a Full SVD`

			`allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )`
			`!allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )`
			`allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )`
			`allocate( Vref(n_det_beta_unique,n_det_beta_unique) )`
			`allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )`
			`call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) &`
			`, n_det_alpha_unique, n_det_beta_unique)`

			`print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '`

			`do l = 1, n_det_beta_unique`
			`do i = 1, n_det_beta_unique`
			`Vref(i,l) = Vtref(l,i)`
			`enddo`
			`enddo`
			`deallocate( Vtref )`

			`! Truncated rank`
			`n_TSVD = 20`
			`n_selected = n_TSVD`
			`call write_int(6,n_TSVD, 'Rank of psi')`


			`!________________________________________________________________________________________________________`
			`!`
			`! loop over SVD iterations`
			`!________________________________________________________________________________________________________`

			`tol_energy = 1.d0`
			`it_svd = 0`
			`it_svd_max = 100`
			`E_prev = 0.d0`

			`allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique))`
			`allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))`
			`do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )`

			`call CPU_TIME(CPU_tbeg_it)`
			`call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir)`

			`it_svd = it_svd + 1`
			`print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'`
			`print*, ' '`
			`print*, ' '`
			`print*, ' '`
			`print*, ' iteration', it_svd`

			`double precision :: norm`
			`norm = 0.d0`
			`do j = 1, n_selected`
			`norm = norm + Dref(j)*Dref(j)`
			`enddo`
			`Dref = Dref / dsqrt(norm)`

			`call const_H_uv(Uref, Vref, H)`

			`E0 = 0.d0`
			`do j = 1, n_selected`
			`do i = 1, n_selected`
			`E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)`
			`enddo`
			`enddo`
			`double precision :: E0_av, E0_ap, E0pt2`
			`E0_av = E0 + nuclear_repulsion`
			`print *,' E0 (avant SVD) =', E0_av`
			`print *, ''`

			`double precision, allocatable :: eigval0(:)`
			`double precision, allocatable :: eigvec0(:,:,:)`
			`double precision, allocatable :: H_tmp(:,:,:,:)`

			`allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) )`
			`do l=1,n_selected`
			`do k=1,n_selected`
			`do j=1,n_selected`
			`do i=1,n_selected`
			`H_tmp(i,j,k,l) = H(i,j,k,l)`
			`enddo`
			`enddo`
			`enddo`
			`enddo`
			`allocate( eigval0(n_selected2),eigvec0(n_selected,n_selected,n_selected2))`
			`eigvec0 = 0.d0`

			`call lapack_diag(eigval0, eigvec0, H_tmp, n_selected2, n_selected2)`
			`E0_postsvd = eigval0(1) + nuclear_repulsion`
			`print*, ' postsvd energy = ', E0_postsvd`
			`deallocate(H_tmp, eigval0)`

			`Dref = 0.d0`
			`call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref)`
			`deallocate(eigvec0)`

			`print *, ' --- Compute H --- '`
			`call const_H_uv(Uref, Vref, H)`

			`E0 = 0.d0`
			`norm = 0.d0`
			`do j = 1, n_det_beta_unique`
			`do i = 1, n_det_beta_unique`
			`E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)`
			`enddo`
			`norm = norm + Dref(j)*Dref(j)`
			`enddo`
			`E0_ap = E0 + nuclear_repulsion`
			`print *,' E0 (apres SVD) =', E0_ap`

			`psi_postsvd = 0.d0`
			`do i=1,n_selected`
			`psi_postsvd(i,i) = Dref(i)`
			`enddo`

			`E0 = E0_ap`
			`Ept2 = 0.d0`
			`do j=1,n_selected`
			`do i=n_selected+1,n_det_alpha_unique`
			`ctmp = 0.d0`
			`do l=1,n_selected`
			`do k=1,n_selected`
			`ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)`
			`enddo`
			`enddo`
			`psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`enddo`
			`enddo`
			`do j=n_selected+1,n_det_beta_unique`
			`do i=1,n_selected`
			`ctmp = 0.d0`
			`do l=1,n_selected`
			`do k=1,n_selected`
			`ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)`
			`enddo`
			`enddo`
			`psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`enddo`
			`enddo`
			`do j=n_selected+1,n_det_beta_unique`
			`do i=n_selected+1,n_det_alpha_unique`
			`ctmp = 0.d0`
			`do l=1,n_selected`
			`do k=1,n_selected`
			`ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)`
			`enddo`
			`enddo`
			`psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )`
			`enddo`
			`enddo`
			`E0pt2 = E0_ap + ept2`
			`print *, ' perturb energy = ', E0pt2, ept2`

			`tol_energy = dabs(E_prev - E0_ap)`
			`E_prev = E0_ap`

			`call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)`

			`write(44,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0_ap+Ept2`


			`call CPU_TIME(CPU_tend_it)`
			`call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)`
			`CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it`
			`W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)`
			`speedup_it = CPU_tot_time_it / W_tot_time_it`
			`print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, &`
			`& 3X, "CPU time = ", 1PE10.3, " min.", /, &`
			`& 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it`


			`end do`


			`deallocate( Uref, Vref, Dref )`


			`call CPU_TIME(CPU_tend)`
			`call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)`
			`CPU_tot_time = CPU_tend - CPU_tbeg`
			`W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)`
			`speedup = CPU_tot_time / W_tot_time`
			`print *,' ___________________________________________________________________'`
			`print '(//,3X,"Execution avec ",i2," threads")',nb_taches`
			`print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, &`
			`& 3X, "CPU time = ", 1PE10.3, " min.", /, &`
			`& 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup`
			`print *,' ___________________________________________________________________'`


			`end`



			`subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref)`

			`USE OMP_LIB`

			`integer, intent(in) :: n_selected`
			`double precision, intent(in) :: psi_postsvd(n_selected,n_selected)`
			`double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)`
			`double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)`
			`double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))`

			`integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb`
			`double precision :: err0, err_norm, err_tmp, norm_tmp`
			`double precision :: overlapU_mat, overlapV_mat, overlapU, overlapV`
			`double precision, allocatable :: S_mat(:,:), SxVt(:,:)`
			`double precision, allocatable :: U_svd(:,:), V_svd(:,:)`
			`double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)`

			`mm = n_det_alpha_unique`
			`nn = n_det_beta_unique`

			`allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )`

			`U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected)`
			`V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected)`
			`S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected)`

			`! first compute S_mat x transpose(V_svd)`
			`allocate( SxVt(n_selected,nn) )`
			`call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 &`
			`, S_mat , size(S_mat,1) &`
			`, V_svd , size(V_svd,1) &`
			`, 0.d0, SxVt, size(SxVt ,1) )`
			`deallocate(S_mat)`

			`! then compute U_svd x SxVt`
			`allocate( A_newsvd(mm,nn) )`
			`call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 &`
			`, U_svd , size(U_svd ,1) &`
			`, SxVt , size(SxVt ,1) &`
			`, 0.d0, A_newsvd, size(A_newsvd,1) )`
			`deallocate( SxVt )`

			`! perform new SVD`
			`allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )`
			`call svd_s( A_newsvd, size(A_newsvd,1), &`
			`U_newsvd, size(U_newsvd,1), &`
			`D_newsvd, &`
			`Vt_newsvd, size(Vt_newsvd,1), &`
			`mm, nn)`
			`deallocate(A_newsvd)`

			`allocate( V_newsvd(nn,nn) )`
			`do l = 1, nn`
			`do j = 1, nn`
			`V_newsvd(j,l) = Vt_newsvd(l,j)`
			`enddo`
			`enddo`
			`deallocate(Vt_newsvd)`


			`!do l = 1, n_selected`
			`! Dref(l) = D_newsvd(l)`
			`! Uref(1:mm,l) = U_newsvd(1:mm,l)`
			`! Vref(1:nn,l) = V_newsvd(1:nn,l)`
			`!enddo`
			`Dref(1:n_selected) = D_newsvd(1:n_selected)`
			`Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)`
			`Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)`


			`deallocate(U_newsvd)`
			`deallocate(V_newsvd)`
			`deallocate(D_newsvd)`

			`end subroutine perform_newpostSVD`



			`subroutine const_H_uv(Uref, Vref, H)`

			`USE OMP_LIB`

			`implicit none`

			`double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique)`
			`double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique)`
			`double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)`

			`integer(bit_kind) :: det1(N_int,2), det2(N_int,2)`
			`integer :: i, j, k, l, degree`
			`integer :: ii0, jj0, ii, jj, n, m, np, mp`
			`integer :: nn0, mm0, na, nb, mm, ind_gs`
			`integer :: p,q,r,s`
			`double precision :: h12, x`

			`double precision, allocatable :: H0(:,:,:,:)`
			`double precision, allocatable :: H1(:,:,:,:)`

			`na = n_det_alpha_unique`
			`nb = n_det_beta_unique`

			`allocate( H0(na,nb,na,nb) )`
			`allocate( H1(nb,na,nb,na) )`

			`H0 = 0.d0`
			`call wall_time(t0)`
			`!$OMP PARALLEL DEFAULT(NONE) &`
			`!$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) &`
			`!$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &`
			`!$OMP N_int,Uref,Vref,H0,H1,H)`

			`!$OMP DO`
			`do l = 1, nb`
			`det2(:,2) = psi_det_beta_unique(:,l)`
			`do j = 1, nb`
			`det1(:,2) = psi_det_beta_unique(:,j)`
			`call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)`
			`if (degree > 2) cycle`
			`do k = 1, na`
			`det2(:,1) = psi_det_alpha_unique(:,k)`
			`do i = 1, na`
			`det1(:,1) = psi_det_alpha_unique(:,i)`
			`call get_excitation_degree(det1,det2,degree,N_int)`
			`if ( degree > 2) cycle`
			`call i_H_j(det1, det2, N_int, h12)`
			`H0(i,j,k,l) = h12`
			`enddo`
			`enddo`
			`enddo`
			`enddo`
			`!$OMP END DO`

			`!$OMP END PARALLEL`

			`call wall_time(t1)`
			`! (i,j,k,l) -> (j,k,l,p)`
			`call DGEMM('T','N', nb * na * nb, na, na, &`
			`1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)size(H1,2)size(H1,3))`

			`! (j,k,l,p) -> (k,l,p,q)`
			`call DGEMM('T','N', na * nb * na, nb, nb, &`
			`1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)size(H0,2)size(H0,3))`

			`! (k,l,p,q) -> (l,p,q,r)`
			`call DGEMM('T','N', nb * na * nb, na, na, &`
			`1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)size(H1,2)size(H1,3))`

			`! (l,p,q,r) -> (p,q,r,s)`
			`call DGEMM('T','N', na * nb * na, nb, nb, &`
			`1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)size(H,2)size(H,3))`
			`call wall_time(t2)`
			`print *, t1-t0, t2-t1`
			`double precision :: t0, t1, t2`

			`deallocate(H1,H0)`

			`end`