qp_plugins_scemama/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f

program buildpsi_diagSVDit_Anthony_v4

  implicit none

  BEGIN_DOC
  ! perturbative approach to build psi_postsvd
  END_DOC

  read_wf = .True.
  TOUCH read_wf

  PROVIDE N_int

  call run()
end


subroutine run

  USE OMP_LIB

  implicit none

  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
  integer                       :: degree, i_state
  double precision              :: h12

  integer                       :: i, j, k, l

  double precision              :: norm_psi
  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)

  double precision              :: E0, tol_energy, ctmp, Ept2, E_prev

  double precision, allocatable :: H_diag(:,:), Hkl(:,:), H0(:,:), H(:,:,:,:)
  double precision, allocatable :: psi_postsvd(:,:)

  integer                       :: n_TSVD, it_svd, it_svd_max

  integer                       :: ii, jj, n_perturb
  double precision              :: norm, lambda, E_perturb
  double precision, allocatable :: eigval0(:), eigvec0(:,:,:), H_tmp(:,:,:,:)
  double precision, allocatable :: norm_row(:), norm_col(:), Utmp(:,:), Vtmp(:,:)

  det1(:,1) = psi_det_alpha_unique(:,1)
  det2(:,1) = psi_det_alpha_unique(:,1)
  det1(:,2) = psi_det_beta_unique(:,1)
  det2(:,2) = psi_det_beta_unique(:,1)
  call i_H_j(det1, det2, N_int, h12)


  i_state = 1

  ! ---------------------------------------------------------------------------------------
  !                      construct the initial CISD matrix

  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
  print *, ' N det    :', N_det
  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'

  norm_psi = 0.d0
  do k = 1, N_det
    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
                        * psi_bilinear_matrix_values(k,i_state)
  enddo
  print *, ' initial norm = ', norm_psi

  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
  Aref(:,:) = 0.d0
  do k = 1, N_det
    i         = psi_bilinear_matrix_rows(k)
    j         = psi_bilinear_matrix_columns(k)
    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
  enddo

  ! ---------------------------------------------------------------------------------------


  ! ---------------------------------------------------------------------------------------
  !                                  perform a Full SVD

  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
  !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
  allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )
  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
       , n_det_alpha_unique, n_det_beta_unique)

  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '

  do l = 1, n_det_beta_unique
    do i = 1, n_det_beta_unique
      Vref(i,l) = Vtref(l,i)
    enddo
  enddo
  deallocate( Vtref )

  ! Truncated rank
  !n_TSVD = 100
  !call write_int(6,n_TSVD, 'Rank of psi')


  !________________________________________________________________________________________________________
  !
  !                                   loop over SVD iterations
  !________________________________________________________________________________________________________

  tol_energy = 1.d0
  it_svd     = 0
  it_svd_max = 100
  E_prev = 0.d0

  !print *, ci_energy(1)

  allocate(H_diag(n_det_alpha_unique,n_det_beta_unique))
  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))

  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) )

    it_svd = it_svd + 1

    ! Truncated rank
    !n_TSVD = min(n_det_alpha_unique,n_det_beta_unique)
    !do i = min(n_det_alpha_unique,n_det_beta_unique), 10, -1
    !  if( dabs(Dref(i)) .lt. 1d-2 ) then
    !    n_TSVD = n_TSVD - 1
    !  else
    !    exit
    !  endif
    !enddo
    !do i = 1, min(n_det_alpha_unique,n_det_beta_unique)
    !  print *, i, Dref(i)
    !enddo
    !call write_int(6,n_TSVD, 'Rank of psi')
    n_TSVD = 30 !min(n_TSVD,100)
    call write_int(6,n_TSVD, 'Rank of psi')

    print *,  '-- Compute H --'
    allocate(H(n_TSVD,n_TSVD,n_det_alpha_unique,n_det_beta_unique))
    call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
    !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)

    ! E0 = < psi_0 | H | psi_0 >
    !norm = 0.d0
    !do j = 1, n_TSVD
    !  norm = norm + Dref(j)*Dref(j)
    !enddo
    !Dref = Dref / dsqrt(norm)
    !E0   = 0.d0
    !do j = 1, n_TSVD
    !  do i = 1, n_TSVD
    !     E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
    !  enddo
    !enddo
    !E0 = E0 + nuclear_repulsion
    !print *,' E0 bef diag =', E0

    allocate( H_tmp(n_TSVD,n_TSVD,n_TSVD,n_TSVD) )
    do l = 1, n_TSVD
      do k = 1, n_TSVD
        do j = 1, n_TSVD
          do i = 1, n_TSVD
            H_tmp(i,j,k,l) = H(i,j,k,l)
          enddo
        enddo
      enddo
    enddo
    allocate( eigval0(n_TSVD**2) , eigvec0(n_TSVD,n_TSVD,n_TSVD**2) )
    eigvec0 = 0.d0
    !print *,  ' --- Diag post-SVD --- '
    call lapack_diag(eigval0, eigvec0, H_tmp, n_TSVD**2, n_TSVD**2)
    !print *, 'eig =', eigval0(1) + nuclear_repulsion
    deallocate(H_tmp, eigval0)

    print *,  ' --- first SVD --- '
    Dref = 0.d0
    call perform_newpostSVD(n_TSVD, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref)
    deallocate(eigvec0)

    print *,  ' --- Compute H --- '
    call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
    !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)

    ! E0 = < psi_0 | H | psi_0 >
    E0 = 0.d0
    norm = 0.d0
    do j = 1, n_det_beta_unique
      do i = 1, n_TSVD
         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
      enddo
      norm = norm + Dref(j)*Dref(j)
    enddo
    E0 = E0 + nuclear_repulsion
    print *,' E0 aft diag =', E0

    ! -----------------------------------------------------------------

    !print *,  ' --- Perturbation --- '
    psi_postsvd = 0.d0
    do i = 1, n_TSVD
      psi_postsvd(i,i) = Dref(i)
    enddo

    lambda = 1.d0
    Ept2   = 0.d0
    do j = 1, n_TSVD
      do i = n_TSVD+1, n_det_alpha_unique
        ctmp = 0.d0
        do k = 1, n_TSVD
          ctmp = ctmp + H(k,k,i,j) * Dref(k)
        enddo
        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
        Ept2            +=   ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
      enddo
    enddo

    do j = n_TSVD+1, n_det_beta_unique
      do i = 1, n_TSVD
        ctmp = 0.d0
        do k = 1, n_TSVD
          ctmp = ctmp + H(k,k,i,j) * Dref(k)
        enddo
        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
        Ept2            +=   ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
      enddo
    enddo

    deallocate( H )
    ! -----------------------------------------------------------------

    n_perturb = n_TSVD + n_TSVD

    allocate( norm_row(n_det_alpha_unique) , norm_col(n_det_beta_unique) )
    do i = 1, n_det_alpha_unique
      norm_row(i) = 0.d0
      do j = 1, n_det_beta_unique
        norm_row(i) += psi_postsvd(i,j) * psi_postsvd(i,j)
      enddo
    enddo
    do j = 1, n_det_beta_unique
      norm_col(j) = 0.d0
      do i = 1, n_det_alpha_unique
        norm_col(j) += psi_postsvd(i,j) * psi_postsvd(i,j)
      enddo
    enddo

    allocate( Utmp(n_det_alpha_unique,n_perturb) , Vtmp(n_det_beta_unique,n_perturb) )
    do i = 1, n_perturb
      ii           = MAXLOC( norm_row , DIM=1 )
      jj           = MAXLOC( norm_col , DIM=1 )
      if( (norm_row(ii).lt.1.d-12) .or. (norm_col(jj).lt.1.d-12) ) then
        print *, '  !!!!!!!    '
        print *, ii, norm_row(ii)
        print *, jj, norm_col(jj)
        stop
      endif
      Utmp(:,i)    = Uref(:,ii)
      Vtmp(:,i)    = Vref(:,jj)
      norm_row(ii) = 0.d0
      norm_col(jj) = 0.d0
    enddo
    deallocate( norm_row , norm_col )

    print *,  ' --- Compute H in n_perturb space --- '
    allocate(H(n_perturb,n_perturb,n_det_alpha_unique,n_det_beta_unique))
    call const_H_TSVD(Uref, Vref, H, n_perturb)
    allocate( H_tmp(n_perturb,n_perturb,n_perturb,n_perturb) )
    do l = 1, n_perturb
      do k = 1, n_perturb
        do j = 1, n_perturb
          do i = 1, n_perturb
            H_tmp(i,j,k,l) = H(i,j,k,l)
          enddo
        enddo
      enddo
    enddo
    deallocate( H )
    allocate( eigval0(n_perturb**2) , eigvec0(n_perturb,n_perturb,n_perturb**2) )
    eigvec0 = 0.d0
    call lapack_diag(eigval0, eigvec0, H_tmp, n_perturb**2, n_perturb**2)
    E_perturb = eigval0(1) + nuclear_repulsion
    print *, ' diag in n_perturb space: ', E_perturb
    deallocate(H_tmp, eigval0)

    print *,  ' --- second SVD --- '
    Uref = 0.d0
    Vref = 0.d0
    Dref = 0.d0
    do l = 1, n_perturb
      Uref(1:n_det_alpha_unique,l) = Utmp(1:n_det_alpha_unique,l)
      Vref(1:n_det_beta_unique ,l) = Vtmp(1:n_det_beta_unique ,l)
    enddo
    deallocate( Utmp , Vtmp )

    call perform_perturbSVD(n_TSVD, n_perturb, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref)
    deallocate(eigvec0)


    tol_energy = dabs(E_prev - E0)
    print     '(I5, 2(2X,I5), 3(3X, F20.10))',  it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy
    write(222,'(I5, 2(2X,I5), 3(3X, F20.10))')  it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy
    E_prev = E0

    !print *,  ' --- SVD --- '
    !call perform_newpostSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)
    !call perform_newSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)


  end do


end


subroutine perform_newpostSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)

  integer, intent(in)             :: n_TSVD, LDP
  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
  !double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique))
  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))

  integer                         :: mm, nn, i, j, l, na, nb
  double precision                :: err0, err_norm, err_tmp, norm_tmp
  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)

  mm = n_det_alpha_unique
  nn = n_det_beta_unique

  allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )

  U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD)
  V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD)

  S_mat(1:n_TSVD,1:n_TSVD) = psi_postsvd(1:n_TSVD,1:n_TSVD)

  ! first compute S_mat x transpose(V_svd)
  allocate( SxVt(n_TSVD,nn) )
  call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0    &
             , S_mat     , size(S_mat,1)            &
             , V_svd     , size(V_svd,1)            &
             , 0.d0, SxVt, size(SxVt ,1)            )
  deallocate(S_mat)

  ! then compute U_svd x SxVt
  allocate( A_newsvd(mm,nn) )
  call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0        &
             , U_svd         , size(U_svd   ,1)     &
             , SxVt          , size(SxVt    ,1)     &
             , 0.d0, A_newsvd, size(A_newsvd,1)     )
  deallocate( SxVt )

  ! perform new SVD
  !allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) )
  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
  call svd_s( A_newsvd, size(A_newsvd,1), &
              U_newsvd, size(U_newsvd,1), &
              D_newsvd, &
              Vt_newsvd, size(Vt_newsvd,1), &
              mm, nn)
  deallocate(A_newsvd)

  allocate( V_newsvd(nn,nn) )
  do l = 1, nn
    do j = 1, nn
      V_newsvd(j,l) = Vt_newsvd(l,j)
    enddo
  enddo
  deallocate(Vt_newsvd)

  !do l = 1, n_TSVD
  !  Dref(l)      = D_newsvd(l)
  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
  !enddo
  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)


  deallocate(U_newsvd)
  deallocate(V_newsvd)
  deallocate(D_newsvd)

end subroutine perform_newpostSVD


subroutine perform_perturbSVD(n_TSVD, n_perturb, psi_postsvd, LDP, Uref, Vref, Dref)

  integer, intent(in)             :: n_TSVD, LDP, n_perturb
  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))

  integer                         :: mm, nn, i, j, l, na, nb
  double precision                :: err0, err_norm, err_tmp, norm_tmp
  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)

  mm = n_det_alpha_unique
  nn = n_det_beta_unique

  allocate( U_svd(mm,n_perturb) , V_svd(nn,n_perturb) , S_mat(n_perturb,n_perturb) )
  U_svd(1:mm,1:n_perturb)        = Uref(1:mm,1:n_perturb)
  V_svd(1:nn,1:n_perturb)        = Vref(1:nn,1:n_perturb)
  S_mat(1:n_perturb,1:n_perturb) = psi_postsvd(1:n_perturb,1:n_perturb)

  ! first compute S_mat x transpose(V_svd)
  allocate( SxVt(n_perturb,nn) )
  call dgemm( 'N', 'T', n_perturb, nn, n_perturb, 1.d0    &
             , S_mat     , size(S_mat,1)                  &
             , V_svd     , size(V_svd,1)                  &
             , 0.d0, SxVt, size(SxVt ,1)                  )
  deallocate(S_mat)

  ! then compute U_svd x SxVt
  allocate( A_newsvd(mm,nn) )
  call dgemm( 'N', 'N', mm, nn, n_perturb, 1.d0           &
             , U_svd         , size(U_svd   ,1)           &
             , SxVt          , size(SxVt    ,1)           &
             , 0.d0, A_newsvd, size(A_newsvd,1)           )
  deallocate( SxVt )

  ! perform new SVD
  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
  call svd_s( A_newsvd, size(A_newsvd,1), &
              U_newsvd, size(U_newsvd,1), &
              D_newsvd, &
              Vt_newsvd, size(Vt_newsvd,1), &
              mm, nn)
  deallocate(A_newsvd)

  allocate( V_newsvd(nn,nn) )
  do l = 1, nn
    do j = 1, nn
      V_newsvd(j,l) = Vt_newsvd(l,j)
    enddo
  enddo
  deallocate(Vt_newsvd)

  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)

  deallocate(U_newsvd)
  deallocate(V_newsvd)
  deallocate(D_newsvd)

end subroutine perform_perturbSVD


subroutine perform_newSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)

  integer, intent(in)             :: n_TSVD, LDP
  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))

  integer                         :: mm, nn, i, j, l, na, nb
  double precision                :: err0, err_norm, err_tmp, norm_tmp
  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)

  mm = n_det_alpha_unique
  nn = n_det_beta_unique

  allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) )

  U_svd(1:mm,1:mm) = Uref(1:mm,1:mm)
  V_svd(1:nn,1:nn) = Vref(1:nn,1:nn)

  norm_tmp = 0.d0
  do i = 1, nn
    do j = 1, mm
      S_mat(j,i) = psi_postsvd(j,i)
      norm_tmp  += psi_postsvd(j,i) * psi_postsvd(j,i)
    enddo
  enddo
  norm_tmp = 1.d0 / dsqrt(norm_tmp)
  do i = 1, nn
    do j = 1, mm
      S_mat(j,i) = S_mat(j,i) * norm_tmp
    enddo
  enddo

  ! first compute S_mat x transpose(V_svd)
  allocate( SxVt(mm,nn) )
  call dgemm( 'N', 'T', mm, nn, nn, 1.d0     &
             , S_mat     , size(S_mat,1)     &
             , V_svd     , size(V_svd,1)     &
             , 0.d0, SxVt, size(SxVt ,1)     )
  deallocate(S_mat)
  ! then compute U_svd x SxVt
  allocate( A_newsvd(mm,nn) )
  call dgemm( 'N', 'N', mm, nn, mm, 1.d0            &
             , U_svd         , size(U_svd   ,1)     &
             , SxVt          , size(SxVt    ,1)     &
             , 0.d0, A_newsvd, size(A_newsvd,1)     )
  deallocate( SxVt )

  ! perform new SVD
  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
  deallocate(A_newsvd)
  allocate( V_newsvd(nn,nn) )
  do l = 1, nn
    do j = 1, nn
      V_newsvd(j,l) = Vt_newsvd(l,j)
    enddo
  enddo
  deallocate(Vt_newsvd)

  !Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
  Dref            = D_newsvd
  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)

  deallocate(U_newsvd)
  deallocate(V_newsvd)
  deallocate(D_newsvd)

  return

end subroutine perform_newSVD


subroutine const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)

  USE OMP_LIB

  implicit none

  integer, intent(in)            :: n_TSVD
  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_det_alpha_unique)
  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_det_beta_unique)
  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)
  double precision, intent(out)  :: H_diag(n_det_alpha_unique,n_det_beta_unique)

  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
  integer                        :: i, j, k, l, degree, n, m, na, nb
  double precision               :: h12

  double precision, allocatable  :: H0(:,:,:,:)
  double precision, allocatable  :: H1(:,:,:,:)
  double precision, allocatable  :: tmp3(:,:,:)
  double precision, allocatable  :: tmp1(:,:), tmp0(:,:), tmp4(:,:)
  double precision :: c_tmp


  na = n_det_alpha_unique
  nb = n_det_beta_unique

  call wall_time(t0)

  allocate( H0(na,nb,n_TSVD,n_TSVD) )
  allocate( tmp3(na,nb,nb) )
  H0   = 0.d0
  tmp3 = 0.d0

 !$OMP PARALLEL DEFAULT(NONE)                                                  &
 !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0,tmp4) &
 !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,         &
 !$OMP                 N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD)

  allocate(tmp1(na,na), tmp0(nb,nb), tmp4(nb,nb))

  do i = 1, na
    do m = 1, na
      tmp1(m,i) = Uref(i,m)
    enddo
  enddo

  !$OMP DO
  do l = 1, nb
    det2(:,2) = psi_det_beta_unique(:,l)
    do j = 1, nb
      det1(:,2) = psi_det_beta_unique(:,j)

      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
      if (degree > 2) cycle

      do k = 1, na
        det2(:,1) = psi_det_alpha_unique(:,k)
        do i = 1, na
          det1(:,1) = psi_det_alpha_unique(:,i)

          call get_excitation_degree(det1,det2,degree,N_int)
          if ( degree > 2) cycle

          call i_H_j(det1, det2, N_int, h12)
          if (h12 == 0.d0) cycle

          do m = 1, na
            tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k)
          enddo

          do n = 1, n_TSVD
            c_tmp = h12 * Vref(j,n)
            if (c_tmp == 0.d0) cycle
            do m = 1, n_TSVD
              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
            enddo
          enddo

        enddo
      enddo
    enddo
  enddo
  !$OMP END DO

  !$OMP DO
  do m = 1, na
    do l = 1, nb
      do j = 1, nb
        tmp4(j,l) = tmp3(m,j,l)
      enddo
    enddo
    call DGEMM('N','N',nb,nb,nb,1.d0,                                &
        tmp4, size(tmp4,1),                                          &
        Vref, size(Vref,1),                                          &
        0.d0, tmp0, size(tmp0,1))
    do n = 1, nb
      H_diag(m,n) = 0.d0
      do j = 1, nb
        H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n)
      enddo
    enddo
  enddo
  !$OMP END DO
  deallocate(tmp1, tmp0)
  deallocate(tmp4)
  !$OMP END PARALLEL

  deallocate(tmp3)

  call wall_time(t1)

  ! H0(na,nb,n_TSVD,n_TSVD)
  allocate( H1(nb,n_TSVD,n_TSVD,na) )
  call DGEMM('T', 'N', nb * n_TSVD * n_TSVD, na, na, 1.d0, &
              H0  , size(H0,1)  ,                          &
              Uref, size(Uref,1), 0.d0,                    &
              H1  , size(H1,1) * size(H1,2) * size(H1,3)   )
  deallocate( H0 )

  ! (l,p,q,r) -> (p,q,r,s)
  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))

!  do j=1,n_TSVD
!    do i=1,n_TSVD
!      print *, H_diag(i,j), H(i,j,i,j)
!    enddo
!  enddo
  deallocate(H1)

  call wall_time(t2)
  print *, 't=', t1-t0, t2-t1
  double precision               :: t0, t1, t2
!  stop
end subroutine const_H_uv_modif


subroutine const_H_TSVD(Uref, Vref, H, n_TSVD)

  USE OMP_LIB

  implicit none

  integer, intent(in)            :: n_TSVD
  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_TSVD)
  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_TSVD)
  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)

  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
  integer                        :: i, j, k, l, degree, n, m, na, nb
  double precision               :: h12

  double precision, allocatable  :: H0(:,:,:,:)
  double precision, allocatable  :: H1(:,:,:,:)
  double precision, allocatable  :: tmp1(:,:)
  double precision               :: c_tmp
  double precision               :: t0, t1, t2


  na = n_det_alpha_unique
  nb = n_det_beta_unique

  call wall_time(t0)

  allocate( H0(na,nb,n_TSVD,n_TSVD) )
  H0 = 0.d0

  !$OMP PARALLEL DEFAULT(NONE)                                             &
      !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1)  &
      !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique &
      !$OMP                 ,N_int,Uref,Vref,H0,n_TSVD)

  allocate(tmp1(na,na))
  do i = 1, na
    do m = 1, na
      tmp1(m,i) = Uref(i,m)
    enddo
  enddo

  !$OMP DO
  do l = 1, nb
    det2(:,2) = psi_det_beta_unique(:,l)
    do j = 1, nb
      det1(:,2) = psi_det_beta_unique(:,j)

      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
      if (degree > 2) cycle

      do k = 1, na
        det2(:,1) = psi_det_alpha_unique(:,k)
        do i = 1, na
          det1(:,1) = psi_det_alpha_unique(:,i)

          call get_excitation_degree(det1,det2,degree,N_int)
          if ( degree > 2) cycle

          call i_H_j(det1, det2, N_int, h12)
          if (h12 == 0.d0) cycle

          do n = 1, n_TSVD
            c_tmp = h12 * Vref(j,n)
            if (c_tmp == 0.d0) cycle
            do m = 1, n_TSVD
              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
            enddo
          enddo

        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  deallocate(tmp1)
  !$OMP END PARALLEL

  call wall_time(t1)
  allocate( H1(nb,n_TSVD,n_TSVD,na) )
  call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na,          &
      1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
  deallocate( H0 )
  ! (l,p,q,r) -> (p,q,r,s)
  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
  deallocate(H1)
  call wall_time(t2)

  print *, 't=', t1-t0, t2-t1

end subroutine const_H_TSVD