Merge branch 'master' into 'master'

# Conflicts: # devel/svdwf/Evar_TruncSVD.irp.f
2025-04-18 06:20:11 +02:00 · 2021-07-31 09:07:58 +00:00 · 2021-07-31 09:07:58 +00:00 · 46aadac71c
commit 46aadac71c
parent e153c88f62 2fd2fcac5d
30 changed files with 13094 additions and 123 deletions
--- a/devel/svdwf/Evar_TruncSVD.irp.f
+++ b/devel/svdwf/Evar_TruncSVD.irp.f
@ -9,158 +9,443 @@ program Evar_TruncSVD
  call run()
  ! !!!
 end
-
-
-
-
+! !!!
 subroutine run
+  ! !!!
  implicit none
  include 'constants.include.F'
-  double precision, allocatable :: A(:,:), U(:,:), V(:,:), D(:)
-  integer :: r, i, j, k, l, m, n, iter, iter_max
-  double precision, allocatable :: Z(:,:), P(:,:), Yt(:,:), UYt(:,:), r1(:,:)
  ! !!!
-  m = n_det_alpha_unique
-  n = n_det_beta_unique
-  r = n
+  integer                       :: m, n, i_state
+  double precision              :: error_thr, error_RRRSVD, norm_psi, norm_SVD, err_verif, err_tmp
+  integer                       :: i, j, k, l, It, PowerIt
+  integer                       :: It_max, PowerIt_max, nb_oversamp
+  integer                       :: r_init, delta_r, low_rank
+  double precision, allocatable :: B_old(:,:), Q_old(:,:)
+  double precision, allocatable :: UB(:,:), D(:), Vt(:,:), U(:,:)
+  double precision, allocatable :: P(:,:), r1(:,:)
+  double precision, allocatable :: Q_new(:,:), Q_tmp(:,:), Q_mult(:,:)
+  double precision, allocatable :: B_new(:,:), B_tmp(:,:)
+  double precision, allocatable :: URSVD(:,:), DRSVD(:), VtRSVD(:,:)
+  double precision, allocatable :: Uverif(:,:), Dverif(:), Vtverif(:,:), Averif(:,:)
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+  double precision :: tmp
+  ! !!!
+  i_state = 1
+  m       = n_det_alpha_unique
+  n       = n_det_beta_unique
+  ! !!!
+  !open(111, file = 'data_to_python.txt', action = 'WRITE' )  
+  !  do k = 1, N_det
+  !    write(111, '(I8, 5X, I8, 5X, E15.7)' ) psi_bilinear_matrix_rows(k), psi_bilinear_matrix_columns(k), psi_bilinear_matrix_values(k,i_state)
+  !  end do  
+  !close(111)
+  ! !!!
+  ! !!!
  print *, 'matrix:', m,'x',n
  print *, 'N det:', N_det
-  print *, 'rank = ', r
-  iter_max = 20
  ! !!!
-  allocate( Z(m,r) , P(n,r) ) ! Z(m,r) = A(m,n) @ P(n,r)
-  Z(:,:) = 0.d0
+  r_init  = 78
+  delta_r = 5
  ! !!!
-  ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
-  ! first we apply a RSVD for a pre-fixed rank (r)
-  ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
-  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,r1)
-  allocate(r1(N_det,2))
+  PowerIt_max = 10
+  nb_oversamp = 10
+  ! !!!
+  It_max    = 10
+  error_thr = 1.d-3 !! don't touche it but rather increase r_init
+  ! !!!
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!! ~      Rank Revealing Randomized SVD      ~ !!! !
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!!
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  ! !!!
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!!  build initial QB decomposition !!! !
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!!
+  allocate( Q_old(m, r_init) )
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,k,l,r1)
+  allocate( r1(N_det,2) )
  !$OMP DO
-  do l=1,r
-   call random_number(r1)
-   r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1)))
-   r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2))
-   do k=1,N_det
-     i = psi_bilinear_matrix_rows(k)
-     j = psi_bilinear_matrix_columns(k)
-     Z(i,l) = Z(i,l) + psi_bilinear_matrix_values(k,1) * r1(k,1)
-   enddo
+  do l = 1, r_init
+    Q_old(:,l) = 0.d0
+    call random_number(r1)
+    r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1)))
+    r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2))
+    do k = 1, N_det
+      i          = psi_bilinear_matrix_rows(k)
+      Q_old(i,l) = Q_old(i,l) + psi_bilinear_matrix_values(k,i_state) * r1(k,1)
+    enddo
  enddo
  !$OMP END DO
  deallocate(r1)
  !$OMP END PARALLEL
  ! !!!
-  ! Power iterations
+  ! power scheme
  ! !!!
-  do iter=1,iter_max
-   ! !!!
-   print *, 'Power iteration ', iter, '/', 20
-   ! !!!
-   ! P(n,r) = At(n,m) @ Z(m,r)
-   ! !!!
-   !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
-   !$OMP DO
-   do l=1,r
-    P(:,l) = 0.d0
-    do k=1,N_det
-      i = psi_bilinear_matrix_rows(k)
-      j = psi_bilinear_matrix_columns(k)
-      P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,1) * Z(i,l)
+  allocate( P(n, r_init) ) 
+  do PowerIt = 1, PowerIt_max
+    ! !!!
+    call my_ortho_qr(Q_old, size(Q_old,1), m, r_init)
+    ! !!!
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
+    !$OMP DO
+    do l = 1, r_init
+      P(:,l) = 0.d0
+      do k = 1, N_det
+        i      = psi_bilinear_matrix_rows(k)
+        j      = psi_bilinear_matrix_columns(k)
+        P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,i_state) * Q_old(i,l)
+      enddo
    enddo
-   enddo
-   !$OMP END DO
-   ! !!!
-   ! Z(m,r) = A(m,n) @ P(n,r)
-   ! !!!
-   !$OMP BARRIER
-   !$OMP DO
-   do l=1,r
-    Z(:,l) = 0.d0
-    do k=1,N_det
-      i = psi_bilinear_matrix_rows(k)
-      j = psi_bilinear_matrix_columns(k)
-      Z(i,l) = Z(i,l) + psi_bilinear_matrix_values(k,1) * P(j,l)
+    !$OMP END DO
+    !$OMP END PARALLEL
+    ! !!!
+    call my_ortho_qr(P, size(P,1), n, r_init)
+    ! !!!
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
+    !$OMP DO
+    do l = 1, r_init
+      Q_old(:,l) = 0.d0
+      do k = 1 , N_det
+        i          = psi_bilinear_matrix_rows(k)
+        j          = psi_bilinear_matrix_columns(k)
+        Q_old(i,l) = Q_old(i,l) + psi_bilinear_matrix_values(k,i_state) * P(j,l)
+      enddo
    enddo
-   enddo
-   !$OMP END DO
-   !$OMP END PARALLEL
-   ! !!!
-   ! Compute QR: at return: Q is Z(m,r)
-   ! !!!
-   call ortho_qr(Z,size(Z,1),m,r)
-   ! !!!
+    !$OMP END DO
+    !$OMP END PARALLEL
+    ! !!!
  enddo
+  deallocate( P )
  ! !!!
-  ! Y(r,n)  = Zt(r,m) @ A(m,n) or Yt(n,r) = At(n,m) @ Z(m,r)
+  call my_ortho_qr(Q_old, size(Q_old,1), m, r_init)
  ! !!!
-  allocate(Yt(n,r))
+  allocate( B_old(r_init,n) )
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
  !$OMP DO
-  do l=1,r
-    do k=1,n
-      Yt(k,l) = 0.d0
+  do l = 1, r_init
+    B_old(l,:) = 0.d0
+    do k = 1, N_det
+      i          = psi_bilinear_matrix_rows(k)
+      j          = psi_bilinear_matrix_columns(k)
+      B_old(l,j) = B_old(l,j) + Q_old(i,l) * psi_bilinear_matrix_values(k,i_state)
    enddo
-    do k=1,N_det
-     i = psi_bilinear_matrix_rows(k)
-     j = psi_bilinear_matrix_columns(k)
-     Yt(j,l) = Yt(j,l) + Z(i,l) * psi_bilinear_matrix_values(k,1)
-   enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL
+  norm_SVD = 0.d0
+  do j = 1, n
+    do l = 1, r_init
+      norm_SVD = norm_SVD + B_old(l,j) * B_old(l,j)
+    enddo
+  enddo
+  error_RRRSVD  = dabs( norm_psi - norm_SVD ) / norm_psi
+  It       = 1
+  low_rank = r_init
+  print *, It, low_rank, error_RRRSVD
  ! !!!
-  ! Y  = UY @ D @ Vt or Yt = V @ Dt @ UYt
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!! incrementally build up QB decomposition !!! !
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
  ! !!!
-  allocate(D(r),V(n,r), UYt(r,r))
-  ! !!!
-  call svd(Yt,size(Yt,1),V,size(V,1),D,UYt,size(UYt,1),n,r)
-  deallocate(Yt)
-  ! !!!
-  ! U(m,r) = Z(m,r) @ UY(r,r)  or U = Z @ (UYt).T
-  ! !!!
-  allocate(U(m,r))
-  call dgemm('N','T',m,r,r,1.d0,Z,size(Z,1),UYt,size(UYt,1),0.d0,U,size(U,1))
-  deallocate(UYt,Z)
-  ! !!!
-  !do i=1,r
-  !  print *, i, real(D(i)), real(D(i)**2), real(sum(D(1:i)**2))
-  !  if (D(i) < 1.d-15) then
-  !    k = i
-  !    exit
-  !  endif
-  !enddo
-  !print *, 'threshold: ', 2.858 * D(k/2)
-  ! !!!
-  ! Build the new determinant: U @ D @ Vt
-  ! !!!
-  !!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
-  !!$OMP DO
-  !!
-  !print *, 'ok 1'
-  !N_det = m * n
-  !print *, 'ok 11'
-  !TOUCH N_det
-  !psi_bilinear_matrix_values(:,1) = 0.d0
-  !TOUCH psi_bilinear_matrix_values
-  ! print *, size(psi_bilinear_matrix_values,1), size(D), size(U,1), size(U,2), size(V,1), size(V,2)
-  print*, PSI_energy(1) + nuclear_repulsion
-  psi_bilinear_matrix(:,:,:) = 0.d0
-  do r = 1, n
-    call generate_all_alpha_beta_det_products
-    do i = 1, N_det_beta_unique
-      do j = 1, N_det_alpha_unique
-        psi_bilinear_matrix(j,i,1) = 0.d0
-        do l = 1, r
-          psi_bilinear_matrix(j,i,1) = psi_bilinear_matrix(j,i,1) + D(l) * U(j,l) * V(i,l)
-        enddo
+  do while( ( error_RRRSVD.gt.error_thr ).and.( It.lt.It_max ).and.( low_rank.lt.(min(m,n)-delta_r) ) )
+    ! !!!
+    allocate( Q_new(m,delta_r) ) 
+    allocate( r1(N_det, 2) )
+    do l = 1, delta_r
+      Q_new(:,l) = 0.d0
+      call random_number(r1)
+      r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1)))
+      r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2))
+      do k = 1, N_det
+        i          = psi_bilinear_matrix_rows(k)
+        Q_new(i,l) = Q_new(i,l) + psi_bilinear_matrix_values(k,i_state) * r1(k,1)
      enddo
    enddo
-    TOUCH psi_bilinear_matrix
-    call update_wf_of_psi_bilinear_matrix(.False.)
-    print*, r, PSI_energy(1) + nuclear_repulsion, s2_values(1) !CI_energy(1)
-    call save_wavefunction()
+    deallocate(r1)
+    ! !!!
+    ! orthogonalization with Q_old:  Q_new = Q_new - Q_old @ Q_old.T @ Q_new
+    ! !!!
+    !allocate( Q_mult(m,m) )
+    !call dgemm( 'N', 'T', m, m, low_rank, +1.d0, Q_old,  size(Q_old,1),  Q_old, size(Q_old,1), 0.d0, Q_mult, size(Q_mult,1) )
+    !!do i = 1, m
+    !!  do j = 1, m
+    !!    Q_mult(j,i) = 0.d0
+    !!    do l = 1, low_rank
+    !!      Q_mult(j,i) = Q_mult(j,i) +  Q_old(i,l) * Q_old(j,l)
+    !!    enddo
+    !!  enddo
+    !!enddo
+    !!call dgemm( 'N', 'N', m, delta_r, m,  -1.d0, Q_mult, size(Q_mult,1), Q_new, size(Q_new,1), 1.d0, Q_new,  size(Q_new,1)  )
+    !do l = 1, delta_r
+    !  do i = 1, m
+    !    tmp = 0.d0
+    !    do j = 1, m
+    !      tmp = tmp + Q_mult(i,j) * Q_new(j,l)
+    !    enddo
+    !    Q_new(i,l) = Q_new(i,l) - tmp
+    !  enddo
+    !enddo
+    !deallocate( Q_mult )
+    ! !!!
+    ! power scheme
+    ! !!!
+    allocate( P(n, delta_r) )
+    do PowerIt = 1, PowerIt_max
+      ! !!!
+      call my_ortho_qr(Q_new, size(Q_new,1), m, delta_r)
+      ! !!!
+      do l = 1, delta_r
+        P(:,l) = 0.d0
+        do k = 1, N_det
+          i      = psi_bilinear_matrix_rows(k)
+          j      = psi_bilinear_matrix_columns(k)
+          P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,i_state) * Q_new(i,l)
+        enddo
+      enddo
+      ! !!!
+      call my_ortho_qr(P, size(P,1), n, delta_r)
+      ! !!!
+      do l = 1, delta_r
+        Q_new(:,l) = 0.d0
+        do k = 1, N_det
+          i          = psi_bilinear_matrix_rows(k)
+          j          = psi_bilinear_matrix_columns(k)
+          Q_new(i,l) = Q_new(i,l) + psi_bilinear_matrix_values(k,i_state) * P(j,l)
+        enddo
+      enddo
+      ! !!!
+    enddo
+    deallocate( P )
+    ! !!!
+    ! orthogonalization with Q_old:  Q_new = Q_new - Q_old @ Q_old.T @ Q_new
+    ! !!!
+    allocate( Q_mult(m,m) )
+    call dgemm( 'N', 'T', m, m, low_rank, +1.d0, Q_old,  size(Q_old,1),  Q_old, size(Q_old,1), 0.d0, Q_mult, size(Q_mult,1) )
+    !do i = 1, m
+    !  do j = 1, m
+    !    Q_mult(j,i) = 0.d0
+    !    do l = 1, low_rank
+    !      Q_mult(j,i) = Q_mult(j,i) +  Q_old(i,l) * Q_old(j,l)
+    !    enddo
+    !  enddo
+    !enddo
+    !call dgemm( 'N', 'N', m, delta_r, m,  -1.d0, Q_mult, size(Q_mult,1), Q_new, size(Q_new,1), 1.d0, Q_new,  size(Q_new,1)  )
+    do l = 1, delta_r
+      do i = 1, m
+        tmp = 0.d0
+        do j = 1, m
+          tmp = tmp + Q_mult(i,j) * Q_new(j,l)
+        enddo
+        Q_new(i,l) = Q_new(i,l) - tmp
+      enddo
+    enddo
+    deallocate( Q_mult )
+    ! !!!
+    call my_ortho_qr(Q_new, size(Q_new,1), m, delta_r)
+    ! !!!
+    allocate( B_new(delta_r,n) )
+    do l = 1 , delta_r
+      B_new(l,:) = 0.d0
+      do k = 1 , N_det
+        i          = psi_bilinear_matrix_rows(k)
+        j          = psi_bilinear_matrix_columns(k)
+        B_new(l,j) = B_new(l,j) + Q_new(i,l) * psi_bilinear_matrix_values(k,i_state)
+      enddo
+    enddo
+    ! !!!
+    do j = 1, n
+      do l = 1, delta_r
+        norm_SVD = norm_SVD + B_new(l,j) * B_new(l,j)
+      enddo
+    enddo
+    ! !!!
+    error_RRRSVD = dabs( norm_psi - norm_SVD ) / norm_psi
+    It           = It + 1
+    low_rank     = low_rank + delta_r
+    ! !!!
+    ! build up approximate basis:
+    !         Q_old  = np.append(Q_new, Q_old, axis=1)
+    !         B      = np.append(B_new, B, axis=0)
+    ! !!!
+    allocate( Q_tmp(m,low_rank) , B_tmp(low_rank,n) )
+    ! !!!
+    !do l = 1, delta_r
+    !  do i = 1, m
+    !    Q_tmp(i,l) = Q_new(i,l)
+    !  enddo
+    !enddo
+    !do l = 1 , low_rank-delta_r
+    !  do i = 1, m
+    !    Q_tmp(i,l+delta_r) = Q_old(i,l)
+    !  enddo
+    !enddo
+    !do i = 1, n
+    !  do l = 1 , delta_r
+    !    B_tmp(l,i) = B_new(l,i)
+    !  enddo
+    !enddo
+    !do i = 1, n
+    !  do l = 1 , low_rank-delta_r
+    !    B_tmp(l+delta_r,i) = B_old(l,i)
+    !  enddo
+    !enddo
+    ! !!!
+    do i = 1, m
+      do l = 1, low_rank-delta_r
+        Q_tmp(i,l) = Q_old(i,l)
+      enddo
+      do l = 1, delta_r
+        Q_tmp(i,l+low_rank-delta_r) = Q_new(i,l)
+      enddo
+    enddo
+    do i = 1, n
+      do l = 1 , low_rank-delta_r
+        B_tmp(l,i) = B_old(l,i)
+      enddo
+      do l = 1 , delta_r
+        B_tmp(l+low_rank-delta_r,i) = B_new(l,i)
+      enddo
+    enddo
+    ! !!!
+    deallocate( Q_old, B_old, Q_new, B_new )
+    allocate( Q_old(m,low_rank) , B_old(low_rank,n) )
+    ! !!!
+    do l = 1, low_rank
+      do i = 1, m
+        Q_old(i,l) = Q_tmp(i,l)
+      enddo
+    enddo
+    do l = 1, n
+      do i = 1, low_rank
+        B_old(i,l) = B_tmp(i,l)
+      enddo
+    enddo
+    deallocate( Q_tmp , B_tmp )
+    ! !!!
+  print *, It, low_rank, error_RRRSVD
+  ! !!!
  enddo
-  deallocate(U,D,V)
+  ! !!!
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!!
+  allocate( UB(low_rank,low_rank), D(low_rank), Vt(low_rank,n) )
+  call svd_s(B_old, size(B_old,1), UB, size(UB,1), D, Vt, size(Vt,1), low_rank, n)
+  deallocate(B_old)
+  print*, 'ok 1'
+  ! !!!
+  allocate( U(m,low_rank) )
+  call dgemm('N', 'N', m, low_rank, low_rank, 1.d0, Q_old, size(Q_old,1), UB, size(UB,1), 0.d0, U, size(U,1))
+  deallocate( Q_old,UB )
+  print*, 'ok 2'
+  ! !!!
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!!
+  allocate( URSVD(m,low_rank), DRSVD(low_rank), VtRSVD(low_rank,n) )
+  call RSVD( i_state, low_rank, PowerIt_max, nb_oversamp, URSVD, DRSVD, VtRSVD )
+  print*, 'ok 3'
+  ! !!!
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!!
+  allocate( Averif(m,n), Uverif(m,m), Dverif(min(m,n)), Vtverif(n,n) )
+  do i = 1, n
+    Averif(:,i) = 1d-16
+  enddo
+  do k = 1, N_det
+    i           = psi_bilinear_matrix_rows(k)
+    j           = psi_bilinear_matrix_columns(k)
+    Averif(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+  call svd_s( Averif, size(Averif,1), Uverif, size(Uverif,1), Dverif, Vtverif, size(Vtverif,1), m, n)
+  print*, 'ok 4'
+  ! !!!
+  err_verif = 0.d0
+  do j = 1, n
+    do i = 1, m
+    err_tmp = 0.d0
+    do l = 1, low_rank
+      err_tmp = err_tmp + Dverif(l) * Uverif(i,l) * Vtverif(l,j)
+    enddo
+    err_verif = err_verif + ( Averif(i,j) - err_tmp )**2.d0
+    enddo
+  enddo
+  print*, 'err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi)
+  ! !!!
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! !
+  ! !!!
+  open(111, file = 'singular_values.txt', action = 'WRITE' )  
+    do i = 1, low_rank  
+      write(111, '(I8, 5X, E15.7, 2(5X, E15.7, E15.7) )' ) i, Dverif(i), D(i), 100.d0*dabs(D(i)-Dverif(i))/Dverif(i), DRSVD(i), 100.d0*dabs(DRSVD(i)-Dverif(i))/Dverif(i)
+    end do  
+  close(111)
+  ! !!!
+  !deallocate( Averif, Uverif, Dverif, Vtverif )
+  ! !!!
+  low_rank = 10
+  ! !!!
+  err_verif = 0.d0
+  do j = 1, n
+    do i = 1, m
+    err_tmp = 0.d0
+    do l = 1, low_rank
+      err_tmp = err_tmp + Dverif(l) * Uverif(i,l) * Vtverif(l,j)
+    enddo
+    err_verif = err_verif + ( Averif(i,j) - err_tmp )**2.d0
+    enddo
+  enddo
+  print*, 'err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi)
+  ! !!!
+  print*, 'low_rank =', low_rank
+  allocate(Uezfio(m,low_rank,1), Dezfio(low_rank,1), Vezfio(n,low_rank,1))
+  do l = 1, low_rank
+    Dezfio(l,1) = Dverif(l)
+    do j = 1, m
+      Uezfio(j,l,1) = Uverif(j,l)
+    enddo
+    do j = 1, n
+      Vezfio(j,l,1) = Vtverif(l,j)
+    enddo
+  enddo
+  deallocate( U, D, Vt )
+  ! !!!
+  call ezfio_set_spindeterminants_n_det(N_det)
+  call ezfio_set_spindeterminants_n_states(N_states)
+  call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+  ! !!!
+  call ezfio_set_spindeterminants_n_svd_coefs(low_rank)
+  call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  deallocate(Uezfio, Dezfio, Vezfio)
+  ! !!!
+  !print*, PSI_energy(1) + nuclear_repulsion
+  !psi_bilinear_matrix(:,:,:) = 0.d0
+  !do low_rank = n, n
+  !  call generate_all_alpha_beta_det_products
+  !  do i = 1, N_det_beta_unique
+  !    do j = 1, N_det_alpha_unique
+  !      psi_bilinear_matrix(j,i,1) = 0.d0
+  !      do l = 1, r
+  !        psi_bilinear_matrix(j,i,1) = psi_bilinear_matrix(j,i,1) + D(l) * U(j,l) * V(i,l)
+  !      enddo
+  !    enddo
+  !  enddo
+  !  TOUCH psi_bilinear_matrix
+  !  call update_wf_of_psi_bilinear_matrix(.False.)
+  !  print*, low_rank, PSI_energy(1) + nuclear_repulsion !CI_energy(1)
+  !enddo
+  !deallocate(U,D,V)
  ! !!!
 end
--- a/devel/svdwf/FSVD_trunc.irp.f
+++ b/devel/svdwf/FSVD_trunc.irp.f
@ -0,0 +1,97 @@
+
+program FSVD_trunc
+  implicit none
+  BEGIN_DOC
+  ! study precision variation with truncated SVD
+  END_DOC
+  read_wf = .True.
+  TOUCH read_wf
+  ! !!!
+  call run()
+  ! !!!
+end
+
+
+subroutine run
+
+  implicit none
+
+  integer                       :: mm, nn, i_state, low_rank, lrank_min, lrank_max
+  integer                       :: i, j, k, l
+  double precision              :: norm_psi, err_verif, err_tmp
+  double precision, allocatable :: U_FSVD(:,:), D_FSVD(:), Vt_FSVD(:,:), A_FSVD(:,:)
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  i_state = 1
+  mm      = n_det_alpha_unique
+  nn      = n_det_beta_unique
+
+  print *, ' matrix dimensions:', mm,'x',nn
+  print *, ' N det:', N_det
+
+  allocate( A_FSVD(mm,nn), U_FSVD(mm,mm), D_FSVD(min(mm,nn)), Vt_FSVD(nn,nn) )
+
+  norm_psi    = 0.d0
+  A_FSVD(:,:) = 0.d0
+  do k = 1, N_det
+    i           = psi_bilinear_matrix_rows(k)
+    j           = psi_bilinear_matrix_columns(k)
+    A_FSVD(i,j) = psi_bilinear_matrix_values(k,i_state)
+    norm_psi   += psi_bilinear_matrix_values(k,i_state) * psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  call svd_s( A_FSVD, size(A_FSVD,1), U_FSVD, size(U_FSVD,1), D_FSVD, Vt_FSVD, size(Vt_FSVD,1), mm, nn)
+  print *, ' --- Full SVD: ok --- '
+
+  !lrank_min = 100
+  !lrank_max = nn
+  !do low_rank = lrank_min, lrank_max, 1
+  !  err_verif = 0.d0
+  !  do j = 1, nn
+  !    do i = 1, mm
+  !      err_tmp = 0.d0
+  !      do l = 1, low_rank
+  !        err_tmp = err_tmp + D_FSVD(l) * U_FSVD(i,l) * Vt_FSVD(l,j)
+  !      enddo
+  !      err_verif = err_verif + (A_FSVD(i,j)-err_tmp) * (A_FSVD(i,j)-err_tmp) 
+  !    enddo
+  !  enddo
+  !  print*, ' low rank      = ', low_rank
+  !  print*, ' err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi)
+  !enddo
+
+  ! ------------------------------------------------------------------------------------------------
+  !                           set to EZFIO for a fixed low rank
+
+  low_rank = min(mm,nn)
+  allocate( Uezfio(mm,low_rank,1), Dezfio(low_rank,1), Vezfio(nn,low_rank,1))
+
+  do l = 1, low_rank
+    Dezfio(l,1) = D_FSVD(l)
+    do j = 1, mm
+      Uezfio(j,l,1) = U_FSVD(j,l)
+    enddo
+    do j = 1, nn
+      Vezfio(j,l,1) = Vt_FSVD(l,j)
+    enddo
+  enddo
+
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  call ezfio_set_spindeterminants_n_svd_coefs(low_rank)
+  call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+
+  ! ------------------------------------------------------------------------------------------------
+
+  deallocate( Uezfio, Dezfio, Vezfio )
+  deallocate( U_FSVD, D_FSVD, Vt_FSVD )
+
+end
--- a/devel/svdwf/NEED
+++ b/devel/svdwf/NEED
@ -1,2 +1,2 @@
 determinants
-davidson_undressed
+davidson_undressed 
--- a/devel/svdwf/QR.py
+++ b/devel/svdwf/QR.py
@ -0,0 +1,10 @@
+# !!!
+import numpy as np
+# !!!
+def QR_fact(X):
+    Q, R     = np.linalg.qr(X, mode="reduced")
+    D        = np.diag( np.sign( np.diag(R) ) )
+    Qunique  = np.dot(Q,D)
+    #Runique = np.dot(D,R)
+    return(Qunique)
+# !!!
--- a/devel/svdwf/R3SVD_AMMAR.py
+++ b/devel/svdwf/R3SVD_AMMAR.py
@ -0,0 +1,68 @@
+# !!!
+import numpy as np
+from QR   import QR_fact
+from RSVD import powit_RSVD
+# !!!
+def R3SVD_AMMAR(A, t, delta_t, npow, nb_oversamp, err_thr, maxit, tol):
+    # !!!
+    # build initial QB decomposition
+    # !!!
+    n = A.shape[1]
+    G = np.random.randn(n, t)
+    normA  = np.linalg.norm(A, ord='fro')**2
+    i_it = 0
+    rank = 0
+    Y = np.dot(A,G)
+    # The power scheme
+    for j in range(npow):
+        Q = QR_fact(Y)
+        Q = QR_fact( np.dot(A.T,Q) )
+        Y = np.dot(A,Q)
+    # orthogonalization process
+    Q_old = QR_fact(Y)
+    B     = np.dot(Q_old.T,A)
+    normB = np.linalg.norm(B, ord='fro')**2
+    # error percentage
+    errpr = abs( normA - normB ) / normA
+    rank += t
+    i_it += 1
+    print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    # !!!
+    # incrementally build up QB decomposition
+    # !!!
+    while ( (errpr>err_thr) and (i_it<maxit) and (rank<=min(A.shape)-delta_t) ): #
+        G = np.random.randn(n, delta_t)
+        Y = np.dot(A,G)
+        #Y = Y - np.dot(Q_old, np.dot(Q_old.T,Y) ) # orthogonalization with Q
+        # power scheme
+        for j in range(npow):
+            Q = QR_fact(Y)
+            Q = QR_fact( np.dot(A.T,Q) )
+            Y = np.dot(A,Q)
+        Y  = Y - np.dot(Q_old, np.dot(Q_old.T,Y) ) # orthogonalization with Q
+        Q_new = QR_fact(Y)
+        B_new = np.dot(Q_new.T,A)
+        # build up approximate basis
+        Q_old  = np.append(Q_new, Q_old, axis=1)
+        #B      = np.append(B_new, B, axis=0)
+        normB += np.linalg.norm(B_new, ord='fro')**2
+        errpr  = abs( normA - normB ) / normA
+        rank  += delta_t
+        i_it  += 1
+        print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    # !!!
+    #UL, SL, VLT = np.linalg.svd(B, full_matrices=0)
+    #UL = np.dot(Q_old,UL)
+    # !!!
+    print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    UL, SL, VLT = powit_RSVD(A, rank, npow, nb_oversamp)
+    #return UL, SL, VLT
+    # !!!
+    rank  = SL.shape[0]
+    new_r = rank
+    for i in range(rank):
+        if( SL[i] <= tol ):
+            new_r = i
+            break
+    return UL[:,:(new_r)], SL[:(new_r)], VLT[:(new_r),:]
+# !!!
--- a/devel/svdwf/R3SVD_LiYu.py
+++ b/devel/svdwf/R3SVD_LiYu.py
@ -0,0 +1,58 @@
+# !!!
+import numpy as np
+from QR import QR_fact
+# !!!
+def R3SVD_LiYu(A, t, delta_t, npow, err_thr, maxit):
+    # !!!
+    # build initial QB decomposition
+    # !!!
+    n = A.shape[1]
+    G = np.random.randn(n, t) # n x t Gaussian random matrix
+    normA  = np.linalg.norm(A, ord='fro')**2
+    i_it = 0
+    rank = 0
+    Y = np.dot(A,G)
+    # The power scheme
+    for j in range(npow):
+        Q = QR_fact(Y)
+        Q = QR_fact( np.dot(A.T,Q) )
+        Y = np.dot(A,Q)
+    # orthogonalization process
+    Q_old = QR_fact(Y)
+    B     = np.dot(Q_old.T,A)
+    normB = np.linalg.norm(B, ord='fro')**2
+    # error percentage
+    errpr = abs( normA - normB ) / normA
+    rank += t
+    i_it += 1
+    print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    # !!!
+    # incrementally build up QB decomposition
+    # !!!
+    while ( (errpr>err_thr) and (i_it<maxit) and (rank<=min(A.shape)-delta_t) ): #
+        G = np.random.randn(n, delta_t) # n x delta_t Gaussian random matrix
+        Y = np.dot(A,G)
+        Y = Y - np.dot(Q_old, np.dot(Q_old.T,Y) ) # orthogonalization with Q
+        # power scheme
+        for j in range(npow):
+            Q = QR_fact(Y)
+            Q = QR_fact( np.dot(A.T,Q) )
+            Y = np.dot(A,Q)
+        Y  = Y - np.dot(Q_old, np.dot(Q_old.T,Y) ) # orthogonalization with Q
+        Q_new = QR_fact(Y)
+        B_new = np.dot(Q_new.T,A)
+        # build up approximate basis
+        Q_old  = np.append(Q_new, Q_old, axis=1)
+        B      = np.append(B_new, B, axis=0)
+        rank  += delta_t
+        i_it  += 1
+        normB += np.linalg.norm(B_new, ord='fro')**2
+        errpr  = abs( normA - normB ) / normA
+        print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    # !!!
+    print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr))
+    UL, SL, VLT = np.linalg.svd(B, full_matrices=0)
+    UL = np.dot(Q_old,UL)
+    # !!!
+    return UL, SL, VLT
+# !!!
--- a/devel/svdwf/RSVD.irp.f
+++ b/devel/svdwf/RSVD.irp.f
@ -0,0 +1,114 @@
+! !!!
+subroutine RSVD( i_state, low_rank, PowerIt_max, nb_oversamp, URSVD, DRSVD, VtRSVD )
+  ! !!!
+  BEGIN_DOC
+  ! standard RSVD for a prefixed rank
+  END_DOC
+  ! !!!
+  implicit none
+  include 'constants.include.F'
+  ! !!!
+  integer, intent(in)           :: i_state, low_rank, PowerIt_max, nb_oversamp
+  double precision, intent(out) :: URSVD(n_det_alpha_unique,low_rank), DRSVD(low_rank), VtRSVD(low_rank,n_det_beta_unique)
+  ! !!!
+  integer                       :: i, j, k, l, PowerIt, m, n 
+  double precision, allocatable :: r1(:,:), Q(:,:), P(:,:), B(:,:)
+  double precision, allocatable :: UB(:,:), D(:), Vt(:,:), U(:,:)
+  ! !!!
+  m = n_det_alpha_unique
+  n = n_det_beta_unique
+  ! !!!
+  ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !
+  ! !!!
+  allocate( Q(m, low_rank+nb_oversamp) )
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,k,l,r1)
+  allocate( r1(N_det,2) )
+  !$OMP DO
+  do l = 1, low_rank+nb_oversamp
+    Q(:,l) = 0.d0
+    call random_number(r1)
+    r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1)))
+    r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2))
+    do k = 1, N_det
+      i      = psi_bilinear_matrix_rows(k)
+      Q(i,l) = Q(i,l) + psi_bilinear_matrix_values(k,i_state) * r1(k,1)
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(r1)
+  !$OMP END PARALLEL
+  ! !!!
+  call ortho_qr(Q, size(Q,1), m, low_rank+nb_oversamp)
+  ! !!!
+  ! power scheme
+  ! !!!
+  allocate( P(n, low_rank+nb_oversamp) ) 
+  do PowerIt = 1, PowerIt_max
+    ! !!!
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
+    !$OMP DO
+    do l = 1, low_rank+nb_oversamp
+      P(:,l) = 0.d0
+      do k = 1, N_det
+        i      = psi_bilinear_matrix_rows(k)
+        j      = psi_bilinear_matrix_columns(k)
+        P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,i_state) * Q(i,l)
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    ! !!!
+    call ortho_qr(P, size(P,1), n, low_rank+nb_oversamp)
+    ! !!!
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
+    !$OMP DO
+    do l = 1, low_rank+nb_oversamp
+      Q(:,l) = 0.d0
+      do k = 1 , N_det
+        i      = psi_bilinear_matrix_rows(k)
+        j      = psi_bilinear_matrix_columns(k)
+        Q(i,l) = Q(i,l) + psi_bilinear_matrix_values(k,i_state) * P(j,l)
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    ! !!!
+    call ortho_qr(Q, size(Q,1), m, low_rank+nb_oversamp)
+    ! !!!
+  enddo
+  deallocate(P)
+  ! !!!
+  allocate( B(low_rank+nb_oversamp,n) )
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
+  !$OMP DO
+  do l = 1, low_rank+nb_oversamp
+    B(l,:) = 0.d0
+    do k = 1, N_det
+      i      = psi_bilinear_matrix_rows(k)
+      j      = psi_bilinear_matrix_columns(k)
+      B(l,j) = B(l,j) + psi_bilinear_matrix_values(k,i_state) * Q(i,l)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  ! !!!
+  allocate( UB(low_rank+nb_oversamp,low_rank+nb_oversamp), D(low_rank+nb_oversamp), Vt(low_rank+nb_oversamp,n) )
+  call svd_s( B, size(B,1), UB, size(UB,1), D, Vt, size(Vt,1), low_rank+nb_oversamp, n)
+  deallocate(B)
+  allocate( U(m,low_rank+nb_oversamp) )
+  call dgemm('N', 'N', m, low_rank+nb_oversamp, low_rank+nb_oversamp, 1.d0, Q, size(Q,1), UB, size(UB,1), 0.d0, U, size(U,1))
+  deallocate( Q,UB )
+  ! !!!
+  do l = 1, low_rank
+    DRSVD(l) = D(l)
+    do i = 1, m
+      URSVD(i,l) = U(i,l)
+    enddo
+    do i = 1, n
+      VtRSVD(l,i) = Vt(l,i)
+    enddo
+  enddo
+  ! !!!
+  return
+  ! !!!
+end
--- a/devel/svdwf/RSVD.py
+++ b/devel/svdwf/RSVD.py
@ -0,0 +1,20 @@
+# !!!
+import numpy as np
+from QR import QR_fact
+# !!!
+def powit_RSVD(X, new_r, nb_powit, nb_oversamp):
+    # !!!
+    G  = np.random.randn(X.shape[1], new_r+nb_oversamp)
+    Q  = QR_fact( np.dot(X,G) )
+    # !!!
+    for _ in range(nb_powit):
+        Q = QR_fact( np.dot(X.T,Q) )
+        Q = QR_fact( np.dot(X,Q) )
+        # !!!
+    Y = np.dot(Q.T,X)
+    # !!!
+    U, S, VT = np.linalg.svd(Y, full_matrices=0)
+    U = np.dot(Q,U)
+    return U[:,:(new_r)], S[:(new_r)], VT[:(new_r),:]
+    # !!!
+# !!!
--- a/devel/svdwf/buildpsi_SVDit.irp.f
+++ b/devel/svdwf/buildpsi_SVDit.irp.f
@ -0,0 +1,677 @@
+program buildpsi_SVDit
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj
+
+  double precision              :: norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: err0, err_tmp, e_tmp
+  double precision              :: E0, E0pt2, ept2, E0_old, tol_energy
+  double precision              :: ctmp, htmp
+
+  double precision, allocatable :: H0(:,:), Hdiag(:), Hkl(:,:)
+  double precision, allocatable :: coeff_psi_selected(:), coeff_psi_toselect(:)
+
+  integer                       :: n_FSVD, n_selected, n_toselect, it_svd, it_svd_max
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it
+  real(kind=8)                  :: speedup, speedup_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+ 
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_beta_unique) )
+  allocate( Dref(n_det_beta_unique) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, ' ---   First SVD: ok    ---  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                               numerote vectors 
+
+  ! Full rank
+  n_FSVD = n_det_beta_unique*n_det_beta_unique
+  print*, ' Full psi space rank = ', n_FSVD
+
+  ! Truncated rank
+  n_selected = 20
+  print*, ' initial psi space rank = ', n_selected
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  do i = 1, n_selected
+    numalpha_selected(i) = i
+    numbeta_selected (i) = i
+  enddo
+
+  ! check SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_selected
+        ii = numalpha_selected(l)
+        jj = numbeta_selected (l)
+        err_tmp = err_tmp + Dref(l) * Uref(i,ii) * Vref(j,jj)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  print *, ' SVD err (%)     = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  deallocate( Aref )
+
+  ! perturbative space rank
+  l = 3
+  k = 0
+  if( l.eq.1 ) then
+
+    n_toselect =  2*n_selected * ( n_det_beta_unique - n_selected )
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    ! nondiagonal blocs
+    do i = 1, n_selected
+      do j = n_selected+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    do j = 1, n_selected
+      do i = n_selected+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+
+  elseif( l.eq.2 ) then
+
+    n_toselect =  n_FSVD - n_selected*n_selected
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    ! nondiagonal blocs
+    do i = 1, n_selected
+      do j = n_selected+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    do j = 1, n_selected
+      do i = n_selected+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    ! diagonal bloc
+    do i = n_selected+1, n_det_beta_unique
+      do j = n_selected+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+
+  elseif( l.eq.3 ) then
+
+    n_toselect = n_FSVD - n_selected
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    do i = 1, n_det_beta_unique
+      do j = 1, n_det_beta_unique
+        if( (i.eq.j).and.(i.le.n_selected)) then 
+          cycle
+        else
+          k = k + 1
+          numalpha_toselect(k) = i 
+          numbeta_toselect (k) = j
+        endif
+      enddo
+    enddo
+
+  elseif( l.eq.4 ) then
+
+    n_toselect = n_FSVD - n_selected - (n_det_beta_unique-n_selected)**2
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    ! nondiagonal blocs
+    do i = 1, n_selected
+      do j = i+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    do j = 1, n_selected
+      do i = j+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+
+  endif
+
+  if( k.ne.n_toselect ) then
+    print*, ' error in numeroting '
+    stop
+  endif
+  print*, ' perturbative psi space rank = ', n_toselect
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  E0_old     = 0.d0
+  tol_energy = 1.d0
+  it_svd     = 0 
+  it_svd_max = 100
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    ! ---------------------------------------------------------------------------------------
+    !                            calculate the energy
+
+    allocate( coeff_psi_selected(n_selected) )
+    ! normalise | psi0 >
+    norm_psi = 0.d0
+    do i = 1, n_selected
+      norm_psi += Dref(i) * Dref(i)
+    enddo
+    norm_psi = 1.d0 / dsqrt(norm_psi) 
+    do i = 1, n_selected
+      coeff_psi_selected(i) = Dref(i) * norm_psi
+    enddo
+
+    ! H0(i,j) = < u_i v_j | H | u_i v_j >
+    print *,  ''
+    print *,  ''
+    print *,  ''
+    print *,  '-- Compute H --'
+    allocate( H0(n_selected,n_selected) )
+    call const_psiHpsi(n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+    ! avant SVD
+    ! E0 = < psi_0 | H | psi_0 > / < psi_0 | psi_0 >
+    E0 = 0.d0 
+    do i = 1, n_selected
+      ii = numalpha_selected(i)
+      htmp = 0.d0
+      do j = 1, n_selected
+        jj = numalpha_selected(j)
+        htmp = htmp + coeff_psi_selected(j) * H0(jj,ii)
+      enddo
+      E0 = E0 + htmp * coeff_psi_selected(i)
+    enddo
+    E0 = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0
+
+    deallocate( H0 )
+
+    ! ---------------------------------------------------------------------------------------
+
+
+
+    ! ---------------------------------------------------------------------------------------
+    !                                nondiagonal elements
+
+    print *,  ' --- Perturbation --- '
+
+    allocate( Hdiag(n_toselect), Hkl(n_selected,n_toselect) )
+    call const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref                                    & 
+         , numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect, Hdiag, Hkl)
+
+    ! evaluate the coefficients for all the vectors
+    allocate( coeff_psi_toselect(n_toselect) )
+    ept2 = 0.d0
+    do ii = 1, n_toselect
+      ctmp = 0.d0
+      do l = 1, n_selected
+        ctmp += coeff_psi_selected(l) * Hkl(l,ii)
+      enddo
+      coeff_psi_toselect(ii) = ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) )
+      ept2           += ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) )
+    enddo
+    E0pt2 = E0 + ept2
+    deallocate( Hdiag, Hkl)
+
+    print *, ' perturb energy  = ', E0pt2, ept2
+    print*, ' delta E0         = ', E0pt2 - E0_old
+    tol_energy = 100.d0 * dabs(E0pt2-E0_old)/dabs(E0pt2)
+    E0_old     = E0pt2
+
+    ! normalize the new psi and perform a new SVD
+    norm_psi = 0.d0
+    do l = 1, n_toselect
+      norm_psi = norm_psi + coeff_psi_toselect(l)*coeff_psi_toselect(l)
+    enddo
+    norm_psi = norm_psi + 1.d0
+    norm_psi = 1.d0 / dsqrt(norm_psi)
+    do i = 1, n_toselect
+      coeff_psi_toselect(i) = coeff_psi_toselect(i) * norm_psi
+    enddo
+    do i = 1, n_selected
+      coeff_psi_selected(i) = coeff_psi_selected(i) * norm_psi
+    enddo
+
+    print *,  ' --- SVD --- '
+    call perform_newSVD(n_selected, n_toselect, numalpha_selected, numbeta_selected      &
+         , numalpha_toselect, numbeta_toselect, coeff_psi_selected, coeff_psi_toselect   & 
+         , Uref, Vref, Dref                                                              )
+
+    ! ---------------------------------------------------------------------------------------
+
+    deallocate( coeff_psi_toselect )
+    deallocate( coeff_psi_selected )
+
+
+    write(55,'(i5,4x,4(f22.15,2x))') it_svd, E0, E0pt2
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+    !print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    !print*, '                                                                         '
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+  deallocate( Uref, Vref, Dref )
+
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print *,' ___________________________________________________________________'
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,//)', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+
+
+end
+
+
+
+
+
+
+
+subroutine const_psiHpsi(n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected,n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree
+
+  integer                       :: i, j, k, l
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  double precision              :: h12, x
+
+  H0(:,:) = 0.d0
+
+  do i = 1, n_det_alpha_unique
+    det1(:,1) = psi_det_alpha_unique(:,i)
+    do k = 1, n_det_alpha_unique
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! !!!
+          call i_H_j(det1, det2, N_int, h12)
+          ! !!!
+          ! ~~~            H0                  ~~~
+          do n = 1, n_selected
+            ii0 = numalpha_selected(n)
+            jj0 = numbeta_selected (n)
+            x   = Uref(k,ii0) * Vref(l,jj0) * h12
+            do m = 1, n_selected
+              ii = numalpha_selected(m)
+              jj = numbeta_selected (m)
+              H0(m,n) += Uref(i,ii) * Vref(j,jj) * x
+            enddo
+          enddo
+          ! ~~~          ~~~~~~                ~~~
+        enddo
+      enddo
+    enddo
+  enddo
+
+end subroutine const_psiHpsi
+
+
+
+
+
+subroutine const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref                                    & 
+           , numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect, Hdiag, Hkl)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_selected,n_toselect
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree
+  integer                       :: i, j, k, l
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  double precision              :: h12, y
+  double precision, allocatable :: Hdiag_tmp(:), Hkl_tmp(:,:) 
+
+  Hdiag(:) = 0.d0
+  Hkl(:,:) = 0.d0
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(n,ii0,jj0,y,m,ii,jj,i,j,k,l,h12,det1,det2,Hdiag_tmp,Hkl_tmp,degree)           &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,n_selected,n_toselect,Uref,Vref,numalpha_toselect,numbeta_toselect,      &
+ !$OMP                 numalpha_selected, numbeta_selected,Hkl,Hdiag )
+  allocate( Hdiag_tmp(n_toselect), Hkl_tmp(n_selected,n_toselect) )
+  Hdiag_tmp(:) = 0.d0
+  Hkl_tmp(:,:) = 0.d0
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      ! !!!
+      ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+      ! !!!
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          ! !!!
+          ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+          ! !!!
+          call i_H_j(det1, det2, N_int, h12)
+          ! ~ ~ ~               H                ~ ~ ~
+          do n = 1, n_toselect
+            ii0 = numalpha_toselect(n)
+            jj0 = numbeta_toselect (n)
+            y   = Uref(k,ii0) * Vref(l,jj0) * h12
+            ! Hdiag
+            Hdiag_tmp(n) += Uref(i,ii0) * Vref(j,jj0) * y
+            do m = 1, n_selected
+              ii = numalpha_selected(m)
+              jj = numbeta_selected (m)
+              ! Hkl
+              Hkl_tmp(m,n) += Uref(i,ii) * Vref(j,jj) * y
+            enddo
+          enddo
+          ! ~ ~ ~             ! ! !              ~ ~ ~
+        enddo
+      enddo
+      ! !!!
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP CRITICAL
+  do n = 1, n_toselect
+     Hdiag(n) += Hdiag_tmp(n)
+     do m = 1, n_selected
+       Hkl(m,n) += Hkl_tmp(m,n)
+     enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( Hdiag_tmp,Hkl_tmp )
+ !$OMP END PARALLEL
+
+end subroutine const_Hdiag_Hkl
+
+
+
+
+
+
+subroutine perform_newSVD(n_selected, n_toselect, numalpha_selected, numbeta_selected      &
+           , numalpha_toselect, numbeta_toselect, coeff_psi_selected, coeff_psi_toselect   & 
+           , Uref, Vref, Dref                                                              )
+
+  USE OMP_LIB
+ 
+  implicit none
+
+  integer, intent(in)             :: n_selected, n_toselect
+  integer, intent(in)             :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  integer, intent(in)             :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)    :: coeff_psi_selected(n_selected), coeff_psi_toselect(n_toselect)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj
+  double precision                :: err0, err_norm, err_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(n_det_alpha_unique,n_det_beta_unique) )
+  allocate( V_svd(n_det_beta_unique ,n_det_beta_unique) )
+  allocate( S_mat(n_det_beta_unique ,n_det_beta_unique) )
+
+  U_svd(:,:) = Uref(:,:)
+  V_svd(:,:) = Vref(:,:)
+
+  S_mat(:,:) = 0.d0
+  do l = 1, n_selected
+    ii = numalpha_selected(l)
+    jj = numbeta_selected (l)
+    S_mat(ii,jj) = coeff_psi_selected(l)
+  enddo
+  do l = 1, n_toselect
+    ii = numalpha_toselect(l)
+    jj = numbeta_toselect (l)
+    S_mat(ii,jj) = coeff_psi_toselect(l)
+  enddo
+
+  ! construct the new matrix: U_svd    x  S_mat    x transpose(V_svd)
+  !                          (NaxNb)     (NbxNb)     transpose(NbxNb)
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(nn,nn) )
+  call dgemm( 'N', 'T', nn, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, nn, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd         &
+            , Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, '    +++     new SVD is performed     +++            '
+
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, mm
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%)     = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, nn 
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newSVD
+
--- a/devel/svdwf/buildpsi_diagSVDit.py
+++ b/devel/svdwf/buildpsi_diagSVDit.py
@ -0,0 +1,614 @@
+import sys, os
+QMCCHEM_PATH=os.environ["QMCCHEM_PATH"]
+sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/")
+
+from ezfio import ezfio
+from datetime import datetime
+import time
+import numpy as np
+import subprocess
+from scipy.linalg import eig, eigh
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+def get_energy():
+    buffer = subprocess.check_output( ['qmcchem', 'result', '-e', 'e_loc', filename]
+                                      , encoding='UTF-8' )
+    if buffer.strip() != "":
+        buffer = buffer.splitlines()[-1]
+        _, energy, error = [float(x) for x in buffer.split()]
+        return energy, error
+    else:
+      return None, None
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def run_qmc():
+    return subprocess.check_output(['qmcchem', 'run', filename])
+
+def stop_qmc():
+    subprocess.check_output(['qmcchem', 'stop', filename])
+
+def set_vmc_params():
+    #subprocess.check_output(['qmcchem', 'edit', '-c', '-j', 'Simple',
+    #                                 '-m', 'VMC',
+    #                                 '-l', str(20),
+    #                                 '--time-step=0.3',
+    #                                 '--stop-time=36000',
+    #                                 '--norm=1.e-5',
+    #                                 '-w', '10',
+    #                                 filename])
+    subprocess.check_output(['qmcchem', 'edit', '-c'
+                                      , '-j', 'None'
+                                      , '-l', str(block_time)
+                                      , '-t', str(total_time)
+                                      , filename])
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_Aref():
+
+    Aref = np.zeros( (n_alpha, n_beta) )
+    for k in range(n_det):
+        i         = A_rows[k] - 1
+        j         = A_cols[k] - 1
+        Aref[i,j] = A_vals[0][k]
+    return( Aref )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def check_svd_error( A, U, S, V ):
+
+    # || A - U x S x transpose(V) ||
+
+    # to normalize
+    norm_A = np.linalg.norm(A, ord='fro')
+    
+    # vector S ==> matrix S
+    _, na  = U.shape
+    _, nb  = V.shape
+    S_mat  = np.zeros( (na,nb) )
+    for i in range( min(na,nb)  ):
+        S_mat[i,i] = S[i]
+
+    #A_SVD   = np.linalg.multi_dot([ U, np.diag(S), Vt ])
+    A_SVD   = np.linalg.multi_dot([ U, S_mat, V.T ])
+    err_SVD = 100. * np.linalg.norm( A - A_SVD, ord="fro") / norm_A
+
+    print(' error between A_SVD and Aref = {} %\n'.format(err_SVD) )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def FSVD_save_EZFIO():
+
+    U_toEZFIO = np.zeros( ( 1, U_FSVD.shape[1], U_FSVD.shape[0] ) )
+    V_toEZFIO = np.zeros( ( 1, V_FSVD.shape[1], V_FSVD.shape[0] ) )
+    U_toEZFIO[0,:,:] = U_FSVD.T
+    V_toEZFIO[0,:,:] = V_FSVD.T
+
+    ezfio.set_spindeterminants_psi_svd_alpha_unique( U_toEZFIO )
+    ezfio.set_spindeterminants_psi_svd_beta_unique ( V_toEZFIO )
+    ezfio.set_spindeterminants_psi_svd_coefs_unique( S_FSVD )
+
+    print(' Full SVD unique vectors & coeff are saved to EZFIO ')
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def numerote_selected_vectors():
+    
+    numalpha_selected = []
+    numbeta_selected  = []
+
+    for i in range(n_TSVD): 
+        for j in range(n_TSVD):
+            numalpha_selected.append(j+1)
+            numbeta_selected.append(i+1)
+
+    if( (len(numalpha_selected)!=n_selected) or (len(numbeta_selected)!=n_selected) ) :
+        print(' error in numerating selectod vectors')
+        print(' {} != {} != {}'.format(n_selected,len(numalpha_selected),len(numbeta_selected)) )
+    else:
+        ezfio.set_spindeterminants_psi_svd_alpha_numselected(numalpha_selected)
+        ezfio.set_spindeterminants_psi_svd_beta_numselected (numbeta_selected)
+        print(' selected vectors are numeroted in EZFIO')
+
+    return( numalpha_selected,numbeta_selected  )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def numerote_toselect_vectors( choix  ):
+    
+    numalpha_toselect = []
+    numbeta_toselect  = []
+
+    if( choix == 3 ):
+        # nondiagonal blocs
+        for i in range(n_TSVD): 
+            for j in range(n_TSVD,n_beta):
+                numalpha_toselect.append(i+1)
+                numbeta_toselect.append(j+1)
+        for i in range(n_TSVD,n_alpha):
+            for j in range(n_TSVD): 
+                numalpha_toselect.append(i+1)
+                numbeta_toselect.append(j+1)
+        # diagonal bloc
+        for i in range(n_TSVD,n_alpha):
+            for j in range(n_TSVD,n_beta):
+                numalpha_toselect.append(i+1)
+                numbeta_toselect.append(j+1)
+    elif( choix == 2 ):
+        # nondiagonal blocs
+        for i in range(n_TSVD): 
+            for j in range(n_TSVD,n_beta):
+                numalpha_toselect.append(i+1)
+                numbeta_toselect.append(j+1)
+        for i in range(n_TSVD,n_alpha):
+            for j in range(n_TSVD): 
+                numalpha_toselect.append(i+1)
+                numbeta_toselect.append(j+1)
+    else:
+        print(' choix = 2 ou 3' )
+        exit()
+
+    if( (len(numalpha_toselect)!=n_toselect) or (len(numbeta_toselect)!=n_toselect) ) :
+        print(' error in numerating vectors to select')
+        print(' {} != {} != {}'.format(n_toselect,len(numalpha_toselect),len(numbeta_toselect)) )
+    else:
+        ezfio.set_spindeterminants_psi_svd_alpha_numtoselect(numalpha_toselect)
+        ezfio.set_spindeterminants_psi_svd_beta_numtoselect (numbeta_toselect)
+        print(' vectors to select are numeroted in EZFIO')
+
+    return( numalpha_toselect,numbeta_toselect  )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def normalize_S_TSVD():
+
+    global S_TSVD
+
+    norm_S_TSVD = np.linalg.norm(S_TSVD, ord='fro')
+    S_TSVD = S_TSVD / norm_S_TSVD
+
+    return()
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_h_selected_matrix():
+
+    h_selected_matrix = np.zeros( (n_selected,n_selected) )
+    h_selected_stater = np.zeros( (n_selected,n_selected) )
+
+    beg_h_selected_matrix = results.find('h_selected_matrix : [ ') + len('h_selected_matrix : [ ')
+    end_h_selected_matrix = len(results)
+    h_selected_matrix_buf = results[beg_h_selected_matrix:end_h_selected_matrix]
+    h_selected_matrix_buf = h_selected_matrix_buf.split( '\n' )
+
+    for i in range(1,n_selected+1):
+        ii0 = (i-1) * n_selected
+        for j in range(1,n_selected+1):
+            iline = ii0 + j
+
+            line = h_selected_matrix_buf[iline].split()
+            indc = int  ( line[0] )
+
+            if( indc != iline ):
+                print('Error in get_h_selected_matrix')
+                exit()
+            else:
+                h_selected_matrix[i-1][j-1] = float( line[2] )
+                h_selected_stater[i-1][j-1] = float( line[4] )
+
+    return(h_selected_matrix,h_selected_stater)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_o_selected_matrix():
+
+    o_selected_matrix = np.zeros( (n_selected,n_selected) )
+    o_selected_stater = np.zeros( (n_selected,n_selected) )
+
+    beg_o_selected_matrix = results.find('overlop_selected_matrix : [ ') + len('overlop_selected_matrix : [ ')
+    end_o_selected_matrix = len(results)
+    o_selected_matrix_buf = results[beg_o_selected_matrix:end_o_selected_matrix]
+    o_selected_matrix_buf = o_selected_matrix_buf.split( '\n' )
+
+    for i in range(1,n_selected+1):
+        ii0 = (i-1) * n_selected
+        for j in range(1,n_selected+1):
+            iline = ii0 + j
+
+            line = o_selected_matrix_buf[iline].split()
+            indc = int  ( line[0] )
+
+            if( indc != iline ):
+                print('Error in get_o_selected_matrix')
+                exit()
+            else:
+                o_selected_matrix[i-1][j-1] = float( line[2] )
+                o_selected_stater[i-1][j-1] = float( line[4] )
+
+    return(o_selected_matrix,o_selected_stater)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_Epostsvd():
+
+    # symmetrise and diagonalise
+    aa = h_selected_matrix
+    aa = 0.5*( aa + aa.T )
+    bb = o_selected_matrix
+    eigvals_postsvd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True,
+            check_finite=True, homogeneous_eigvals=False)
+
+    d_postsvd = np.diagflat(S_TSVD)
+    d_postsvd = d_postsvd.reshape( (1,n_selected*n_selected) )
+    recouvre_postsvd = np.abs(d_postsvd @ vr)
+    ind_gspostsvd = np.argmax(recouvre_postsvd)
+
+    E_postsvd = eigvals_postsvd[ind_gspostsvd]
+
+    return( E_postsvd,  vr[:,ind_gspostsvd] )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def Hqmc_svd_diag():
+
+    # read CI_SVD matrices
+    Ci_h_matrix_svd       = get_Ci_h_matrix_svd()
+    Ci_overlap_matrix_svd = get_Ci_overlap_matrix_svd()
+
+    # symmetrise
+    aa = Ci_h_matrix_svd
+    aa = 0.5*( aa + aa.T )
+
+    bb = Ci_overlap_matrix_svd
+
+    # diagonalise
+    eigvals_svd, vr = eig( aa, bb, left=False, right=True
+                         , overwrite_a=True, overwrite_b=True
+                         , check_finite=True, homogeneous_eigvals=False)
+
+    recouvre_svd = np.abs( np.dot( S_FSVD, vr) )
+    ind_gssvd    = np.argmax(recouvre_svd)
+    E_svd        = eigvals_svd[ind_gssvd] + nuc_energy
+
+    return( E_svd, vr[:,ind_gssvd] )
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def SVD_postsvd():
+
+    # reshape sigma_postsvd
+    sigma_postsvd_mat = np.zeros( (n_selected,n_selected) )
+    for i in range(n_TSVD):
+        ii = i*n_TSVD
+        for j in range(n_TSVD):
+            jj = i*n_TSVD + j
+            sigma_postsvd_mat[i][j] = sigma_postsvd[jj]
+
+    # construct the new matrix Y & perform a new SVD
+    Y = np.dot( U_TSVD , np.dot(sigma_postsvd_mat , Vt_TSVD) )
+    U, S, Vt = np.linalg.svd(Y, full_matrices=True)
+
+    return(U, S, Vt)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_hij_fm():
+
+    hij_fm        = np.zeros( (n_toselect) )
+    hij_fm_stater = np.zeros( (n_toselect) )
+
+    beg_hij = results.find('hij_fm : [ ') + len('hij_fm : [ ')
+    end_hij = len(results)
+    hij_buf = results[beg_hij:end_hij]
+    hij_buf = hij_buf.split( '\n' )
+
+    for iline in range(1,n_toselect+1):
+        line = hij_buf[iline].split()
+        indc = int( line[0] )
+        if( indc != iline ):
+            print('Error in get_hij_fm')
+            exit()
+        else:
+            hij_fm       [iline-1] = float( line[2] )
+            hij_fm_stater[iline-1] = float( line[4] )
+
+    return(hij_fm, hij_fm_stater)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_hij_sm():
+
+    hij_sm        = np.zeros( (n_toselect) )
+    hij_sm_stater = np.zeros( (n_toselect) )
+
+    beg_hij = results.find('hij_sm : [ ') + len('hij_sm : [ ')
+    end_hij = len(results)
+    hij_buf = results[beg_hij:end_hij]
+    hij_buf = hij_buf.split( '\n' )
+
+    for iline in range(1,n_toselect+1):
+        line = hij_buf[iline].split()
+        indc = int( line[0] )
+        if( indc != iline ):
+            print('Error in get_hij_sm')
+            exit()
+        else:
+            hij_sm       [iline-1] = float( line[2] )
+            hij_sm_stater[iline-1] = float( line[4] )
+
+    return(hij_sm, hij_sm_stater)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+def get_xij_diag():
+
+    xij_diag        = np.zeros( (n_toselect) )
+    xij_diag_stater = np.zeros( (n_toselect) )
+
+    beg_xij = results.find('xij_diag : [ ') + len('xij_diag : [ ')
+    end_xij = len(results)
+    xij_buf = results[beg_xij:end_xij]
+    xij_buf = xij_buf.split( '\n' )
+
+    for iline in range(1,n_toselect+1):
+        line = xij_buf[iline].split()
+        indc = int( line[0] )
+        if( indc != iline ):
+            print('Error in get_xij_diag')
+            exit()
+        else:
+            xij_diag       [iline-1] = float( line[2] )
+            xij_diag_stater[iline-1] = float( line[4] )
+
+    return(xij_diag, xij_diag_stater)
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ !
+
+
+
+
+
+if __name__ == '__main__':
+
+    t0 = time.time()
+    print("Today's date:", datetime.now() )
+
+    # EZFIO file
+    filename = "/home/aammar/qp2/src/svdwf/2h2_work/2h2_cisd"
+    ezfio.set_file(filename)
+    print("filename = {}".format(filename))
+
+    # parameters
+    energ_nuc = 1.711353545183182	# for 2h2 
+
+    # get spindeterminant data from EZFIO
+    n_alpha = ezfio.get_spindeterminants_n_det_alpha()
+    n_beta  = ezfio.get_spindeterminants_n_det_beta()
+    A_rows  = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows()   )
+    A_cols  = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns())
+    A_vals  = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values() )
+    n_det   = A_rows.shape[0]
+
+    print('~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~')
+    print(' matrix: {} x {}'.format(n_alpha,n_beta))
+    print(' n_det = {}'.format(n_det))
+    print('~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~')
+
+    # construct intial dense matrix
+    Aref = get_Aref()
+
+    # perform initial Full SVD
+    print('')
+    print(' ----- Performing Full SVD ----- ')
+    U_FSVD, S_FSVD, Vt_FSVD  = np.linalg.svd(Aref, full_matrices=True)
+    V_FSVD = Vt_FSVD.T
+
+    # check Full SVD error
+    check_svd_error( Aref, U_FSVD, S_FSVD, V_FSVD )
+
+    # save SVD vectors & coefficients in EZFIO 
+    ezfio.set_spindeterminants_n_svd_coefs_unique(min(n_alpha,n_beta))
+    FSVD_save_EZFIO()
+
+    # truncated SVD
+    n_TSVD = 15
+    ezfio.set_spindeterminants_n_svd_coefs(n_TSVD) 
+    U_TSVD = U_FSVD[:,:n_TSVD]
+    V_TSVD = V_FSVD[:,:n_TSVD]
+    S_TSVD = S_FSVD[:n_TSVD]
+
+    # check truncataed SVD error
+    check_svd_error( Aref, U_TSVD, S_TSVD, V_TSVD )
+
+    # numerote selected vectors & save to EZFIO
+    n_selected = n_TSVD * n_TSVD
+    print(' n_selected = {}'.format(n_selected))
+    ezfio.set_spindeterminants_n_svd_selected(n_selected)
+    numalpha_selected,numbeta_selected = numerote_selected_vectors()
+
+    # numerote vectors to select & save to EZFIO
+    n_toselect = n_alpha * n_beta - n_selected 
+    print(' n_toselect = {}'.format(n_toselect))
+    ezfio.set_spindeterminants_n_svd_toselect(n_toselect)
+    numalpha_toselect,numbeta_toselect = numerote_toselect_vectors(choix=3)
+
+
+    #_________________________________________________________________________________________
+    #
+    #                          loop over SVD iterations
+    #_________________________________________________________________________________________
+    
+    it_svd     = 0
+    it_svd_max = 1
+
+    while( it_svd < it_svd_max ):
+
+        it_svd = it_svd + 1
+
+        # normalize sigular values in the truncated space
+        #normalize_S_TSVD()
+
+        # run QMC to get H_postsvd
+        block_time = 300	# in sec
+        total_time = 1800	# in sec
+        set_vmc_params()
+        ezfio.set_properties_hij_fm(False)
+        ezfio.set_properties_hij_sm(False)
+        ezfio.set_properties_xij_diag(False)
+        ezfio.set_properties_h_selected_matrix(True)
+        ezfio.set_properties_overlop_selected_matrix(True)
+        run_qmc()
+
+        # read QMC=CHEM results
+        t_read = time.time()
+        print(' getting QMCCHEM results from {}'.format(EZFIO_file) )
+        results =  subprocess.check_output(['qmcchem', 'result', EZFIO_file, '>> results.dat'],  encoding='UTF-8')
+        print(' getting results after {} minutes \n'.format( (time.time()-t_read)/60. ))
+
+        # < E_loc >
+        Eloc, ErrEloc = get_energy()
+        print(' Eloc = {} +/- {}'.format(Eloc, ErrEloc))
+
+        # get H and overlop from QMC=CHEM
+        h_selected_matrix, h_selected_stater = get_h_selected_matrix()
+        o_selected_matrix, o_selected_stater = get_o_selected_matrix()
+
+        # ground state from H S = E S
+        E_postsvd, sigma_postsvd = get_Epostsvd()
+        print(' post svd energy = {}'.format(E_postsvd+energ_nuc) )
+        
+        # perform new SVD: U x sigma_postsvd x Vt --> U' x S' x Vt'
+        U_FSVD, S_FSVD, Vt_FSVD = SVD_postsvd()
+        V_FSVD = Vt_FSVD.T
+
+        # save in EZFIO
+        FSVD_save_EZFIO()
+
+        # run QMC to get hij, e and xij_diag from QMC=CHEM
+        block_time = 300	# in sec
+        total_time = 1800	# in sec
+        set_vmc_params()
+        ezfio.set_properties_h_selected_matrix(False)
+        ezfio.set_properties_overlop_selected_matrix(False)
+        ezfio.set_properties_hij_fm(True)
+        ezfio.set_properties_hij_sm(True)
+        ezfio.set_properties_xij_diag(True)
+        run_qmc()
+
+        # read QMC=CHEM results
+        t_read = time.time()
+        print(' getting QMCCHEM results from {}'.format(EZFIO_file) )
+        results =  subprocess.check_output(['qmcchem', 'result', EZFIO_file, '>> results.dat'],  encoding='UTF-8')
+        print(' getting results after {} minutes \n'.format( (time.time()-t_read)/60. ))
+
+        # < E_loc >
+        Eloc, ErrEloc = get_energy()
+        print(' Eloc = {} +/- {}'.format(Eloc, ErrEloc))
+
+        # hij  =  < psi_svd J | H | J l l' > / < psi_svd J | psi_svd J > 
+        #      =  < H (J l l')/(psi_svd J) >    ( first  method: fm )
+        #      =  < E_loc (l l') / psi_svd >    ( second method: sm )
+        hij_fm, hif_fm_stater = get_hij_fm()
+        hij_sm, hif_sm_stater = get_hij_sm()
+
+        # get xij_diag
+        xij_diag, xij_diag_stater = get_xij_diag()
+        
+        # first method
+        dij_fm        = np.zeros( (n_toselect) )
+        dij_fm_stater = np.zeros( (n_toselect) )
+        for i in range(n_toselect):
+            dij_fm[i] = hij_fm[i] / ( Eloc - xij_diag[i] ) 
+            # statistic error 
+            a = ( hij_fm_stater[i]*hij_fm_stater[i] ) / ( hij_fm[i]*hij_fm[i] )
+            b = ( ErrEloc*ErrEloc + xij_diag_stater[i]*xij_diag_stater[i] ) / ( (Eloc-xij_diag[i])*(Eloc-xij_diag[i]) ) 
+            dij_fm_stater[i] = abs(dij_fm[i]) * np.sqrt( a + b )
+
+        cc = np.concatenate((dij_fm,dij_fm_stater),axis=1)
+        np.savetxt('dij_fm.txt',cc)
+
+        # second method
+        dij_sm        = np.zeros( (n_toselect) )
+        dij_sm_stater = np.zeros( (n_toselect) )
+        for i in range(n_toselect):
+            dij_sm[i] = hij_sm[i] / ( Eloc - xij_diag[i] ) 
+            # statistic error 
+            a = ( hij_fm_stater[i]*hij_fm_stater[i] ) / ( hij_sm[i]*hij_sm[i] )
+            b = ( ErrEloc*ErrEloc + xij_diag_stater[i]*xij_diag_stater[i] ) / ( (Eloc-xij_diag[i])*(Eloc-xij_diag[i]) ) 
+            dij_sm_stater[i] = abs(dij_sm[i]) * np.sqrt( a + b )
+
+        cc = np.concatenate((dij_sm,dij_sm_stater),axis=1)
+        np.savetxt('dij_sm.txt',cc)
+
+        # TODO
+        # choose fm or sm & perform a new SVD 
+
+
+    #_________________________________________________________________________________________
+
+    print("end after {:.3f} minutes".format((time.time()-t0)/60.) )
+
--- a/devel/svdwf/buildpsi_diagSVDit_Anthony_v0.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v0.irp.f
@ -0,0 +1,612 @@
+program buildpsi_diagSVDit_Anthony_v0
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj, na, nb
+
+  double precision              :: norm_psi, inv_sqrt_norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: E0_av, E0_ap, E0pt2
+  double precision              :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlop_postsvd
+  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
+  double precision              :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi
+
+  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:)
+  double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)
+
+  integer                       :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max
+  integer                       :: n_selected2
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it
+  real(kind=8)                  :: speedup, speedup_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  i_state = 1
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+ 
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_beta_unique) )
+  allocate( Dref(n_det_beta_unique) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, ' ---   First SVD: ok    ---  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                               numerote vectors 
+
+  ! Truncated rank
+  n_selected  = 20
+  print*, ' initial psi space rank = ', n_selected
+
+  ! check SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_selected
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  print *, ' SVD err (%)     = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  E0_old     = 0.d0
+  tol_energy = 1.d0
+  it_svd     = 0 
+  it_svd_max = 100
+
+  allocate( H(n_det_beta_unique,n_det_beta_unique,n_det_beta_unique,n_det_beta_unique) )
+  allocate( psi_postsvd(n_det_beta_unique,n_det_beta_unique) )
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    double precision :: norm
+    norm = 0.d0
+    do j = 1, n_selected
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    Dref = Dref / dsqrt(norm)
+
+    print *,  ''
+    print *,  ''
+    print *,  ''
+    print *,  '-- Compute H --'
+    call const_H_uv(Uref, Vref, H)
+
+    ! H0(i,j) = < u_i v_j | H | u_i v_j >
+    ! E0 = < psi_0 | H | psi_0 >
+    E0 = 0.d0 
+    do j = 1, n_selected
+      do i = 1, n_selected
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) 
+      enddo
+    enddo
+    E0_av = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0_av
+
+    double precision, allocatable :: eigval0(:)
+    double precision, allocatable :: eigvec0(:,:,:)
+    double precision, allocatable :: H_tmp(:,:,:,:)
+
+    allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) )
+    do l=1,n_selected
+     do k=1,n_selected
+      do j=1,n_selected
+       do i=1,n_selected
+        H_tmp(i,j,k,l) = H(i,j,k,l)
+       enddo
+      enddo
+     enddo
+    enddo
+    allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2))
+    eigvec0 = 0.d0
+
+    print *,  ' --- Diag post-SVD --- '
+    call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2)
+    E0_postsvd = eigval0(1)+nuclear_repulsion
+    print*, ' postsvd energy  = ', E0_postsvd
+    deallocate(H_tmp, eigval0)
+
+    print *,  ' --- SVD --- '
+    Dref = 0.d0
+    call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref)
+    deallocate(eigvec0)
+
+    print *,  ' --- Compute H --- '
+    call const_H_uv(Uref, Vref, H)
+
+    ! H0(i,j) = < u_i v_j | H | u_i v_j >
+    ! E0 = < psi_0 | H | psi_0 >
+    E0 = 0.d0 
+    norm = 0.d0
+    do j = 1, n_det_beta_unique
+      do i = 1, n_det_beta_unique
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) 
+      enddo
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    E0_ap = E0 + nuclear_repulsion
+    print *,' E0 (apres SVD) =', E0_ap
+    !print *,' norm =', norm
+
+    print *,  ' --- Perturbation --- '
+    psi_postsvd = 0.d0
+    do i=1,n_selected
+      psi_postsvd(i,i) = Dref(i) 
+    enddo
+
+    !do j=1,n_selected
+    !  do i=n_selected+1,n_det_beta_unique
+    !    print *, i,j, H(i,j,i,j)
+    !  enddo
+    !enddo
+    !do j=n_selected+1,n_det_beta_unique
+    !  do i=1,n_selected
+    !    print *, i,j, H(i,j,i,j)
+    !  enddo
+    !enddo
+    !do j=n_selected+1,n_det_beta_unique
+    !  do i=n_selected+1,n_det_beta_unique
+    !    print *, i,j, H(i,j,i,j)
+    !  enddo
+    !enddo
+
+    Ept2 = 0.d0
+    do j=1,n_selected
+      do i=n_selected+1,n_det_beta_unique
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2       += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+
+    do j=n_selected+1,n_det_beta_unique
+      do i=1,n_selected
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2       += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+    do j=n_selected+1,n_det_beta_unique
+      do i=n_selected+1,n_det_beta_unique
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2       += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+    E0pt2 = E0_ap + Ept2
+    print *, ' perturb energy = ', E0pt2, Ept2
+    
+    tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2)
+    E0_old     = E0pt2
+
+    print *,  ' --- SVD --- '
+    call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)
+
+    write(22,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+  end do
+
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+  deallocate( H, psi_postsvd )
+  deallocate( Uref, Vref, Dref )
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+  print *,' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_selected
+  double precision, intent(in)    :: psi_postsvd(n_selected,n_selected)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )
+
+  U_svd(:,:) = Uref(:,1:n_selected)
+  V_svd(:,:) = Vref(:,1:n_selected)
+
+  S_mat(:,:) = 0.d0
+  do j  = 1, n_selected
+    do i  = 1, n_selected
+      S_mat(i,j) = psi_postsvd(i,j)
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_selected,nn) )
+  call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  print *, '    +++ new perturbative SVD is performed +++       '
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, n_selected
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%)              = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+
+  do l = 1, n_selected
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+ ! print *, Dref(:)
+
+  overlopU_mat = 0.d0
+  overlopV_mat = 0.d0
+  do i = 1, nn
+    do j = 1, nn
+      overlopU = 0.d0 
+      do ii = 1, mm
+        overlopU += Uref(ii,j) * Uref(ii,i)
+      enddo
+      overlopU_mat += overlopU
+      overlopV = 0.d0 
+      do ii = 1, nn
+        overlopV += Vref(ii,j) * Vref(ii,i)
+      enddo
+      overlopV_mat += overlopV
+    enddo
+  enddo
+  print *, 'overlop U =', overlopU_mat
+  print *, 'overlop V =', overlopV_mat
+
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newpostSVD
+
+
+
+subroutine const_H_uv(Uref, Vref, H)
+
+  USE OMP_LIB
+
+  implicit none
+
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H(n_det_beta_unique,n_det_beta_unique, n_det_beta_unique,n_det_beta_unique) 
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, nn, mm, ind_gs
+  integer                       :: p,q,r,s
+  double precision              :: h12, x
+
+  double precision, allocatable :: H0(:,:,:,:) 
+  double precision, allocatable :: H1(:,:,:,:) 
+
+
+
+  allocate( H0(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique, n_det_beta_unique) )
+  allocate( H1(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique, n_det_beta_unique) )
+
+  H0(:,:,:,:) = 0.d0
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree)  &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,Uref,Vref,H0,H1,H)
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! !!!
+          call i_H_j(det2, det1, N_int, H0(k,l,i,j) )
+          ! !!!
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+
+
+ !$OMP SINGLE
+ H1 = 0.d0
+ !$OMP END SINGLE
+ !$OMP DO 
+  do s = 1, n_det_beta_unique
+    do l = 1, n_det_beta_unique
+      do k = 1, n_det_alpha_unique
+       do j = 1, n_det_beta_unique
+         do i = 1, n_det_alpha_unique
+            H1(i,j,k,s) = H1(i,j,k,s) + H0(i,j,k,l) * Vref(l,s)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+ H0 = 0.d0
+ !$OMP END SINGLE
+ !$OMP DO
+  do s = 1, n_det_beta_unique
+    do r = 1, n_det_beta_unique
+      do k = 1, n_det_alpha_unique
+       do j = 1, n_det_beta_unique
+         do i = 1, n_det_alpha_unique
+            H0(i,j,r,s) = H0(i,j,r,s) + H1(i,j,k,s) * Uref(k,r)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+ H1 = 0.d0
+ !$OMP END SINGLE
+ !$OMP DO
+  do s = 1, n_det_beta_unique
+    do j = 1, n_det_beta_unique
+      do r = 1, n_det_alpha_unique
+       do q = 1, n_det_beta_unique
+         do i = 1, n_det_alpha_unique
+            H1(i,q,r,s) = H1(i,q,r,s) + H0(i,j,r,s) * Vref(j,q)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ 
+ !$OMP SINGLE
+ H = 0.d0
+ !$OMP END SINGLE
+ !$OMP DO 
+  do s = 1, n_det_beta_unique
+   do r = 1, n_det_beta_unique
+    do q = 1, n_det_beta_unique
+     do p = 1, n_det_beta_unique
+      do i = 1, n_det_alpha_unique
+            H(p,q,r,s) = H(p,q,r,s) + H1(i,q,r,s) * Uref(i,p)
+        enddo
+       enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP END PARALLEL
+
+  deallocate(H1,H0)
+
+  return
+end
+
+
+
+
+
--- a/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f
@ -0,0 +1,454 @@
+program buildpsi_diagSVDit_Anthony_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj, na, nb
+
+  double precision              :: norm_psi, inv_sqrt_norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlap_postsvd, E_prev
+  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
+  double precision              :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi
+
+  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:)
+  double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)
+
+  integer                       :: n_TSVD, n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max
+  integer                       :: n_selected2
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it
+  real(kind=8)                  :: speedup, speedup_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det    :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
+  !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! Truncated rank
+  n_TSVD      = 20
+  n_selected  = n_TSVD
+  call write_int(6,n_TSVD, 'Rank of psi')
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  tol_energy = 1.d0
+  it_svd     = 0
+  it_svd_max = 100
+  E_prev     = 0.d0
+
+  allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique))
+  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir)
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    double precision :: norm
+    norm = 0.d0
+    do j = 1, n_selected
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    Dref = Dref / dsqrt(norm)
+
+    call const_H_uv(Uref, Vref, H)
+
+    E0 = 0.d0
+    do j = 1, n_selected
+      do i = 1, n_selected
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+      enddo
+    enddo
+    double precision :: E0_av, E0_ap, E0pt2
+    E0_av = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0_av
+    print *,  ''
+
+    double precision, allocatable :: eigval0(:)
+    double precision, allocatable :: eigvec0(:,:,:)
+    double precision, allocatable :: H_tmp(:,:,:,:)
+
+    allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) )
+    do l=1,n_selected
+     do k=1,n_selected
+      do j=1,n_selected
+       do i=1,n_selected
+        H_tmp(i,j,k,l) = H(i,j,k,l)
+       enddo
+      enddo
+     enddo
+    enddo
+    allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2))
+    eigvec0 = 0.d0
+
+    call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2)
+    E0_postsvd = eigval0(1) + nuclear_repulsion
+    print*, ' postsvd energy  = ', E0_postsvd
+    deallocate(H_tmp, eigval0)
+
+    Dref = 0.d0
+    call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref)
+    deallocate(eigvec0)
+
+    print *,  ' --- Compute H --- '
+    call const_H_uv(Uref, Vref, H)
+
+    E0 = 0.d0
+    norm = 0.d0
+    do j = 1, n_det_beta_unique
+      do i = 1, n_det_beta_unique
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+      enddo
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    E0_ap = E0 + nuclear_repulsion
+    print *,' E0 (apres SVD) =', E0_ap
+
+    psi_postsvd = 0.d0
+    do i=1,n_selected
+      psi_postsvd(i,i) = Dref(i)
+    enddo
+
+    E0   = E0_ap
+    Ept2 = 0.d0
+    do j=1,n_selected
+      do i=n_selected+1,n_det_alpha_unique
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+    do j=n_selected+1,n_det_beta_unique
+      do i=1,n_selected
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+    do j=n_selected+1,n_det_beta_unique
+      do i=n_selected+1,n_det_alpha_unique
+        ctmp = 0.d0
+        do l=1,n_selected
+          do k=1,n_selected
+            ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
+          enddo
+        enddo
+        psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+        Ept2            += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+    E0pt2 = E0_ap + ept2
+    print *, ' perturb energy = ', E0pt2, ept2
+
+    tol_energy = dabs(E_prev - E0_ap)
+    E_prev = E0_ap
+
+    call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)
+
+    write(44,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0_ap+Ept2
+
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+
+  end do
+
+
+  deallocate( Uref, Vref, Dref )
+
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+  print *,' ___________________________________________________________________'
+
+
+end
+
+
+
+subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_selected
+  double precision, intent(in)    :: psi_postsvd(n_selected,n_selected)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlapU_mat, overlapV_mat, overlapU, overlapV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )
+
+  U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected)
+  V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected)
+  S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected)
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_selected,nn) )
+  call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  deallocate(S_mat)
+
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), &
+              U_newsvd, size(U_newsvd,1), &
+              D_newsvd, &
+              Vt_newsvd, size(Vt_newsvd,1), &
+              mm, nn)
+  deallocate(A_newsvd)
+
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+
+  !do l = 1, n_selected
+  !  Dref(l)   = D_newsvd(l)
+  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
+  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
+  !enddo
+  Dref(1:n_selected) = D_newsvd(1:n_selected)
+  Uref(1:mm,1:mm)    = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn)    = V_newsvd(1:nn,1:nn)
+
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+end subroutine perform_newpostSVD
+
+
+
+subroutine const_H_uv(Uref, Vref, H)
+
+  USE OMP_LIB
+
+  implicit none
+
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, na, nb, mm, ind_gs
+  integer                       :: p,q,r,s
+  double precision              :: h12, x
+
+  double precision, allocatable :: H0(:,:,:,:)
+  double precision, allocatable :: H1(:,:,:,:)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( H0(na,nb,na,nb) )
+  allocate( H1(nb,na,nb,na) )
+
+  H0 = 0.d0
+  call wall_time(t0)
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12)  &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,Uref,Vref,H0,H1,H)
+
+ !$OMP DO
+ do l = 1, nb
+   det2(:,2) = psi_det_beta_unique(:,l)
+   do j = 1, nb
+     det1(:,2) = psi_det_beta_unique(:,j)
+     call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+     if (degree > 2) cycle
+     do k = 1, na
+       det2(:,1) = psi_det_alpha_unique(:,k)
+       do i = 1, na
+         det1(:,1) = psi_det_alpha_unique(:,i)
+         call get_excitation_degree(det1,det2,degree,N_int)
+         if ( degree > 2) cycle
+         call i_H_j(det1, det2, N_int, h12)
+         H0(i,j,k,l) = h12
+       enddo
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+
+ !$OMP END PARALLEL
+
+  call wall_time(t1)
+  ! (i,j,k,l) -> (j,k,l,p)
+  call DGEMM('T','N', nb * na * nb, na, na, &
+     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+
+  ! (j,k,l,p) -> (k,l,p,q)
+  call DGEMM('T','N', na * nb * na, nb, nb, &
+     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))
+
+  ! (k,l,p,q) -> (l,p,q,r)
+  call DGEMM('T','N', nb * na * nb, na, na, &
+     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+
+  ! (l,p,q,r) -> (p,q,r,s)
+  call DGEMM('T','N',  na * nb * na, nb, nb, &
+     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+  call wall_time(t2)
+  print *, t1-t0, t2-t1
+  double precision :: t0, t1, t2
+
+  deallocate(H1,H0)
+
+end
+
+
+
+
+
--- a/devel/svdwf/buildpsi_diagSVDit_Anthony_v2.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v2.irp.f
@ -0,0 +1,754 @@
+program buildpsi_diagSVDit_Anthony_v2
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, na, nb
+
+  double precision              :: norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: err0, err_tmp, e_tmp, E0, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlap_postsvd, E_prev
+
+  double precision, allocatable :: H_diag(:,:), Hkl(:,:), H0(:,:), H(:,:,:,:)
+  double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)
+
+  integer                       :: n_TSVD, it_svd, it_svd_max
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call i_H_j(det1, det2, N_int, h12)
+
+
+  i_state = 1
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det    :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
+  !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! Truncated rank
+  !n_TSVD = 100
+  !call write_int(6,n_TSVD, 'Rank of psi')
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  tol_energy = 1.d0
+  it_svd     = 0
+  it_svd_max = 100
+  E_prev = 0.d0
+
+  print *, ci_energy(1)
+
+  allocate(H_diag(n_det_alpha_unique,n_det_beta_unique))
+  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) )
+
+    it_svd = it_svd + 1
+
+    ! Truncated rank
+    n_TSVD = min(n_det_alpha_unique,n_det_beta_unique)
+    do i = min(n_det_alpha_unique,n_det_beta_unique), 10, -1
+      if( dabs(Dref(i)) .lt. 1d-2 ) then
+        n_TSVD = n_TSVD - 1
+      else
+        exit
+      endif
+    enddo
+    !do i = 1, min(n_det_alpha_unique,n_det_beta_unique)
+    !  print *, i, Dref(i)
+    !enddo
+    call write_int(6,n_TSVD, 'Rank of psi')
+    n_TSVD = min(n_TSVD,100)
+    call write_int(6,n_TSVD, 'Rank of psi')
+
+    allocate(H(n_TSVD,n_TSVD,n_det_alpha_unique,n_det_beta_unique))
+
+    double precision :: norm
+    norm = 0.d0
+    do j = 1, n_TSVD
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    Dref = Dref / dsqrt(norm)
+
+    print *,  '-- Compute H --'
+    !call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+    call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)
+
+    ! H0(i,j) = < u_i v_j | H | u_i v_j >
+    ! E0 = < psi_0 | H | psi_0 >
+    E0 = 0.d0
+    do j = 1, n_TSVD
+      do i = 1, n_TSVD
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+      enddo
+    enddo
+    E0 = E0 + nuclear_repulsion
+    print *,' E0   =', E0
+
+    double precision, allocatable :: eigval0(:)
+    double precision, allocatable :: eigvec0(:,:,:)
+    double precision, allocatable :: H_tmp(:,:,:,:)
+
+    allocate( H_tmp(n_TSVD,n_TSVD,n_TSVD,n_TSVD) )
+    do l=1,n_TSVD
+     do k=1,n_TSVD
+      do j=1,n_TSVD
+       do i=1,n_TSVD
+        H_tmp(i,j,k,l) = H(i,j,k,l)
+       enddo
+      enddo
+     enddo
+    enddo
+    allocate( eigval0(n_TSVD**2),eigvec0(n_TSVD,n_TSVD,n_TSVD**2))
+    eigvec0 = 0.d0
+
+    print *,  ' --- Diag post-SVD --- '
+    call lapack_diag(eigval0, eigvec0, H_tmp, n_TSVD**2, n_TSVD**2)
+    print *, 'eig =', eigval0(1) + nuclear_repulsion
+    deallocate(H_tmp, eigval0)
+
+    print *,  ' --- SVD --- '
+    Dref = 0.d0
+    call perform_newpostSVD(n_TSVD, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref)
+    deallocate(eigvec0)
+
+    print *,  ' --- Compute H --- '
+    !call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+    call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)
+
+    ! H0(i,j) = < u_i v_j | H | u_i v_j >
+    ! E0 = < psi_0 | H | psi_0 >
+    E0 = 0.d0
+    norm = 0.d0
+    do j = 1, n_det_beta_unique
+      do i = 1, n_TSVD
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+      enddo
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    E0 = E0 + nuclear_repulsion
+    print *,' E0   =', E0
+!    print *,' norm =', norm
+
+    print *,  ' --- Perturbation --- '
+    psi_postsvd = 0.d0
+    !do i=1,n_TSVD
+    !  psi_postsvd(i,i) = Dref(i)
+    !enddo
+
+    double precision :: lambda
+
+    lambda = 1.d0
+
+    Ept2 = 0.d0
+    do j=1,n_TSVD
+      do i=n_TSVD+1,n_det_alpha_unique
+        ctmp = 0.d0
+        do k=1,n_TSVD
+          ctmp = ctmp + H(k,k,i,j) * Dref(k)
+        enddo
+        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+        Ept2            += ctmp*ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+
+    do j=n_TSVD+1,n_det_beta_unique
+      do i=1,n_TSVD
+        ctmp = 0.d0
+        do k=1,n_TSVD
+          ctmp = ctmp + H(k,k,i,j) * Dref(k)
+        enddo
+        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+        Ept2            += ctmp*ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+
+    norm = 0.d0
+    do l = 1, n_det_beta_unique
+     do k = 1, n_det_alpha_unique
+       norm = norm + psi_postsvd(k,l)**2
+     enddo
+    enddo
+  
+    norm = dsqrt(norm)
+    print *, norm
+    if( norm .gt. 0.01d0 ) then
+      psi_postsvd = 0.01d0 * psi_postsvd / norm
+    endif
+
+    do i=1,n_TSVD
+      psi_postsvd(i,i) = Dref(i)
+    enddo
+
+    norm = 0.d0
+    do l = 1, n_det_beta_unique
+     do k = 1, n_det_alpha_unique
+       norm = norm + psi_postsvd(k,l)**2
+     enddo
+    enddo
+    psi_postsvd = psi_postsvd / dsqrt(norm)
+
+
+    tol_energy = dabs(E_prev - E0)
+    print       '(I5, 2X, I5, 3(3X, F20.10))', it_svd, n_TSVD, E0, E0 + Ept2, tol_energy
+    write(114,'(I5, 2X, I5, 3(3X, F20.10))')   it_svd,n_TSVD,  E0, E0 + Ept2, tol_energy
+    E_prev = E0
+
+    E0 = 0.d0
+    do j = 1, n_TSVD
+     do i = 1, n_TSVD
+      do l = 1, n_det_beta_unique
+       do k = 1, n_det_alpha_unique
+         E0 = E0 + psi_postsvd(i,j) * H(i,j,k,l) * psi_postsvd(k,l)
+       enddo
+      enddo
+     enddo
+    enddo
+    norm = 0.d0
+    do l = 1, n_det_beta_unique
+     do k = 1, n_det_alpha_unique
+       norm = norm + psi_postsvd(k,l)**2
+     enddo
+    enddo
+
+    E0 = E0/norm + nuclear_repulsion
+    print *,' E0 avant   =', E0
+
+    !print *,  ' --- SVD --- '
+    !call perform_newpostSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)
+    call perform_newSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)
+
+
+
+    deallocate( H ) 
+
+  end do
+
+
+end
+
+
+
+subroutine perform_newpostSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_TSVD, LDP
+  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  !double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique))
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, l, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )
+
+  U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD)
+  V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD)
+
+  S_mat(1:n_TSVD,1:n_TSVD) = psi_postsvd(1:n_TSVD,1:n_TSVD)
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_TSVD,nn) )
+  call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0    &
+             , S_mat     , size(S_mat,1)            &
+             , V_svd     , size(V_svd,1)            &
+             , 0.d0, SxVt, size(SxVt ,1)            )
+  deallocate(S_mat)
+
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0        &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  !allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) )
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), &
+              U_newsvd, size(U_newsvd,1), &
+              D_newsvd, &
+              Vt_newsvd, size(Vt_newsvd,1), &
+              mm, nn)
+  deallocate(A_newsvd)
+
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+  !do l = 1, n_TSVD
+  !  Dref(l)      = D_newsvd(l)
+  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
+  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
+  !enddo
+  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+end subroutine perform_newpostSVD
+
+
+
+subroutine perform_newSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)
+
+  integer, intent(in)             :: n_TSVD, LDP
+  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, l, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) )
+
+  U_svd(1:mm,1:mm) = Uref(1:mm,1:mm)
+  V_svd(1:nn,1:nn) = Vref(1:nn,1:nn)
+
+  norm_tmp = 0.d0
+  do i = 1, nn
+    do j = 1, mm
+      S_mat(j,i) = psi_postsvd(j,i)
+      norm_tmp  += psi_postsvd(j,i) * psi_postsvd(j,i)
+    enddo
+  enddo
+  norm_tmp = 1.d0 / dsqrt(norm_tmp)
+  do i = 1, nn
+    do j = 1, mm
+      S_mat(j,i) = S_mat(j,i) * norm_tmp
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(mm,nn) )
+  call dgemm( 'N', 'T', mm, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  deallocate(S_mat)
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, mm, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  deallocate(A_newsvd)
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+  !do l = 1, n_TSVD
+  !  Dref(l)      = D_newsvd(l)
+  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
+  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
+  !enddo
+
+  !Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Dref            = D_newsvd
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+  return
+
+end subroutine perform_newSVD
+
+
+
+
+
+subroutine const_H_uv(Uref, Vref, H, H_diag, n_TSVD)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)            :: n_TSVD
+  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)
+  double precision, intent(out)  :: H_diag(n_det_alpha_unique,n_det_beta_unique)
+
+  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
+  integer                        :: i, j, k, l, degree
+  integer                        :: jj0, n, m, np, mp
+  integer                        :: nn0, mm0, na, nb, mm, ind_gs
+  integer                        :: p,q,r,s
+  double precision               :: h12, x
+
+  double precision, allocatable  :: H0(:,:,:,:)
+  double precision, allocatable  :: H1(:,:,:,:)
+  double precision, allocatable  :: tmp3(:,:,:)
+  double precision, allocatable  :: tmp1(:,:), tmp0(:,:)
+  double precision :: c_tmp
+
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  call wall_time(t0)
+  tmp3 = 0.d0
+
+  allocate( H0(na,nb,n_TSVD,n_TSVD) )
+  allocate (tmp3(nb,nb,nb))
+  H0 = 0.d0
+
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0)&
+      !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,&
+      !$OMP                 N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD)
+
+  allocate(tmp1(na,na), tmp0(na,na))
+
+  do i=1,na
+    do m=1,na
+      tmp1(m,i) = Uref(i,m)
+    enddo
+  enddo
+
+  !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if (degree > 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if ( degree > 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if (h12 == 0.d0) cycle
+
+          do m=1,nb
+            tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k)
+          enddo
+
+          do n=1,n_TSVD
+            c_tmp = h12 * Vref(j,n)
+            if (c_tmp == 0.d0) cycle
+            do m=1,n_TSVD
+              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do m=1,nb
+
+    do l=1,nb
+      do j=1,nb
+        tmp1(j,l) = tmp3(m,j,l)
+      enddo
+    enddo
+    !print *, 'DGEMM1'
+    call DGEMM('N','N',nb,nb,nb,1.d0,                                &
+        tmp1, size(tmp1,1),                                          &
+        Vref, size(Vref,1),                                          &
+        0.d0, tmp0, size(tmp0,1))
+
+    do n=1,nb
+      H_diag(m,n) = 0.d0
+      do j=1,nb
+        H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(tmp1, tmp0)
+  !$OMP END PARALLEL
+
+  call wall_time(t1)
+
+  allocate( H1(nb,n_TSVD,n_TSVD,na) )
+  !print *, 'DGEMM2'
+  call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na,          &
+      1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+  deallocate( H0 )
+
+  ! (l,p,q,r) -> (p,q,r,s)
+  !print *, 'DGEMM3'
+  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
+      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+
+!  do j=1,n_TSVD
+!    do i=1,n_TSVD
+!      print *, H_diag(i,j), H(i,j,i,j)
+!    enddo
+!  enddo
+  deallocate(H1)
+
+  call wall_time(t2)
+  print *, 't=', t1-t0, t2-t1
+  double precision               :: t0, t1, t2
+!  stop
+end subroutine const_H_uv
+
+
+
+
+
+
+
+subroutine const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)            :: n_TSVD
+  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)
+  double precision, intent(out)  :: H_diag(n_det_alpha_unique,n_det_beta_unique)
+
+  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
+  integer                        :: i, j, k, l, degree, n, m, na, nb
+  double precision               :: h12
+
+  double precision, allocatable  :: H0(:,:,:,:)
+  double precision, allocatable  :: H1(:,:,:,:)
+  double precision, allocatable  :: tmp3(:,:,:)
+  double precision, allocatable  :: tmp1(:,:), tmp0(:,:), tmp4(:,:)
+  double precision :: c_tmp
+
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  call wall_time(t0)
+
+  allocate( H0(na,nb,n_TSVD,n_TSVD) )
+  allocate( tmp3(na,nb,nb) )
+  H0   = 0.d0
+  tmp3 = 0.d0
+
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0,tmp4)&
+      !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,&
+      !$OMP                 N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD)
+
+  allocate(tmp1(na,na), tmp0(nb,nb), tmp4(nb,nb))
+
+  do i=1,na
+    do m=1,na
+      tmp1(m,i) = Uref(i,m)
+    enddo
+  enddo
+
+  !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if (degree > 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if ( degree > 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if (h12 == 0.d0) cycle
+
+          do m=1,na
+            tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k)
+          enddo
+
+          do n=1,n_TSVD
+            c_tmp = h12 * Vref(j,n)
+            if (c_tmp == 0.d0) cycle
+            do m=1,n_TSVD
+              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do m=1,na
+
+    do l=1,nb
+      do j=1,nb
+        tmp4(j,l) = tmp3(m,j,l)
+      enddo
+    enddo
+
+    call DGEMM('N','N',nb,nb,nb,1.d0,                                &
+        tmp4, size(tmp4,1),                                          &
+        Vref, size(Vref,1),                                          &
+        0.d0, tmp0, size(tmp0,1))
+
+    do n=1,nb
+      H_diag(m,n) = 0.d0
+      do j=1,nb
+        H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(tmp1, tmp0)
+  deallocate(tmp4)
+  !$OMP END PARALLEL
+
+  deallocate(tmp3)
+
+  call wall_time(t1)
+
+  allocate( H1(nb,n_TSVD,n_TSVD,na) )
+  call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na,          &
+      1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+  deallocate( H0 )
+
+  ! (l,p,q,r) -> (p,q,r,s)
+  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
+      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+
+!  do j=1,n_TSVD
+!    do i=1,n_TSVD
+!      print *, H_diag(i,j), H(i,j,i,j)
+!    enddo
+!  enddo
+  deallocate(H1)
+
+  call wall_time(t2)
+  print *, 't=', t1-t0, t2-t1
+  double precision               :: t0, t1, t2
+!  stop
+end subroutine const_H_uv_modif
+
+
--- a/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f
@ -0,0 +1,776 @@
+program buildpsi_diagSVDit_Anthony_v4
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l
+
+  double precision              :: norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: E0, tol_energy, ctmp, Ept2, E_prev
+
+  double precision, allocatable :: H_diag(:,:), Hkl(:,:), H0(:,:), H(:,:,:,:)
+  double precision, allocatable :: psi_postsvd(:,:)
+
+  integer                       :: n_TSVD, it_svd, it_svd_max
+
+  integer                       :: ii, jj, n_perturb
+  double precision              :: norm, lambda, E_perturb
+  double precision, allocatable :: eigval0(:), eigvec0(:,:,:), H_tmp(:,:,:,:)
+  double precision, allocatable :: norm_row(:), norm_col(:), Utmp(:,:), Vtmp(:,:)
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call i_H_j(det1, det2, N_int, h12)
+
+
+  i_state = 1
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det    :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
+  !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! Truncated rank
+  !n_TSVD = 100
+  !call write_int(6,n_TSVD, 'Rank of psi')
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  tol_energy = 1.d0
+  it_svd     = 0
+  it_svd_max = 100
+  E_prev = 0.d0
+
+  !print *, ci_energy(1)
+
+  allocate(H_diag(n_det_alpha_unique,n_det_beta_unique))
+  allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) )
+
+    it_svd = it_svd + 1
+
+    ! Truncated rank
+    !n_TSVD = min(n_det_alpha_unique,n_det_beta_unique)
+    !do i = min(n_det_alpha_unique,n_det_beta_unique), 10, -1
+    !  if( dabs(Dref(i)) .lt. 1d-2 ) then
+    !    n_TSVD = n_TSVD - 1
+    !  else
+    !    exit
+    !  endif
+    !enddo
+    !do i = 1, min(n_det_alpha_unique,n_det_beta_unique)
+    !  print *, i, Dref(i)
+    !enddo
+    !call write_int(6,n_TSVD, 'Rank of psi')
+    n_TSVD = 30 !min(n_TSVD,100)
+    call write_int(6,n_TSVD, 'Rank of psi')
+
+    print *,  '-- Compute H --'
+    allocate(H(n_TSVD,n_TSVD,n_det_alpha_unique,n_det_beta_unique))
+    call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+    !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)
+
+    ! E0 = < psi_0 | H | psi_0 >
+    !norm = 0.d0
+    !do j = 1, n_TSVD
+    !  norm = norm + Dref(j)*Dref(j)
+    !enddo
+    !Dref = Dref / dsqrt(norm)
+    !E0   = 0.d0
+    !do j = 1, n_TSVD
+    !  do i = 1, n_TSVD
+    !     E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+    !  enddo
+    !enddo
+    !E0 = E0 + nuclear_repulsion
+    !print *,' E0 bef diag =', E0
+
+    allocate( H_tmp(n_TSVD,n_TSVD,n_TSVD,n_TSVD) )
+    do l = 1, n_TSVD
+      do k = 1, n_TSVD
+        do j = 1, n_TSVD
+          do i = 1, n_TSVD
+            H_tmp(i,j,k,l) = H(i,j,k,l)
+          enddo
+        enddo
+      enddo
+    enddo
+    allocate( eigval0(n_TSVD**2) , eigvec0(n_TSVD,n_TSVD,n_TSVD**2) )
+    eigvec0 = 0.d0
+    !print *,  ' --- Diag post-SVD --- '
+    call lapack_diag(eigval0, eigvec0, H_tmp, n_TSVD**2, n_TSVD**2)
+    !print *, 'eig =', eigval0(1) + nuclear_repulsion
+    deallocate(H_tmp, eigval0)
+
+    print *,  ' --- first SVD --- '
+    Dref = 0.d0
+    call perform_newpostSVD(n_TSVD, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref)
+    deallocate(eigvec0)
+
+    print *,  ' --- Compute H --- '
+    call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+    !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD)
+
+    ! E0 = < psi_0 | H | psi_0 >
+    E0 = 0.d0
+    norm = 0.d0
+    do j = 1, n_det_beta_unique
+      do i = 1, n_TSVD
+         E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
+      enddo
+      norm = norm + Dref(j)*Dref(j)
+    enddo
+    E0 = E0 + nuclear_repulsion
+    print *,' E0 aft diag =', E0
+
+    ! -----------------------------------------------------------------
+
+    !print *,  ' --- Perturbation --- '
+    psi_postsvd = 0.d0
+    do i = 1, n_TSVD
+      psi_postsvd(i,i) = Dref(i)
+    enddo
+
+    lambda = 1.d0
+    Ept2   = 0.d0
+    do j = 1, n_TSVD
+      do i = n_TSVD+1, n_det_alpha_unique
+        ctmp = 0.d0
+        do k = 1, n_TSVD
+          ctmp = ctmp + H(k,k,i,j) * Dref(k)
+        enddo
+        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+        Ept2            +=   ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+
+    do j = n_TSVD+1, n_det_beta_unique
+      do i = 1, n_TSVD
+        ctmp = 0.d0
+        do k = 1, n_TSVD
+          ctmp = ctmp + H(k,k,i,j) * Dref(k)
+        enddo
+        psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+        Ept2            +=   ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) )
+      enddo
+    enddo
+
+    deallocate( H ) 
+    ! -----------------------------------------------------------------
+
+    n_perturb = n_TSVD + n_TSVD
+
+    allocate( norm_row(n_det_alpha_unique) , norm_col(n_det_beta_unique) )
+    do i = 1, n_det_alpha_unique
+      norm_row(i) = 0.d0
+      do j = 1, n_det_beta_unique
+        norm_row(i) += psi_postsvd(i,j) * psi_postsvd(i,j)
+      enddo
+    enddo
+    do j = 1, n_det_beta_unique
+      norm_col(j) = 0.d0
+      do i = 1, n_det_alpha_unique
+        norm_col(j) += psi_postsvd(i,j) * psi_postsvd(i,j)
+      enddo
+    enddo
+
+    allocate( Utmp(n_det_alpha_unique,n_perturb) , Vtmp(n_det_beta_unique,n_perturb) )
+    do i = 1, n_perturb
+      ii           = MAXLOC( norm_row , DIM=1 )
+      jj           = MAXLOC( norm_col , DIM=1 )
+      if( (norm_row(ii).lt.1.d-12) .or. (norm_col(jj).lt.1.d-12) ) then
+        print *, '  !!!!!!!    '
+        print *, ii, norm_row(ii)
+        print *, jj, norm_col(jj)
+        stop
+      endif
+      Utmp(:,i)    = Uref(:,ii)
+      Vtmp(:,i)    = Vref(:,jj)
+      norm_row(ii) = 0.d0
+      norm_col(jj) = 0.d0
+    enddo 
+    deallocate( norm_row , norm_col )
+
+    print *,  ' --- Compute H in n_perturb space --- '
+    allocate(H(n_perturb,n_perturb,n_det_alpha_unique,n_det_beta_unique))
+    call const_H_TSVD(Uref, Vref, H, n_perturb)
+    allocate( H_tmp(n_perturb,n_perturb,n_perturb,n_perturb) )
+    do l = 1, n_perturb
+      do k = 1, n_perturb
+        do j = 1, n_perturb
+          do i = 1, n_perturb
+            H_tmp(i,j,k,l) = H(i,j,k,l)
+          enddo
+        enddo
+      enddo
+    enddo
+    deallocate( H ) 
+    allocate( eigval0(n_perturb**2) , eigvec0(n_perturb,n_perturb,n_perturb**2) )
+    eigvec0 = 0.d0
+    call lapack_diag(eigval0, eigvec0, H_tmp, n_perturb**2, n_perturb**2)
+    E_perturb = eigval0(1) + nuclear_repulsion
+    print *, ' diag in n_perturb space: ', E_perturb
+    deallocate(H_tmp, eigval0)
+
+    print *,  ' --- second SVD --- '
+    Uref = 0.d0
+    Vref = 0.d0
+    Dref = 0.d0
+    do l = 1, n_perturb
+      Uref(1:n_det_alpha_unique,l) = Utmp(1:n_det_alpha_unique,l)
+      Vref(1:n_det_beta_unique ,l) = Vtmp(1:n_det_beta_unique ,l)
+    enddo
+    deallocate( Utmp , Vtmp )
+
+    call perform_perturbSVD(n_TSVD, n_perturb, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref)
+    deallocate(eigvec0)
+
+
+    tol_energy = dabs(E_prev - E0)
+    print     '(I5, 2(2X,I5), 3(3X, F20.10))',  it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy
+    write(222,'(I5, 2(2X,I5), 3(3X, F20.10))')  it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy
+    E_prev = E0
+
+    !print *,  ' --- SVD --- '
+    !call perform_newpostSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)
+    !call perform_newSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref)
+
+
+  end do
+
+
+end
+
+
+
+subroutine perform_newpostSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)
+
+  integer, intent(in)             :: n_TSVD, LDP
+  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  !double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique))
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, l, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )
+
+  U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD)
+  V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD)
+
+  S_mat(1:n_TSVD,1:n_TSVD) = psi_postsvd(1:n_TSVD,1:n_TSVD)
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_TSVD,nn) )
+  call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0    &
+             , S_mat     , size(S_mat,1)            &
+             , V_svd     , size(V_svd,1)            &
+             , 0.d0, SxVt, size(SxVt ,1)            )
+  deallocate(S_mat)
+
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0        &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  !allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) )
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), &
+              U_newsvd, size(U_newsvd,1), &
+              D_newsvd, &
+              Vt_newsvd, size(Vt_newsvd,1), &
+              mm, nn)
+  deallocate(A_newsvd)
+
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+  !do l = 1, n_TSVD
+  !  Dref(l)      = D_newsvd(l)
+  !  Uref(1:mm,l) = U_newsvd(1:mm,l)
+  !  Vref(1:nn,l) = V_newsvd(1:nn,l)
+  !enddo
+  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+end subroutine perform_newpostSVD
+
+
+
+ 
+subroutine perform_perturbSVD(n_TSVD, n_perturb, psi_postsvd, LDP, Uref, Vref, Dref)
+
+  integer, intent(in)             :: n_TSVD, LDP, n_perturb
+  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, l, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_perturb) , V_svd(nn,n_perturb) , S_mat(n_perturb,n_perturb) )
+  U_svd(1:mm,1:n_perturb)        = Uref(1:mm,1:n_perturb)
+  V_svd(1:nn,1:n_perturb)        = Vref(1:nn,1:n_perturb)
+  S_mat(1:n_perturb,1:n_perturb) = psi_postsvd(1:n_perturb,1:n_perturb)
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_perturb,nn) )
+  call dgemm( 'N', 'T', n_perturb, nn, n_perturb, 1.d0    &
+             , S_mat     , size(S_mat,1)                  &
+             , V_svd     , size(V_svd,1)                  &
+             , 0.d0, SxVt, size(SxVt ,1)                  )
+  deallocate(S_mat)
+
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_perturb, 1.d0           &
+             , U_svd         , size(U_svd   ,1)           &
+             , SxVt          , size(SxVt    ,1)           &
+             , 0.d0, A_newsvd, size(A_newsvd,1)           )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), &
+              U_newsvd, size(U_newsvd,1), &
+              D_newsvd, &
+              Vt_newsvd, size(Vt_newsvd,1), &
+              mm, nn)
+  deallocate(A_newsvd)
+
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+end subroutine perform_perturbSVD
+
+
+
+subroutine perform_newSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref)
+
+  integer, intent(in)             :: n_TSVD, LDP
+  double precision, intent(in)    :: psi_postsvd(LDP,n_TSVD)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, l, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) )
+
+  U_svd(1:mm,1:mm) = Uref(1:mm,1:mm)
+  V_svd(1:nn,1:nn) = Vref(1:nn,1:nn)
+
+  norm_tmp = 0.d0
+  do i = 1, nn
+    do j = 1, mm
+      S_mat(j,i) = psi_postsvd(j,i)
+      norm_tmp  += psi_postsvd(j,i) * psi_postsvd(j,i)
+    enddo
+  enddo
+  norm_tmp = 1.d0 / dsqrt(norm_tmp)
+  do i = 1, nn
+    do j = 1, mm
+      S_mat(j,i) = S_mat(j,i) * norm_tmp
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(mm,nn) )
+  call dgemm( 'N', 'T', mm, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  deallocate(S_mat)
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, mm, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  deallocate(A_newsvd)
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+  deallocate(Vt_newsvd)
+
+  !Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Dref            = D_newsvd
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+  deallocate(U_newsvd)
+  deallocate(V_newsvd)
+  deallocate(D_newsvd)
+
+  return
+
+end subroutine perform_newSVD
+
+
+
+
+subroutine const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)            :: n_TSVD
+  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)
+  double precision, intent(out)  :: H_diag(n_det_alpha_unique,n_det_beta_unique)
+
+  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
+  integer                        :: i, j, k, l, degree, n, m, na, nb
+  double precision               :: h12
+
+  double precision, allocatable  :: H0(:,:,:,:)
+  double precision, allocatable  :: H1(:,:,:,:)
+  double precision, allocatable  :: tmp3(:,:,:)
+  double precision, allocatable  :: tmp1(:,:), tmp0(:,:), tmp4(:,:)
+  double precision :: c_tmp
+
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  call wall_time(t0)
+
+  allocate( H0(na,nb,n_TSVD,n_TSVD) )
+  allocate( tmp3(na,nb,nb) )
+  H0   = 0.d0
+  tmp3 = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                  &
+ !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0,tmp4) &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,         &
+ !$OMP                 N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD)
+
+  allocate(tmp1(na,na), tmp0(nb,nb), tmp4(nb,nb))
+
+  do i = 1, na
+    do m = 1, na
+      tmp1(m,i) = Uref(i,m)
+    enddo
+  enddo
+
+  !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if (degree > 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if ( degree > 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if (h12 == 0.d0) cycle
+
+          do m = 1, na
+            tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k)
+          enddo
+
+          do n = 1, n_TSVD
+            c_tmp = h12 * Vref(j,n)
+            if (c_tmp == 0.d0) cycle
+            do m = 1, n_TSVD
+              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do m = 1, na
+    do l = 1, nb
+      do j = 1, nb
+        tmp4(j,l) = tmp3(m,j,l)
+      enddo
+    enddo
+    call DGEMM('N','N',nb,nb,nb,1.d0,                                &
+        tmp4, size(tmp4,1),                                          &
+        Vref, size(Vref,1),                                          &
+        0.d0, tmp0, size(tmp0,1))
+    do n = 1, nb
+      H_diag(m,n) = 0.d0
+      do j = 1, nb
+        H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(tmp1, tmp0)
+  deallocate(tmp4)
+  !$OMP END PARALLEL
+
+  deallocate(tmp3)
+
+  call wall_time(t1)
+
+  ! H0(na,nb,n_TSVD,n_TSVD)
+  allocate( H1(nb,n_TSVD,n_TSVD,na) )
+  call DGEMM('T', 'N', nb * n_TSVD * n_TSVD, na, na, 1.d0, &
+              H0  , size(H0,1)  ,                          & 
+              Uref, size(Uref,1), 0.d0,                    &
+              H1  , size(H1,1) * size(H1,2) * size(H1,3)   )
+  deallocate( H0 )
+
+  ! (l,p,q,r) -> (p,q,r,s)
+  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
+      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+
+!  do j=1,n_TSVD
+!    do i=1,n_TSVD
+!      print *, H_diag(i,j), H(i,j,i,j)
+!    enddo
+!  enddo
+  deallocate(H1)
+
+  call wall_time(t2)
+  print *, 't=', t1-t0, t2-t1
+  double precision               :: t0, t1, t2
+!  stop
+end subroutine const_H_uv_modif
+
+
+
+
+
+
+subroutine const_H_TSVD(Uref, Vref, H, n_TSVD)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)            :: n_TSVD
+  double precision, intent(in)   :: Uref(n_det_alpha_unique,n_TSVD)
+  double precision, intent(in)   :: Vref(n_det_beta_unique ,n_TSVD)
+  double precision, intent(out)  :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique)
+
+  integer(bit_kind)              :: det1(N_int,2), det2(N_int,2)
+  integer                        :: i, j, k, l, degree, n, m, na, nb
+  double precision               :: h12
+
+  double precision, allocatable  :: H0(:,:,:,:)
+  double precision, allocatable  :: H1(:,:,:,:)
+  double precision, allocatable  :: tmp1(:,:)
+  double precision               :: c_tmp
+  double precision               :: t0, t1, t2
+
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  call wall_time(t0)
+
+  allocate( H0(na,nb,n_TSVD,n_TSVD) )
+  H0 = 0.d0
+
+  !$OMP PARALLEL DEFAULT(NONE)                                             &
+      !$OMP          PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1)  &
+      !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique &
+      !$OMP                 ,N_int,Uref,Vref,H0,n_TSVD)
+
+  allocate(tmp1(na,na))
+  do i = 1, na
+    do m = 1, na
+      tmp1(m,i) = Uref(i,m)
+    enddo
+  enddo
+
+  !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if (degree > 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if ( degree > 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if (h12 == 0.d0) cycle
+
+          do n = 1, n_TSVD
+            c_tmp = h12 * Vref(j,n)
+            if (c_tmp == 0.d0) cycle
+            do m = 1, n_TSVD
+              H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i)
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(tmp1)
+  !$OMP END PARALLEL
+
+  call wall_time(t1)
+  allocate( H1(nb,n_TSVD,n_TSVD,na) )
+  call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na,          &
+      1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+  deallocate( H0 )
+  ! (l,p,q,r) -> (p,q,r,s)
+  call DGEMM('T','N',  n_TSVD * n_TSVD * na, nb, nb,         &
+      1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+  deallocate(H1)
+  call wall_time(t2)
+
+  print *, 't=', t1-t0, t2-t1
+
+end subroutine const_H_TSVD
+
+
--- a/devel/svdwf/buildpsi_diagSVDit_v0.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_v0.irp.f
@ -0,0 +1,822 @@
+program buildpsi_diagSVDit_v0
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj, nn, na, nb
+
+  double precision              :: norm_psi, inv_sqrt_norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: E0_av, E0_ap, E0pt2
+  double precision              :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlop_postsvd
+  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
+  double precision              :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi
+
+  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:)
+  double precision, allocatable :: psi_postsvd(:), coeff_psi(:), coeff_psi_perturb(:)
+
+  integer                       :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max
+  integer                       :: n_selected2
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it
+  real(kind=8)                  :: speedup, speedup_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+ 
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_beta_unique) )
+  allocate( Dref(n_det_beta_unique) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, ' ---   First SVD: ok    ---  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! ---------------------------------------------------------------------------------------
+
+  nn = n_det_beta_unique
+
+  ! ---------------------------------------------------------------------------------------
+  !                               numerote vectors 
+
+  ! Full rank
+  n_FSVD = nn * nn
+  print*, ' Full psi space rank = ', n_FSVD
+
+  ! Truncated rank
+  n_selected  = 20
+  n_selected2 = n_selected * n_selected
+  print*, ' initial psi space rank = ', n_selected
+
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  do i = 1, n_selected
+    numalpha_selected(i) = i
+    numbeta_selected (i) = i
+  enddo
+  ! check SVD error
+  err0 = 0.d0
+  do j = 1, nn
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_selected
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  print *, ' SVD err (%)     = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  deallocate( Aref )
+
+  ! perturbative space rank
+  k = 0
+  n_toselect =  nn*nn - n_selected*n_selected
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  ! nondiagonal blocs
+  do i = 1, n_selected
+    do j = n_selected+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+  do j = 1, n_selected
+    do i = n_selected+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+  ! diagonal bloc
+  do i = n_selected+1, nn
+    do j = n_selected+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+
+  if( k.ne.n_toselect ) then
+    print*, ' error in numeroting '
+    stop
+  endif
+  print*, ' perturbative psi space rank = ', n_toselect
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  E0_old     = 0.d0
+  tol_energy = 1.d0
+  it_svd     = 0 
+  it_svd_max = 100
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    norm_coeff_psi = 0.d0
+    do j = 1, n_selected
+      norm_coeff_psi += Dref(j) * Dref(j)
+    enddo
+    inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi)
+    do j = 1, n_selected
+      Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi
+    enddo
+
+    allocate( H0(n_selected2,n_selected2) )
+    print *,  ''
+    print *,  ''
+    print *,  ''
+    print *,  '-- Compute H --'
+    call const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0)
+
+    ! avant SVD
+    E0 = 0.d0 
+    do i = 1, n_selected
+      ii = (i-1)*n_selected + i
+      do j = 1, n_selected
+        jj = (j-1)*n_selected + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+    enddo
+    E0_av = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0_av
+
+    allocate( psi_postsvd(n_selected2) )
+    print *,  ' --- Diag post-SVD --- '
+    call diag_postsvd(n_selected, n_selected2, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd)
+    print*, ' postsvd energy  = ', E0_postsvd
+    deallocate( H0 )
+
+    ! post-SVD
+    print *,  ' --- SVD --- '
+    !Dref(:) = 0.d0
+    call perform_newpostSVD(n_selected, n_selected2, psi_postsvd, Uref, Vref, Dref)
+    deallocate( psi_postsvd )
+
+    allocate( H0(n_selected2,n_selected2) )
+    print *,  ' --- Compute H --- '
+    call const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0)
+
+    E0             = 0.d0 
+    norm_coeff_psi = 0.d0
+    do i = 1, n_selected
+      ii = (i-1)*n_selected + i
+      do j = 1, n_selected
+        jj = (j-1)*n_selected + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+      norm_coeff_psi += Dref(i) * Dref(i)
+    enddo
+    E0_ap = E0 + nuclear_repulsion
+    print *,' E0 (apres SVD) =', E0_ap
+    !print *,' norm =', norm_coeff_psi
+   
+    deallocate(H0)
+
+    print *,  ' --- Perturbation --- '
+
+    allocate( Hdiag(n_toselect), Hkl(n_selected2,n_toselect) )
+    call const_Hdiag_Hkl(n_selected, n_selected2, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag, Hkl)
+    !do l = 1, n_toselect
+    !  na = numalpha_toselect(l)
+    !  nb = numbeta_toselect (l)
+    !  print *, na, nb, Hdiag(l)
+    !enddo
+
+    ! evaluate the coefficients for all the vectors
+    allocate( coeff_psi_perturb(n_toselect) )
+    ept2 = 0.d0
+    do ii = 1, n_toselect
+      !na = numalpha_toselect(ii)
+      !nb = numbeta_toselect (ii)
+      ctmp = 0.d0
+      do i = 1, n_selected
+        l = (i-1)*n_selected + i
+        ctmp += Dref(i) * Hkl(l,ii)
+      enddo
+      coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+      ept2            += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+    enddo
+    E0pt2 = E0_ap + ept2
+    print *, ' perturb energy = ', E0pt2, ept2
+
+    tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2)
+    E0_old     = E0pt2
+
+    deallocate( Hdiag, Hkl)
+
+    print *,  ' --- SVD --- '
+    call perform_newSVD(n_selected, n_selected2, n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+    deallocate( coeff_psi_perturb )
+
+    write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+
+  deallocate( Uref, Vref, Dref )
+
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+  print *,' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+
+subroutine const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_selected, n_selected2
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected2,n_selected2)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, nn, mm, ind_gs
+  double precision              :: h12, x
+
+  double precision, allocatable :: H0_tmp(:,:) 
+
+
+  H0(:,:) = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(n,np,nn0,nn,ii0,jj0,x,m,mp,mm0,mm,ii,jj,i,j,k,l,h12,det1,det2,H0_tmp,degree)  &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,n_selected,n_selected2,Uref,Vref,H0 )
+  allocate( H0_tmp(n_selected2,n_selected2) )
+  H0_tmp(:,:) = 0.d0
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! !!!
+          call i_H_j(det1, det2, N_int, h12)
+          ! !!!
+          ! ~~~            H0                  ~~~
+          do n  = 1, n_selected
+            nn0 = (n-1)*n_selected
+            do np = 1, n_selected
+              nn  = nn0 + np
+              x   = Uref(k,n) * Vref(l,np) * h12
+              do m  = 1, n_selected
+                mm0 = (m-1)*n_selected
+                do mp = 1, n_selected
+                  mm  = mm0 + mp
+                  H0_tmp(mm,nn) += Uref(i,m) * Vref(j,mp) * x
+                enddo
+              enddo
+            enddo
+          enddo
+          ! ~~~          ~~~~~~                ~~~
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP CRITICAL
+  do n = 1, n_selected2
+     do m = 1, n_selected2
+       H0(m,n) += H0_tmp(m,n)
+     enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( H0_tmp )
+ !$OMP END PARALLEL
+
+  return
+end subroutine const_psihpsi_postsvd_H0
+
+
+
+
+
+subroutine diag_postsvd(n_selected, n_selected2, Dref, H0, E0, overlop, psi_postsvd )
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_selected, n_selected2
+  double precision, intent(in)  :: H0(n_selected2,n_selected2)
+  double precision, intent(in)  :: Dref(n_det_beta_unique)
+  double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected2)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, nn, mm, ind_gs
+  double precision              :: h12, x
+
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:)
+
+  ! diagonalize H0
+  allocate( eigvec0(n_selected2,n_selected2), eigval0(n_selected2) )
+  call lapack_diag(eigval0, eigvec0, H0, n_selected2, n_selected2)
+
+  ! get the postsvd ground state
+  allocate( check_ov(n_selected2) )
+  do l = 1, n_selected2
+    overlop = 0.d0
+    do i = 1, n_selected
+      ii      = n_selected*(i-1) + i
+      overlop = overlop + eigvec0(ii,l) * Dref(i)
+    enddo
+    check_ov(l) = dabs(overlop)
+  enddo
+  ind_gs      = MAXLOC( check_ov, DIM=1 )
+  !ind_gs      = 1
+  overlop     = check_ov(ind_gs)
+  E0          = eigval0(ind_gs)+nuclear_repulsion
+  psi_postsvd = eigvec0(:,ind_gs)
+
+  deallocate( check_ov, eigvec0, eigval0 )
+
+  return
+end subroutine diag_postsvd
+
+
+
+
+subroutine const_Hdiag_Hkl(n_selected, n_selected2, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag, Hkl)
+
+  implicit none
+
+  integer, intent(in)           :: n_selected, n_selected2, n_toselect
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected2,n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree
+
+  integer                       :: i, j, k, l
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  double precision              :: h12, y
+
+  double precision, allocatable :: Hdiag_tmp(:), Hkl_tmp(:,:) 
+
+
+  Hdiag(:) = 0.d0
+  Hkl(:,:) = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(n,ii0,jj0,y,m,mp,ii,jj,i,j,k,l,h12,det1,det2,Hdiag_tmp,Hkl_tmp,degree)         &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,n_selected,n_toselect,Uref,Vref,numalpha_toselect,numbeta_toselect,      &
+ !$OMP                 Hkl,Hdiag,n_selected2 )
+  allocate( Hdiag_tmp(n_toselect), Hkl_tmp(n_selected2,n_toselect) )
+  Hdiag_tmp(:) = 0.d0
+  Hkl_tmp(:,:) = 0.d0
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      ! !!!
+      ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+      ! !!!
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          ! !!!
+          ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 
+          ! !!!
+          call i_H_j(det1, det2, N_int, h12)
+          ! ~ ~ ~               H                ~ ~ ~
+          do n = 1, n_toselect
+            ii0 = numalpha_toselect(n)
+            jj0 = numbeta_toselect (n)
+            y   = Uref(k,ii0) * Vref(l,jj0) * h12
+            ! Hdiag
+            Hdiag_tmp(n) += y * Uref(i,ii0) * Vref(j,jj0)
+            do m = 1, n_selected
+              ii = (m-1)*n_selected
+              do mp = 1, n_selected
+                jj = ii + mp
+                ! Hkl
+                Hkl_tmp(jj,n) += Uref(i,m) * Vref(j,mp) * y
+              enddo
+            enddo
+          enddo
+          ! ~ ~ ~             ! ! !              ~ ~ ~
+        enddo
+      enddo
+      ! !!!
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP CRITICAL
+  do n = 1, n_toselect
+     Hdiag(n) += Hdiag_tmp(n)
+     do m = 1, n_selected2
+       Hkl(m,n) += Hkl_tmp(m,n)
+     enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( Hdiag_tmp,Hkl_tmp )
+ !$OMP END PARALLEL
+
+end subroutine const_Hdiag_Hkl
+
+
+
+
+
+
+subroutine perform_newSVD(n_selected, n_selected2, n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_selected, n_toselect, n_selected2
+  integer, intent(in)             :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)    :: coeff_psi_perturb(n_toselect)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) )
+
+  U_svd(:,:) = Uref(:,:)
+  V_svd(:,:) = Vref(:,:)
+  S_mat(:,:) = 0.d0
+  norm_tmp   = 0.d0
+  do j  = 1, n_det_beta_unique
+    S_mat(j,j) = Dref(j)
+    norm_tmp  += S_mat(j,j)*S_mat(j,j)
+  enddo
+  do l = 1, n_toselect
+    na = numalpha_toselect(l)
+    nb = numbeta_toselect (l)
+    S_mat(na,nb) = coeff_psi_perturb(l)
+    norm_tmp    += S_mat(na,nb)*S_mat(na,nb)
+  enddo
+
+  print*, ' norm de S_mat =', norm_tmp
+  !norm_tmp = 1.d0/dsqrt(norm_tmp)
+  !do i = 1, nn 
+  !  do j = 1, nn 
+  !    S_mat(j,i) = S_mat(j,i) * norm_tmp
+  !  enddo
+  !enddo
+
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(nn,nn) )
+  call dgemm( 'N', 'T', nn, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, nn, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  print *, '    +++ new perturbative SVD is performed +++       '
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, nn
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%)              = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+
+  do l = 1, nn
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+  !print *, Dref(:)
+
+
+  overlopU_mat = 0.d0
+  overlopV_mat = 0.d0
+  do i = 1, nn
+    do j = 1, nn
+      overlopU = 0.d0 
+      do ii = 1, mm
+        overlopU += Uref(ii,j) * Uref(ii,i)
+      enddo
+      overlopU_mat += overlopU
+      overlopV = 0.d0 
+      do ii = 1, nn
+        overlopV += Vref(ii,j) * Vref(ii,i)
+      enddo
+      overlopV_mat += overlopV
+    enddo
+  enddo
+  print *, 'overlop U =', overlopU_mat
+  print *, 'overlop V =', overlopV_mat
+
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newSVD
+
+
+
+
+
+subroutine perform_newpostSVD(n_selected, n_selected2, psi_postsvd, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_selected, n_selected2
+  double precision, intent(in)    :: psi_postsvd(n_selected2)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )
+
+  U_svd(:,:) = Uref(:,1:n_selected)
+  V_svd(:,:) = Vref(:,1:n_selected)
+  S_mat(:,:) = 0.d0
+  do i = 1, n_selected
+    ii = (i-1)*n_selected
+    do j = 1, n_selected
+      jj = ii + j
+      S_mat(i,j) = psi_postsvd(jj)
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_selected,nn) )
+  call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0     &
+             , S_mat     , size(S_mat,1)                     &
+             , V_svd     , size(V_svd,1)                     &
+             , 0.d0, SxVt, size(SxVt ,1)                     )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0            &
+             , U_svd         , size(U_svd   ,1)             &
+             , SxVt          , size(SxVt    ,1)             &
+             , 0.d0, A_newsvd, size(A_newsvd,1)             )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  print *, '    +++ new SVD is performed +++       '
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, n_selected
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_selected
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+!  print *, Dref(:)
+
+  overlopU_mat = 0.d0
+  overlopV_mat = 0.d0
+  do i = 1, nn
+    do j = 1, nn
+      overlopU = 0.d0 
+      do ii = 1, mm
+        overlopU += Uref(ii,j) * Uref(ii,i)
+      enddo
+      overlopU_mat += overlopU
+      overlopV = 0.d0 
+      do ii = 1, nn
+        overlopV += Vref(ii,j) * Vref(ii,i)
+      enddo
+      overlopV_mat += overlopV
+    enddo
+  enddo
+  print *, 'overlop U =', overlopU_mat
+  print *, 'overlop V =', overlopV_mat
+
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newpostSVD
+
+
--- a/devel/svdwf/buildpsi_diagSVDit_v1.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_v1.irp.f
@ -0,0 +1,915 @@
+program buildpsi_diagSVDit_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! study efficiency for different way to build | psi >
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb
+
+  double precision              :: norm_psi, inv_sqrt_norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: E0_av, E0_ap, E0pt2
+  double precision              :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlop_postsvd
+  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
+  double precision              :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi
+
+  double precision, allocatable :: H(:,:,:,:)
+  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:)
+  double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:)
+
+  integer                       :: it_svd, it_svd_max
+
+  integer                       :: n_TSVD, n_FSVD, n_selected, n_toselect
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it, W_tot_time_step
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step
+  real(kind=8)                  :: speedup, speedup_it, speedup_step
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+ 
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_beta_unique) )
+  allocate( Dref(n_det_beta_unique) )
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique)  )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1)          &
+       , n_det_alpha_unique, n_det_beta_unique)
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, ' ---   First SVD: ok    ---  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! check Truncate SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_det_beta_unique
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  ! ---------------------------------------------------------------------------------------
+
+  nn = n_det_beta_unique
+
+  ! ---------------------------------------------------------------------------------------
+  !                               numerote vectors 
+
+  ! Full rank
+  n_FSVD = nn * nn
+  print*, ' Full psi space rank = ', n_FSVD
+
+
+  ! Truncated rank
+  n_TSVD = 20
+  print*, ' initial psi space rank = ', n_TSVD
+
+  ! check Truncate SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_TSVD
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  deallocate( Aref )
+  print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  n_selected = n_TSVD * n_TSVD
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  k = 0
+  ! first diagonal bloc
+  do i = 1, n_TSVD
+    do j = 1, n_TSVD
+      k = k + 1
+      numalpha_selected(k) = j 
+      numbeta_selected (k) = i
+    enddo
+  enddo
+  ! check size
+  if( k.ne.n_selected ) then
+    print*, ' error in numeroting: selected '
+    print*, ' k          = ', k
+    print*, ' n_selected = ', n_selected
+    stop
+  endif
+
+
+  ! perturbative space rank
+  k          = 0
+  n_toselect = n_FSVD - n_selected
+  print*, ' perturbative psi space rank = ', n_toselect
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  ! nondiagonal blocs
+  do i = 1, n_TSVD
+    do j = n_TSVD+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+  do j = 1, n_TSVD
+    do i = n_TSVD+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+  ! diagonal bloc
+  do i = n_TSVD+1, nn
+    do j = n_TSVD+1, nn
+      k = k + 1
+      numalpha_toselect(k) = j 
+      numbeta_toselect (k) = i
+    enddo
+  enddo
+  ! check size
+  if( k.ne.n_toselect ) then
+    print*, ' error in numeroting: to select '
+    print*, ' k          = ', k
+    print*, ' n_toselect = ', n_toselect
+    stop
+  endif
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  E0_old     = 0.d0
+  tol_energy = 1.d0
+  it_svd     = 0 
+  it_svd_max = 10
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    norm_coeff_psi = 0.d0
+    do j = 1, n_TSVD
+      norm_coeff_psi += Dref(j) * Dref(j)
+    enddo
+    inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi)
+    do j = 1, n_TSVD
+      Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi
+    enddo
+
+    allocate( H0(n_selected,n_selected) )
+    call const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0)
+
+    ! avant SVD
+    E0 = 0.d0 
+    do i = 1, n_TSVD
+      ii = (i-1)*n_TSVD + i
+      do j = 1, n_TSVD
+        jj = (j-1)*n_TSVD + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+    enddo
+    E0_av = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0_av
+    print *,  ''
+
+
+    allocate( psi_postsvd(n_selected) )
+    print *,  ' --- Diag post-SVD --- '
+    call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd)
+    print*, ' postsvd energy  = ', E0_postsvd
+    deallocate( H0 )
+
+    ! post-SVD
+    !Dref(:) = 0.d0
+    call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
+    deallocate( psi_postsvd )
+
+    print *,  ''
+    print *,  ''
+    print *,  ' --- Compute H --- '
+    allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) )
+    call const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected &
+                            , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0)
+
+    E0             = 0.d0 
+    norm_coeff_psi = 0.d0
+    do i = 1, n_TSVD
+      ii = (i-1)*n_TSVD + i
+      do j = 1, n_TSVD
+        jj = (j-1)*n_TSVD + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+      norm_coeff_psi += Dref(i) * Dref(i)
+    enddo
+    E0_ap = E0 + nuclear_repulsion
+    print *,' E0 (apres SVD) =', E0_ap
+   
+    deallocate(H0)
+
+    print *,  ' --- Perturbation --- '
+    allocate( coeff_psi_perturb(n_toselect) )
+    ept2 = 0.d0
+    do ii = 1, n_toselect
+      ctmp = 0.d0
+      do i = 1, n_TSVD
+        l = (i-1)*n_TSVD + i
+        ctmp += Dref(i) * Hkl(l,ii)
+      enddo
+      coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+      ept2            += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+    enddo
+    E0pt2 = E0_ap + ept2
+    print *, ' perturb energy = ', E0pt2, ept2
+    tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2)
+    E0_old     = E0pt2
+
+    deallocate( Hdiag, Hkl)
+
+
+    print *,  ' --- SVD --- '
+    call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+    deallocate( coeff_psi_perturb )
+
+    write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+
+  deallocate( Uref, Vref, Dref )
+
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+  print *,' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+subroutine const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected,n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: n, na, nb, m , ma, mb
+  double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:) 
+
+  H0(:,:)       = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree)                                     &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,n_selected,Uref,Vref,H0,Htot,H1,numalpha_selected,numbeta_selected )
+
+ !$OMP SINGLE
+  allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
+  Htot(:,:,:,:) = 0.d0  
+ !$OMP END SINGLE
+
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! !!!
+          call i_H_j(det1, det2, N_int, Htot(k,l,i,j))
+          ! !!!
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) )
+  H1(:,:,:) = 0.d0  
+ !$OMP END SINGLE
+
+ !$OMP DO
+  do n = 1, n_selected
+    na = numalpha_selected(n) 
+    nb = numbeta_selected (n) 
+    do i = 1, n_det_alpha_unique
+      do j = 1, n_det_beta_unique
+        do l = 1, n_det_beta_unique
+          do k = 1, n_det_alpha_unique
+            H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  deallocate( Htot )
+ !$OMP END SINGLE
+
+ !$OMP DO
+  do m = 1, n_selected
+    ma = numalpha_selected(m) 
+    mb = numbeta_selected (m) 
+    do n = 1, n_selected
+      do k = 1, n_det_alpha_unique
+        do l = 1, n_det_beta_unique
+          H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  deallocate( H1 )
+ !$OMP END SINGLE
+
+ !$OMP END PARALLEL
+
+  return
+end subroutine const_psihpsi_postsvd_H0_modif
+
+
+
+
+
+
+subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd )
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected
+  double precision, intent(in)  :: H0(n_selected,n_selected)
+  double precision, intent(in)  :: Dref(n_det_beta_unique)
+  double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, nn, mm, ind_gs
+  double precision              :: h12, x
+
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:)
+
+  ! diagonalize H0
+  allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+  call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+  ! get the postsvd ground state
+  allocate( check_ov(n_selected) )
+  do l = 1, n_selected
+    overlop = 0.d0
+    do i = 1, n_TSVD
+      ii      = n_TSVD*(i-1) + i
+      overlop = overlop + eigvec0(ii,l) * Dref(i)
+    enddo
+    check_ov(l) = dabs(overlop)
+  enddo
+  ind_gs      = MAXLOC( check_ov, DIM=1 )
+  !ind_gs      = 1
+  overlop     = check_ov(ind_gs)
+  E0          = eigval0(ind_gs)+nuclear_repulsion
+  psi_postsvd = eigvec0(:,ind_gs)
+
+  deallocate( check_ov, eigvec0, eigval0 )
+
+  return
+end subroutine diag_postsvd
+
+
+
+
+
+subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_toselect
+  integer, intent(in)             :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)    :: coeff_psi_perturb(n_toselect)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) )
+
+  U_svd(:,:) = Uref(:,:)
+  V_svd(:,:) = Vref(:,:)
+  S_mat(:,:) = 0.d0
+  norm_tmp   = 0.d0
+  do j  = 1, n_det_beta_unique
+    S_mat(j,j) = Dref(j)
+    norm_tmp  += S_mat(j,j)*S_mat(j,j)
+  enddo
+  do l = 1, n_toselect
+    na = numalpha_toselect(l)
+    nb = numbeta_toselect (l)
+    S_mat(na,nb) = coeff_psi_perturb(l)
+    norm_tmp    += S_mat(na,nb)*S_mat(na,nb)
+  enddo
+
+  print*, ' norm de S_mat =', norm_tmp
+  !norm_tmp = 1.d0/dsqrt(norm_tmp)
+  !do i = 1, nn 
+  !  do j = 1, nn 
+  !    S_mat(j,i) = S_mat(j,i) * norm_tmp
+  !  enddo
+  !enddo
+
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(nn,nn) )
+  call dgemm( 'N', 'T', nn, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, nn, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  print *, '    +++ new perturbative SVD is performed +++       '
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, nn
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%)              = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+
+  do l = 1, nn
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+  !print *, Dref(:)
+
+
+  overlopU_mat = 0.d0
+  overlopV_mat = 0.d0
+  do i = 1, nn
+    do j = 1, nn
+      overlopU = 0.d0 
+      do ii = 1, mm
+        overlopU += Uref(ii,j) * Uref(ii,i)
+      enddo
+      overlopU_mat += overlopU
+      overlopV = 0.d0 
+      do ii = 1, nn
+        overlopV += Vref(ii,j) * Vref(ii,i)
+      enddo
+      overlopV_mat += overlopV
+    enddo
+  enddo
+  print *, 'overlop U =', overlopU_mat
+  print *, 'overlop V =', overlopV_mat
+
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newSVD
+
+
+
+
+
+subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
+
+  ! TODO: general case wherer we we don't consider the first trucated block
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_TSVD, n_selected
+  double precision, intent(in)    :: psi_postsvd(n_selected)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(n_det_beta_unique)
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )
+
+  U_svd(:,:) = Uref(:,1:n_TSVD)
+  V_svd(:,:) = Vref(:,1:n_TSVD)
+  S_mat(:,:) = 0.d0
+  do i = 1, n_TSVD
+    ii = (i-1)*n_TSVD
+    do j = 1, n_TSVD
+      jj = ii + j
+      S_mat(j,i) = psi_postsvd(jj)
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_TSVD,nn) )
+  call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0     &
+             , S_mat     , size(S_mat,1)             &
+             , V_svd     , size(V_svd,1)             &
+             , 0.d0, SxVt, size(SxVt ,1)             )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0         &
+             , U_svd         , size(U_svd   ,1)      &
+             , SxVt          , size(SxVt    ,1)      &
+             , 0.d0, A_newsvd, size(A_newsvd,1)      )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  print *, '    +++ new SVD is performed +++       '
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, n_TSVD
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+  print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  do l = 1, n_TSVD
+    Dref(l)   = D_newsvd(l)
+    Uref(:,l) = U_newsvd(:,l)
+    Vref(:,l) = V_newsvd(:,l)
+  enddo
+!  print *, Dref(:)
+
+  overlopU_mat = 0.d0
+  overlopV_mat = 0.d0
+  do i = 1, nn
+    do j = 1, nn
+      overlopU = 0.d0 
+      do ii = 1, mm
+        overlopU += Uref(ii,j) * Uref(ii,i)
+      enddo
+      overlopU_mat += overlopU
+      overlopV = 0.d0 
+      do ii = 1, nn
+        overlopV += Vref(ii,j) * Vref(ii,i)
+      enddo
+      overlopV_mat += overlopV
+    enddo
+  enddo
+  print *, 'overlop U =', overlopU_mat
+  print *, 'overlop V =', overlopV_mat
+
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newpostSVD
+
+
+
+
+
+
+subroutine const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected &
+                            , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0)
+
+  implicit none
+
+  integer, intent(in)           :: n_selected, n_toselect
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree
+
+  integer                       :: i, j, k, l
+  integer                       :: n, na, nb, m, ma, mb
+  double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:), H2(:,:,:)
+
+  Hdiag(:) = 0.d0
+  Hkl(:,:) = 0.d0
+  H0(:,:)  = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree)                                     &
+ !$OMP          SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,n_selected,n_toselect,Uref,Vref,H0,Htot,H1,H2,Hdiag,Hkl,                 &
+ !$OMP                 numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect )
+
+ !$OMP SINGLE
+  allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
+  Htot(:,:,:,:) = 0.d0  
+ !$OMP END SINGLE
+
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20)
+  do i = 1, n_det_alpha_unique
+    do k = 1, n_det_alpha_unique
+      det1(:,1) = psi_det_alpha_unique(:,i)
+      det2(:,1) = psi_det_alpha_unique(:,k)
+      call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+      if (degree .gt. 2) then
+        cycle
+      endif
+      do j = 1, n_det_beta_unique
+        det1(:,2) = psi_det_beta_unique(:,j)
+        do l = 1, n_det_beta_unique
+          det2(:,2) = psi_det_beta_unique(:,l)
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if (degree .gt. 2) then
+            cycle
+          endif
+          ! !!!
+          call i_H_j(det1, det2, N_int, Htot(k,l,i,j))
+          ! !!!
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  print *, ' *** Htot is calculated *** '
+  allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) )
+  H1(:,:,:) = 0.d0  
+ !$OMP END SINGLE
+ !$OMP DO
+  do n = 1, n_selected
+    na = numalpha_selected(n) 
+    nb = numbeta_selected (n) 
+    do i = 1, n_det_alpha_unique
+      do j = 1, n_det_beta_unique
+        do l = 1, n_det_beta_unique
+          do k = 1, n_det_alpha_unique
+            H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  allocate( H2(n_det_alpha_unique,n_det_beta_unique,n_toselect) )
+  H2(:,:,:) = 0.d0  
+ !$OMP END SINGLE
+ !$OMP DO
+  do n = 1, n_toselect
+    na = numalpha_toselect(n) 
+    nb = numbeta_toselect (n) 
+    do i = 1, n_det_alpha_unique
+      do j = 1, n_det_beta_unique
+        do l = 1, n_det_beta_unique
+          do k = 1, n_det_alpha_unique
+            H2(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb)
+            Hdiag(n)  += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) * Uref(k,na) * Vref(l,nb)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP SINGLE
+  deallocate( Htot )
+ !$OMP END SINGLE
+
+ !$OMP DO
+  do m = 1, n_selected
+    ma = numalpha_selected(m) 
+    mb = numbeta_selected (m) 
+    do n = 1, n_toselect
+      do k = 1, n_det_alpha_unique
+        do l = 1, n_det_beta_unique
+          Hkl(m,n) += H2(k,l,n) * Uref(k,ma) * Vref(l,mb)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP SINGLE
+  deallocate( H2 )
+ !$OMP END SINGLE
+
+ !$OMP DO
+  do m = 1, n_selected
+    ma = numalpha_selected(m) 
+    mb = numbeta_selected (m) 
+    do n = 1, n_selected
+      do k = 1, n_det_alpha_unique
+        do l = 1, n_det_beta_unique
+          H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP SINGLE
+  deallocate( H1 )
+ !$OMP END SINGLE
+
+ !$OMP END PARALLEL
+
+  return
+
+end subroutine const_Hdiag_Hkl_H0
--- a/devel/svdwf/buildpsi_diagSVDit_v2.irp.f
+++ b/devel/svdwf/buildpsi_diagSVDit_v2.irp.f
@ -0,0 +1,840 @@
+program buildpsi_diagSVDit_v2
+
+  implicit none
+
+  BEGIN_DOC
+  ! study efficiency for different way to build | psi >
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state, n_blocs
+  double precision              :: h12
+
+  integer                       :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb
+
+  double precision              :: norm_psi, inv_sqrt_norm_psi
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  double precision              :: E0_av, E0_ap, E0pt2
+  double precision              :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy
+  double precision              :: ctmp, htmp, Ept2
+  double precision              :: E0_postsvd, overlop_postsvd
+  double precision              :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
+  double precision              :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi
+
+  double precision, allocatable :: H(:,:,:,:)
+  double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:)
+  double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:)
+
+  integer                       :: it_svd, it_svd_max
+
+  integer                       :: n_TSVD, n_FSVD, n_selected, n_toselect
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it, W_tot_time_step
+  real(kind=8)                  :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step
+  real(kind=8)                  :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step
+  real(kind=8)                  :: speedup, speedup_it, speedup_step
+  integer                       :: nb_taches
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call CPU_TIME(CPU_tbeg)
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call i_H_j(det1, det2, N_int, h12)
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+ 
+  norm_psi = 0.d0
+  do k = 1, N_det
+    norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
+                        * psi_bilinear_matrix_values(k,i_state)
+  enddo
+  print *, ' initial norm = ', norm_psi
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vref (n_det_beta_unique,n_det_beta_unique) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
+  call svd_s( Aref, size(Aref,1),                   &
+              Uref, size(Uref,1),                   &
+              Dref,                                 &
+              Vtref, size(Vtref,1),                 &
+              n_det_alpha_unique, n_det_beta_unique )
+
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+  print *, ' ---   First SVD: ok    ---  '
+
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ! check Full SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, min(n_det_alpha_unique,n_det_beta_unique)
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+
+  ! ---------------------------------------------------------------------------------------
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                               numerote vectors 
+
+  ! Full rank
+  n_FSVD = n_det_alpha_unique * n_det_beta_unique
+  print*, ' Full psi space rank = ', n_FSVD
+
+  ! Truncated rank
+  n_TSVD = 15
+  print*, ' initial psi space rank = ', n_TSVD
+
+  ! check Truncate SVD error
+  err0 = 0.d0
+  do j = 1, n_det_beta_unique
+    do i = 1, n_det_alpha_unique
+      err_tmp = 0.d0
+      do l = 1, n_TSVD
+        err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
+      enddo
+      err_tmp = Aref(i,j) - err_tmp
+      err0   += err_tmp * err_tmp
+    enddo
+  enddo
+  deallocate( Aref )
+  print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
+
+  n_selected = n_TSVD * n_TSVD
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  k = 0
+  ! first diagonal bloc
+  do i = 1, n_TSVD
+    do j = 1, n_TSVD
+      k = k + 1
+      numalpha_selected(k) = j 
+      numbeta_selected (k) = i
+    enddo
+  enddo
+  ! check size
+  if( k.ne.n_selected ) then
+    print*, ' error in numeroting: selected '
+    print*, ' k          = ', k
+    print*, ' n_selected = ', n_selected
+    stop
+  endif
+
+
+  ! perturbative space rank
+  n_blocs = 2
+  k       = 0
+  if( n_blocs.eq.3  ) then
+
+    n_toselect = n_FSVD - n_selected
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    ! nondiagonal blocs
+    do i = 1, n_TSVD
+      do j = n_TSVD+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    do i = n_TSVD+1, n_det_alpha_unique
+      do j = 1, n_TSVD
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+    ! diagonal bloc
+    do i = n_TSVD+1, n_det_alpha_unique
+      do j = n_TSVD+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = i 
+        numbeta_toselect (k) = j
+      enddo
+    enddo
+
+  elseif( n_blocs.eq.2  ) then
+
+    n_toselect = n_FSVD - n_selected - (n_det_alpha_unique-n_TSVD)*(n_det_beta_unique-n_TSVD)
+    allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+    ! nondiagonal blocs
+    do i = 1, n_TSVD
+      do j = n_TSVD+1, n_det_beta_unique
+        k = k + 1
+        numalpha_toselect(k) = j 
+        numbeta_toselect (k) = i
+      enddo
+    enddo
+    do j = 1, n_TSVD
+      !do i = n_TSVD+1, n_det_beta_unique
+      do i = n_TSVD+1, n_det_alpha_unique
+        k = k + 1
+        numalpha_toselect(k) = j 
+        numbeta_toselect (k) = i
+      enddo
+    enddo
+
+  endif
+
+  ! check size
+  if( k.ne.n_toselect ) then
+    print*, ' error in numeroting: to select '
+    print*, ' k          = ', k
+    print*, ' n_toselect = ', n_toselect
+    stop
+  endif
+  print*, ' perturbative psi space rank = ', n_toselect
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                                   loop over SVD iterations
+  !________________________________________________________________________________________________________
+
+  E0_old     = 0.d0
+  tol_energy = 1.d0
+  it_svd     = 0 
+  it_svd_max = 100
+
+  do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
+
+    call CPU_TIME(CPU_tbeg_it)
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    it_svd = it_svd + 1
+    print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, '                                                                         '
+    print*, ' iteration', it_svd
+
+    norm_coeff_psi = 0.d0
+    do j = 1, n_TSVD
+      norm_coeff_psi += Dref(j) * Dref(j)
+    enddo
+    inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi)
+    do j = 1, n_TSVD
+      Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi
+    enddo
+
+    allocate( H0(n_selected,n_selected) )
+
+    call const_H0(n_TSVD, n_selected, Uref, Vref, H0)
+
+    E0 = 0.d0 
+    do i = 1, n_TSVD
+      ii = (i-1)*n_TSVD + i
+      do j = 1, n_TSVD
+        jj = (j-1)*n_TSVD + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+    enddo
+    E0_av = E0 + nuclear_repulsion
+    print *,' E0 (avant SVD) =', E0_av
+
+
+    allocate( psi_postsvd(n_selected) )
+    call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd)
+    print*, ' postsvd energy  = ', E0_postsvd
+    deallocate( H0 )
+
+    !Dref(:) = 0.d0
+    call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
+    deallocate( psi_postsvd )
+
+    print *,  ' --- Compute H --- '
+    allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) )
+    call const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, &
+           numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0)
+
+    E0             = 0.d0 
+    norm_coeff_psi = 0.d0
+    do i = 1, n_TSVD
+      ii = (i-1)*n_TSVD + i
+      do j = 1, n_TSVD
+        jj = (j-1)*n_TSVD + j
+        E0 += Dref(j) * H0(jj,ii) * Dref(i)
+      enddo
+      norm_coeff_psi += Dref(i) * Dref(i)
+    enddo
+    E0_ap = E0 + nuclear_repulsion
+    print *,' E0 (apres SVD) =', E0_ap
+   
+    deallocate(H0)
+
+    allocate( coeff_psi_perturb(n_toselect) )
+    ept2 = 0.d0
+    do ii = 1, n_toselect
+      ctmp = 0.d0
+      do i = 1, n_TSVD
+        l = (i-1)*n_TSVD + i
+        ctmp += Dref(i) * Hkl(l,ii)
+      enddo
+      coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+      ept2            += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
+    enddo
+    E0pt2 = E0_ap + ept2
+    print *, ' perturb energy = ', E0pt2, ept2
+    tol_energy = dabs(E0_ap-E0_old) 
+    E0_old     = E0_ap
+
+    deallocate( Hdiag, Hkl)
+
+
+    call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+    deallocate( coeff_psi_perturb )
+
+    write(n_blocs,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
+    !write(222,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
+
+    call CPU_TIME(CPU_tend_it)
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
+    W_tot_time_it   = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    speedup_it      = CPU_tot_time_it / W_tot_time_it
+    print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+              & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+              & 3X, "speed up     = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
+
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+  ! save to EZFIO
+  allocate(Uezfio(n_det_alpha_unique,n_TSVD,1), Dezfio(n_TSVD,1), Vezfio(n_det_beta_unique,n_TSVD,1))
+  Dezfio(1:n_TSVD,1) = Dref(1:n_TSVD)
+  Uezfio(1:n_det_alpha_unique,1:n_TSVD,1) = Uref(1:n_det_alpha_unique,1:n_TSVD)
+  Vezfio(1:n_det_beta_unique ,1:n_TSVD,1) = Vref(1:n_det_beta_unique ,1:n_TSVD)
+
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+  !call ezfio_set_spindeterminants_n_svd_coefs(n_TSVD)
+
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  
+  deallocate(Uezfio, Dezfio, Vezfio)
+  deallocate( Uref, Vref, Dref )
+
+
+  call CPU_TIME(CPU_tend)
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  CPU_tot_time = CPU_tend - CPU_tbeg
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  speedup      = CPU_tot_time / W_tot_time
+  print *,' ___________________________________________________________________'
+  print '(//,3X,"Execution avec ",i2," threads")',nb_taches
+  print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /,  &
+            & 3X, "CPU time     = ", 1PE10.3, " min.", /,  &
+            & 3X, "speed up     = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
+  print *,' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+subroutine const_H0(n_TSVD, n_selected, Uref, Vref, H0)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected,n_selected)
+
+  integer                       :: i, j, k, l
+  integer                       :: n, m
+  double precision, allocatable :: H(:,:,:,:)
+
+  H0(:,:) = 0.d0
+
+  allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
+  call const_H_uv_lapack(Uref, Vref, H)
+
+ !$OMP PARALLEL DEFAULT(NONE)          &
+ !$OMP          PRIVATE(i,j,k,l,m,n)   &
+ !$OMP          SHARED(n_TSVD,H0,H)
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_TSVD
+    do j = 1, n_TSVD
+      m = (i-1)*n_TSVD + j
+      do k = 1, n_TSVD
+        do l = 1, n_TSVD
+          n = (k-1)*n_TSVD + l
+          H0(n,m) = H(k,l,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate( H )
+
+  return
+end subroutine const_H0
+
+
+
+
+
+
+subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd )
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected
+  double precision, intent(in)  :: H0(n_selected,n_selected)
+  double precision, intent(in)  :: Dref(min(n_det_alpha_unique,n_det_beta_unique))
+  double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, nn, mm, ind_gs
+  double precision              :: h12, x
+
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:)
+
+  ! diagonalize H0
+  allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+  call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+  ! get the postsvd ground state
+  allocate( check_ov(n_selected) )
+  do l = 1, n_selected
+    overlop = 0.d0
+    do i = 1, n_TSVD
+      ii      = n_TSVD*(i-1) + i
+      overlop = overlop + eigvec0(ii,l) * Dref(i)
+    enddo
+    check_ov(l) = dabs(overlop)
+  enddo
+  ind_gs      = MAXLOC( check_ov, DIM=1 )
+  !ind_gs      = 1
+  overlop     = check_ov(ind_gs)
+  E0          = eigval0(ind_gs)+nuclear_repulsion
+  psi_postsvd = eigvec0(:,ind_gs)
+
+  deallocate( check_ov, eigvec0, eigval0 )
+
+  return
+end subroutine diag_postsvd
+
+
+
+
+
+subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_toselect
+  integer, intent(in)             :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)    :: coeff_psi_perturb(n_toselect)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) )
+
+  U_svd(:,:) = Uref(:,:)
+  V_svd(:,:) = Vref(:,:)
+  S_mat(:,:) = 0.d0
+  norm_tmp   = 0.d0
+  do j  = 1, min(mm,nn)
+    S_mat(j,j) = Dref(j)
+    norm_tmp  += S_mat(j,j)*S_mat(j,j)
+  enddo
+  do l = 1, n_toselect
+    na = numalpha_toselect(l)
+    nb = numbeta_toselect (l)
+    S_mat(na,nb) = coeff_psi_perturb(l)
+    norm_tmp    += S_mat(na,nb)*S_mat(na,nb)
+  enddo
+
+  norm_tmp = 1.d0/dsqrt(norm_tmp)
+  do i = 1, nn 
+    do j = 1, mm 
+      S_mat(j,i) = S_mat(j,i) * norm_tmp
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(mm,nn) )
+  call dgemm( 'N', 'T', mm, nn, nn, 1.d0     &
+             , S_mat     , size(S_mat,1)     &
+             , V_svd     , size(V_svd,1)     &
+             , 0.d0, SxVt, size(SxVt ,1)     )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, mm, 1.d0            &
+             , U_svd         , size(U_svd   ,1)     &
+             , SxVt          , size(SxVt    ,1)     &
+             , 0.d0, A_newsvd, size(A_newsvd,1)     )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  Dref(:)         = D_newsvd(:)
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newSVD
+
+
+
+
+
+
+
+subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
+
+  USE OMP_LIB
+
+  integer, intent(in)             :: n_TSVD, n_selected
+  double precision, intent(in)    :: psi_postsvd(n_selected)
+  double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
+
+  integer                         :: mm, nn, i, j, ii0, ii, l, jj, na, nb
+  double precision                :: err0, err_norm, err_tmp, norm_tmp
+  double precision                :: overlopU_mat, overlopV_mat, overlopU, overlopV
+  double precision, allocatable   :: S_mat(:,:), SxVt(:,:)
+  double precision, allocatable   :: U_svd(:,:), V_svd(:,:)
+  double precision, allocatable   :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
+
+  mm = n_det_alpha_unique
+  nn = n_det_beta_unique
+
+  allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )
+
+  U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD)
+  V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD)
+  S_mat(:,:) = 0.d0
+  do i = 1, n_TSVD
+    ii = (i-1)*n_TSVD
+    do j = 1, n_TSVD
+      jj = ii + j
+      S_mat(i,j) = psi_postsvd(jj)
+    enddo
+  enddo
+
+  ! first compute S_mat x transpose(V_svd)
+  allocate( SxVt(n_TSVD,nn) )
+  call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0     &
+             , S_mat     , size(S_mat,1)             &
+             , V_svd     , size(V_svd,1)             &
+             , 0.d0, SxVt, size(SxVt ,1)             )
+  ! then compute U_svd x SxVt
+  allocate( A_newsvd(mm,nn) )
+  call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0         &
+             , U_svd         , size(U_svd   ,1)      &
+             , SxVt          , size(SxVt    ,1)      &
+             , 0.d0, A_newsvd, size(A_newsvd,1)      )
+  deallocate( SxVt )
+
+  ! perform new SVD
+  allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
+  call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
+  allocate( V_newsvd(nn,nn) )
+  do l = 1, nn
+    do j = 1, nn
+      V_newsvd(j,l) = Vt_newsvd(l,j)
+    enddo
+  enddo
+
+  ! check SVD error
+  err0     = 0.d0
+  err_norm = 0.d0
+  do j = 1, nn
+    do i = 1, mm
+      err_tmp  = 0.d0
+      do l = 1, n_TSVD
+        err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
+      enddo
+      err_tmp   = A_newsvd(i,j) - err_tmp
+      err0     += err_tmp * err_tmp
+      err_norm += A_newsvd(i,j) * A_newsvd(i,j)
+    enddo
+  enddo
+
+  Dref(1:n_TSVD)  = D_newsvd(1:n_TSVD)
+  Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
+  Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
+
+  deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
+
+  return
+
+end subroutine perform_newpostSVD
+
+
+
+
+
+
+subroutine const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, &
+           numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0)
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected, n_toselect
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected)
+
+  integer                       :: i, j, k, l
+  integer                       :: n, na, nb, m, ma, mb
+  double precision, allocatable :: H(:,:,:,:)
+  integer(kind=8)               :: W_tbeg_step, W_tend_step, W_ir
+  real(kind=8)                  :: W_tot_time_step
+
+  Hdiag(:) = 0.d0
+  Hkl(:,:) = 0.d0
+  H0(:,:)  = 0.d0
+
+  allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
+  call const_H_uv_lapack(Uref, Vref, H)
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir)
+
+ !$OMP PARALLEL DEFAULT(NONE)                                                                &
+ !$OMP          PRIVATE(i,j,k,l,n,na,nb,m,ma,mb)                                             &
+ !$OMP          SHARED(n_TSVD,n_selected,n_toselect,H0,H,Hdiag,Hkl,                          &
+ !$OMP                 numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect )
+
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do i = 1, n_TSVD
+    do j = 1, n_TSVD
+      m = (i-1)*n_TSVD + j
+      do k = 1, n_TSVD
+        do l = 1, n_TSVD
+          n = (k-1)*n_TSVD + l
+          H0(n,m) = H(k,l,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP DO
+  do n = 1, n_toselect
+    na = numalpha_toselect(n)
+    nb = numbeta_toselect (n)
+    ! diagonal part
+    Hdiag(n) = H(na,nb,na,nb)
+    do m = 1, n_selected
+      ma = numalpha_selected(m)
+      mb = numalpha_selected(m)
+      ! 3 blocs treated perturbatively
+      Hkl(m,n) = H(ma,mb,na,nb)
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir)
+  W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8)
+
+  deallocate( H )
+
+  return
+end subroutine const_Hdiag_Hkl_H0
+
+
+
+
+
+
+
+
+subroutine const_H_uv_lapack(Uref, Vref, H)
+
+  USE OMP_LIB
+
+  implicit none
+
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_beta_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: i, j, k, l, degree
+  integer                       :: ii0, jj0, ii, jj, n, m, np, mp
+  integer                       :: nn0, mm0, na, nb, mm, ind_gs
+  integer                       :: p,q,r,s
+  double precision              :: h12, x
+
+  double precision, allocatable :: H0(:,:,:,:)
+  double precision, allocatable :: H1(:,:,:,:)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( H0(na,nb,na,nb) )
+  allocate( H1(nb,na,nb,na) )
+
+  H0 = 0.d0
+  call wall_time(t0)
+ !$OMP PARALLEL DEFAULT(NONE)                                                                         &
+ !$OMP          PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12)  &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,Uref,Vref,H0,H1,H)
+
+ !$OMP DO
+ do l = 1, nb
+   det2(:,2) = psi_det_beta_unique(:,l)
+   do j = 1, nb
+     det1(:,2) = psi_det_beta_unique(:,j)
+     call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+     if (degree > 2) cycle
+     do k = 1, na
+       det2(:,1) = psi_det_alpha_unique(:,k)
+       do i = 1, na
+         det1(:,1) = psi_det_alpha_unique(:,i)
+         call get_excitation_degree(det1,det2,degree,N_int)
+         if ( degree > 2) cycle
+         call i_H_j(det1, det2, N_int, h12)
+         H0(i,j,k,l) = h12
+       enddo
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+
+ !$OMP END PARALLEL
+
+  call wall_time(t1)
+  ! (i,j,k,l) -> (j,k,l,p)
+  call DGEMM('T','N', nb * na * nb, na, na, &
+     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+
+  ! (j,k,l,p) -> (k,l,p,q)
+  call DGEMM('T','N', na * nb * na, nb, nb, &
+     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))
+
+  ! (k,l,p,q) -> (l,p,q,r)
+  call DGEMM('T','N', nb * na * nb, na, na, &
+     1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
+
+  ! (l,p,q,r) -> (p,q,r,s)
+  call DGEMM('T','N',  na * nb * na, nb, nb, &
+     1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
+  call wall_time(t2)
+  print *, t1-t0, t2-t1
+  double precision :: t0, t1, t2
+
+  deallocate(H1,H0)
+
+end const_H_uv_lapack
--- a/devel/svdwf/buildpsi_eff.irp.f
+++ b/devel/svdwf/buildpsi_eff.irp.f
--- a/devel/svdwf/kl_H_kl_v0.irp.f
+++ b/devel/svdwf/kl_H_kl_v0.irp.f
@ -0,0 +1,326 @@
+program kl_H_kl_v0
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, ia, ib
+  double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta
+  integer                       :: n_toselect, na_max, nb_max
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+
+  integer                       :: cantor_pairing_ij, cantor_pairing_new
+  integer, allocatable          :: cantor_pairing(:), cantor_pairing_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  deallocate( Aref , Dref )
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ibeg_alpha = 1
+  iend_alpha = n_det_alpha_unique
+  na_max     = iend_alpha - ibeg_alpha + 1
+
+  ibeg_beta = 1
+  iend_beta = n_det_beta_unique
+  nb_max    = iend_beta - ibeg_beta + 1
+
+  n_toselect = na_max * nb_max
+
+  print *, ' na_max      = ', na_max
+  print *, ' nb_max      = ', nb_max
+  print *, ' n_toselect  = ', n_toselect
+  
+
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  k = 0
+  do i = ibeg_alpha, iend_alpha
+    do j = ibeg_beta, iend_beta
+      k = k + 1
+      numalpha_toselect(k) = i
+      numbeta_toselect (k) = j 
+    enddo
+  enddo
+  if( k.ne.n_toselect ) then
+    print *, " error in numbering"
+    stop
+  endif
+
+
+  allocate( Hdiag(n_toselect) )
+
+  ! get < u_k v_l | H | u_k v_l > for all vectors
+  call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  open(UNIT=11, FILE="klHkl_v0.dat", ACTION="WRITE")
+    do i = 1, n_toselect
+      write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i)
+    enddo
+  close(11)
+
+
+  deallocate( Uref, Vref )
+  deallocate( numalpha_toselect, numbeta_toselect, Hdiag )
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+
+
+subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  implicit none
+
+  integer, intent(in)           :: n_toselect, na_max, nb_max
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m, n
+  double precision              :: h12, xtmp
+
+  double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) 
+  double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:)
+
+  double precision              :: t1, t2, t3, t4
+
+  print *, ""
+  print *, " start const_Hdiag"
+  call wall_time(t1)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+ 
+  allocate(Hmat_diag(na_max,nb_max))
+  Hmat_diag = 0.d0
+  
+  allocate( bl1_tmp(na,na,nb_max) )
+  bl1_tmp = 0.d0
+
+  allocate( Vt(nb_max,nb) )
+  do i = 1, nb
+    do n = 1, nb_max
+      Vt(n,i) = Vref(i,n)
+    enddo
+  enddo
+
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          do n = 1, nb_max
+            bl1_tmp(i,k,n) += h12 * Vt(n,j) * Vt(n,l)
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(Vt)
+
+  call wall_time(t2)
+  print *, " end bl1_tmp after (min) ", (t2-t1)/60.
+
+  allocate( Ut(na,na_max) )
+  Ut(1:na,1:na_max) = Uref(1:na,1:na_max)
+  allocate( tmp0(na,nb_max,na_max) )
+  call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0,    &
+             bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), &
+             0.d0, tmp0, size(tmp0,1)*size(tmp0,2)     )
+  deallocate( bl1_tmp )
+
+  call wall_time(t3)
+  print *, " end DGEMM after (min) ", (t3-t2)/60.
+
+  do n = 1, nb_max
+    do m = 1, na_max
+      do k = 1, na
+        Hmat_diag(m,n) += tmp0(k,n,m) * Ut(k,m)
+      enddo
+    enddo
+  enddo
+
+  deallocate( tmp0 , Ut )
+
+  Hdiag(:) = 0.d0
+  do m = 1, n_toselect
+    ii = numalpha_toselect(m)
+    jj = numbeta_toselect (m)
+    Hdiag(m) = Hmat_diag(ii,jj)
+  enddo
+
+  deallocate( Hmat_diag )
+
+  call wall_time(t4)
+  print *, " end const_Hdiag after (min) ", (t4-t3)/60.
+  print *, ""
+
+
+  print *, " total time (min) ", (t4-t1)/60.
+  print *, ""
+
+  return
+end subroutine const_Hdiag
+
+
+
+
+
--- a/devel/svdwf/kl_H_kl_v1.irp.f
+++ b/devel/svdwf/kl_H_kl_v1.irp.f
@ -0,0 +1,352 @@
+program kl_H_kl_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, ia, ib
+  double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta
+  integer                       :: n_toselect, na_max, nb_max
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+
+  integer                       :: cantor_pairing_ij, cantor_pairing_new
+  integer, allocatable          :: cantor_pairing(:), cantor_pairing_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  deallocate( Aref , Dref )
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ibeg_alpha = 1
+  iend_alpha = n_det_alpha_unique
+  na_max     = iend_alpha - ibeg_alpha + 1
+
+  ibeg_beta = 1
+  iend_beta = n_det_beta_unique
+  nb_max    = iend_beta - ibeg_beta + 1
+
+  n_toselect = na_max * nb_max
+
+  print *, ' na_max      = ', na_max
+  print *, ' nb_max      = ', nb_max
+  print *, ' n_toselect  = ', n_toselect
+  
+
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  k = 0
+  do i = ibeg_alpha, iend_alpha
+    do j = ibeg_beta, iend_beta
+      k = k + 1
+      numalpha_toselect(k) = i
+      numbeta_toselect (k) = j 
+    enddo
+  enddo
+  if( k.ne.n_toselect ) then
+    print *, " error in numbering"
+    stop
+  endif
+
+
+  allocate( Hdiag(n_toselect) )
+
+  ! get < u_k v_l | H | u_k v_l > for all vectors
+  call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  open(UNIT=11, FILE="klHkl_v1.dat", ACTION="WRITE")
+    do i = 1, n_toselect
+      write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i)
+    enddo
+  close(11)
+
+
+  deallocate( Uref, Vref )
+  deallocate( numalpha_toselect, numbeta_toselect, Hdiag )
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+
+
+subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  implicit none
+
+  integer, intent(in)           :: n_toselect, na_max, nb_max
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m, n
+  double precision              :: h12, xtmp
+
+  double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) 
+  double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:)
+
+  double precision              :: t1, t2, t3, t4
+
+  print *, ""
+  print *, " start const_Hdiag"
+  call wall_time(t1)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+ 
+  allocate(Hmat_diag(na_max,nb_max))
+  Hmat_diag = 0.d0
+  
+  allocate( bl1_tmp(na,na,nb_max) )
+  bl1_tmp = 0.d0
+
+  allocate( Vt(nb_max,nb) )
+  do i = 1, nb
+    do n = 1, nb_max
+      Vt(n,i) = Vref(i,n)
+    enddo
+  enddo
+
+ !$OMP PARALLEL DEFAULT(NONE)                                          &
+ !$OMP          PRIVATE(i,j,k,l,n,h12,det1,det2,degree)                &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,nb_max,Vt,bl1_tmp)
+
+ !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+ !$OMP CRITICAL
+          do n = 1, nb_max
+            bl1_tmp(i,k,n) += h12 * Vt(n,j) * Vt(n,l)
+          enddo
+ !$OMP END CRITICAL
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(Vt)
+
+  call wall_time(t2)
+  print *, " end bl1_tmp after (min) ", (t2-t1)/60.
+
+  allocate( Ut(na,na_max) )
+  Ut(1:na,1:na_max) = Uref(1:na,1:na_max)
+  allocate( tmp0(na,nb_max,na_max) )
+  call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0,    &
+             bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), &
+             0.d0, tmp0, size(tmp0,1)*size(tmp0,2)     )
+  deallocate( bl1_tmp )
+
+  call wall_time(t3)
+  print *, " end DGEMM after (min) ", (t3-t2)/60.
+
+ !$OMP PARALLEL DEFAULT(NONE)                             &
+ !$OMP          PRIVATE(k,m,n,Hmat_diag_tmp)              &
+ !$OMP          SHARED(na,na_max,nb_max,Ut,tmp0,Hmat_diag)
+  allocate( Hmat_diag_tmp(na_max,nb_max) )
+  Hmat_diag_tmp = 0.d0
+ !$OMP DO
+  do n = 1, nb_max
+    do m = 1, na_max
+      do k = 1, na
+        Hmat_diag_tmp(m,n) += tmp0(k,n,m) * Ut(k,m)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP CRITICAL
+  do n = 1, nb_max
+    do m = 1, na_max
+      Hmat_diag(m,n) += Hmat_diag_tmp(m,n)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( Hmat_diag_tmp )
+ !$OMP END PARALLEL
+
+  deallocate( tmp0 , Ut )
+
+  Hdiag(:) = 0.d0
+  do m = 1, n_toselect
+    ii = numalpha_toselect(m)
+    jj = numbeta_toselect (m)
+    Hdiag(m) = Hmat_diag(ii,jj)
+  enddo
+
+  deallocate( Hmat_diag )
+
+  call wall_time(t4)
+  print *, " end const_Hdiag after (min) ", (t4-t3)/60.
+  print *, ""
+
+
+  print *, " total time (min) ", (t4-t1)/60.
+  print *, ""
+
+  return
+end subroutine const_Hdiag
+
+
+
+
+
--- a/devel/svdwf/kl_H_kl_v2.irp.f
+++ b/devel/svdwf/kl_H_kl_v2.irp.f
@ -0,0 +1,365 @@
+program kl_H_kl_v2
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, ia, ib
+  double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta
+  integer                       :: n_toselect, na_max, nb_max
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+
+  integer                       :: cantor_pairing_ij, cantor_pairing_new
+  integer, allocatable          :: cantor_pairing(:), cantor_pairing_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  deallocate( Aref , Dref )
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+
+  ibeg_alpha = 1
+  iend_alpha = n_det_alpha_unique
+  na_max     = iend_alpha - ibeg_alpha + 1
+
+  ibeg_beta = 1
+  iend_beta = n_det_beta_unique
+  nb_max    = iend_beta - ibeg_beta + 1
+
+  n_toselect = na_max * nb_max
+
+  print *, ' na_max      = ', na_max
+  print *, ' nb_max      = ', nb_max
+  print *, ' n_toselect  = ', n_toselect
+  
+
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  k = 0
+  do i = ibeg_alpha, iend_alpha
+    do j = ibeg_beta, iend_beta
+      k = k + 1
+      numalpha_toselect(k) = i
+      numbeta_toselect (k) = j 
+    enddo
+  enddo
+  if( k.ne.n_toselect ) then
+    print *, " error in numbering"
+    stop
+  endif
+
+
+  allocate( Hdiag(n_toselect) )
+
+  ! get < u_k v_l | H | u_k v_l > for all vectors
+  call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  open( UNIT=11, FILE="klHkl_v2.dat", ACTION="WRITE")
+    do i = 1, n_toselect
+      write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i)
+    enddo
+  close(11)
+
+
+  deallocate( Uref, Vref )
+  deallocate( numalpha_toselect, numbeta_toselect, Hdiag )
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+
+
+subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag)
+
+  implicit none
+
+  integer, intent(in)           :: n_toselect, na_max, nb_max
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hdiag(n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m, n
+  double precision              :: h12, xtmp
+
+  double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) 
+  double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:)
+
+  double precision              :: t1, t2, t3, t4
+
+  print *, ""
+  print *, " start const_Hdiag"
+  call wall_time(t1)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+ 
+  allocate(Hmat_diag(na_max,nb_max))
+  Hmat_diag = 0.d0
+  
+  allocate( bl1_tmp(na,na,nb_max) )
+  bl1_tmp = 0.d0
+
+  allocate( Vt(nb_max,nb) )
+  do i = 1, nb
+    do n = 1, nb_max
+      Vt(n,i) = Vref(i,n)
+    enddo
+  enddo
+
+ !$OMP PARALLEL DEFAULT(NONE)                                          &
+ !$OMP          PRIVATE(i,j,k,l,n,h12,det1,det2,degree,tmp0)           &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,nb_max,Vt,bl1_tmp)
+
+  allocate( tmp0(na,na,nb_max) )
+  tmp0 = 0.d0
+
+ !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          do n = 1, nb_max
+            tmp0(i,k,n) += h12 * Vt(n,j) * Vt(n,l)
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP CRITICAL
+  do n = 1, nb_max
+    do k = 1, na
+      do i = 1, na
+        bl1_tmp(i,k,n) += tmp0(i,k,n)
+      enddo
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate( tmp0 )
+ !$OMP END PARALLEL
+
+  deallocate(Vt)
+
+  call wall_time(t2)
+  print *, " end bl1_tmp after (min) ", (t2-t1)/60.
+
+  allocate( Ut(na,na_max) )
+  Ut(1:na,1:na_max) = Uref(1:na,1:na_max)
+  allocate( tmp0(na,nb_max,na_max) )
+  call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0,    &
+             bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), &
+             0.d0, tmp0, size(tmp0,1)*size(tmp0,2)     )
+  deallocate( bl1_tmp )
+
+  call wall_time(t3)
+  print *, " end DGEMM after (min) ", (t3-t2)/60.
+
+ !$OMP PARALLEL DEFAULT(NONE)                             &
+ !$OMP          PRIVATE(k,m,n,Hmat_diag_tmp)              &
+ !$OMP          SHARED(na,na_max,nb_max,Ut,tmp0,Hmat_diag)
+  allocate( Hmat_diag_tmp(na_max,nb_max) )
+  Hmat_diag_tmp = 0.d0
+ !$OMP DO
+  do n = 1, nb_max
+    do m = 1, na_max
+      do k = 1, na
+        Hmat_diag_tmp(m,n) += tmp0(k,n,m) * Ut(k,m)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP CRITICAL
+  do n = 1, nb_max
+    do m = 1, na_max
+      Hmat_diag(m,n) += Hmat_diag_tmp(m,n)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( Hmat_diag_tmp )
+ !$OMP END PARALLEL
+
+  deallocate( tmp0 , Ut )
+
+  Hdiag(:) = 0.d0
+  do m = 1, n_toselect
+    ii = numalpha_toselect(m)
+    jj = numbeta_toselect (m)
+    Hdiag(m) = Hmat_diag(ii,jj)
+  enddo
+
+  deallocate( Hmat_diag )
+
+  call wall_time(t4)
+  print *, " end const_Hdiag after (min) ", (t4-t3)/60.
+  print *, ""
+
+
+  print *, " total time (min) ", (t4-t1)/60.
+  print *, ""
+
+  return
+end subroutine const_Hdiag
+
+
+
+
+
--- a/devel/svdwf/linear_algebra.irp.f
+++ b/devel/svdwf/linear_algebra.irp.f
@ -0,0 +1,197 @@
+subroutine svd_s(A, LDA, U, LDU, D, Vt, LDVt, m, n)
+  implicit none
+  BEGIN_DOC
+  ! !!!
+  ! DGESVD computes the singular value decomposition (SVD) of a real
+  ! M-by-N matrix A, optionally computing the left and/or right singular
+  ! vectors. The SVD is written:
+  !      A = U * SIGMA * transpose(V)
+  ! where SIGMA is an M-by-N matrix which is zero except for its
+  ! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+  ! V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+  ! are the singular values of A; they are real and non-negative, and
+  ! are returned in descending order.  The first min(m,n) columns of
+  ! U and V are the left and right singular vectors of A.
+  !
+  ! Note that the routine returns V**T, not V.
+  ! !!!
+  END_DOC
+
+  integer, intent(in)             :: LDA, LDU, LDVt, m, n
+  double precision, intent(in)    :: A(LDA,n)
+  double precision, intent(out)   :: U(LDU,m), Vt(LDVt,n), D(min(m,n))
+  double precision,allocatable    :: work(:), A_tmp(:,:)
+  integer                         :: info, lwork, i, j, k
+
+
+   allocate (A_tmp(LDA,n))
+   do k=1,n
+     do i=1,m
+       !A_tmp(i,k) = A(i,k) + 1d-16
+       A_tmp(i,k) = A(i,k)
+     enddo
+   enddo
+ 
+   ! Find optimal size for temp arrays
+   allocate(work(1))
+   lwork = -1
+   ! 'A': all M columns of U are returned in array U
+   ! 'A': all N rows of V**T are returned in the array VT
+   call dgesvd('A', 'A', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info)
+   ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+   if( info.ne.0 ) then
+     print *, ' problem in first call DGESVD !!!!'
+     print *, ' info = ', info
+     print *, '  < 0 : if INFO = -i, the i-th argument had an illegal value.'
+     print *, '  > 0 : if DBDSQR did not converge, INFO specifies how many  '
+     print *, '        superdiagonals of an intermediate bidiagonal form B  '
+     print *, '        did not converge to zero. See the description of WORK'
+     print *, '        above for details.                                   '
+     stop
+   endif
+   lwork = max(int(work(1)), 5*MIN(M,N))
+   deallocate(work)
+
+   allocate(work(lwork))
+
+   call dgesvd('A', 'A', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info)
+   if( info.ne.0 ) then
+     print *, ' problem in second call DGESVD !!!!'
+     print *, ' info = ', info
+     print *, '  < 0 : if INFO = -i, the i-th argument had an illegal value.'
+     print *, '  > 0 : if DBDSQR did not converge, INFO specifies how many  '
+     print *, '        superdiagonals of an intermediate bidiagonal form B  '
+     print *, '        did not converge to zero. See the description of WORK'
+     print *, '        above for details.                                   '
+     stop
+   endif
+
+   deallocate(A_tmp,work)
+ 
+   !do j=1, m
+   !  do i=1, LDU
+   !    if (dabs(U(i,j)) < 1.d-14)  U(i,j) = 0.d0
+   !  enddo
+   !enddo
+   !do j = 1, n
+   !  do i = 1, LDVt
+   !    if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0
+   !  enddo
+   !enddo
+ 
+end
+
+
+
+subroutine svd_s2(A, LDA, U, LDU, D, Vt, LDVt, m, n)
+  implicit none
+
+  integer, intent(in)             :: LDA, LDU, LDVt, m, n
+  double precision, intent(in)    :: A(LDA,n)
+  double precision, intent(out)   :: U(LDU,min(m,n)), Vt(LDVt,n), D(min(m,n))
+  double precision,allocatable    :: work(:), A_tmp(:,:)
+  integer                         :: info, lwork, i, j, k
+
+
+   allocate (A_tmp(LDA,n))
+   do k=1,n
+     do i=1,m
+       A_tmp(i,k) = A(i,k)
+     enddo
+   enddo
+ 
+   ! Find optimal size for temp arrays
+   allocate(work(1))
+   lwork = -1
+   ! 'A': all M columns of U are returned in array U
+   ! 'A': all N rows of V**T are returned in the array VT
+   call dgesvd('A', 'S', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info)
+   ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+   if( info.ne.0 ) then
+     print *, ' problem in first call DGESVD !!!!'
+     stop
+   endif
+   lwork = max(int(work(1)), 5*MIN(M,N))
+   deallocate(work)
+
+   allocate(work(lwork))
+
+   call dgesvd('A', 'S', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info)
+   if( info.ne.0 ) then
+     print *, ' problem in second call DGESVD !!!!'
+     stop
+   endif
+
+   deallocate(A_tmp,work)
+ 
+   do j=1, min(m,n)
+     do i=1, m
+       if (dabs(U(i,j)) < 1.d-14)  U(i,j) = 0.d0
+     enddo
+   enddo
+   do j = 1, n
+     do i = 1, LDVt
+       if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0
+     enddo
+   enddo
+ 
+end
+
+
+
+subroutine my_ortho_qr(A,LDA,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !
+  ! A : matrix to orthogonalize
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! m : Number of rows of A
+  !
+  ! n : Number of columns of A
+  !
+  END_DOC
+  integer, intent(in)             :: m, n, LDA
+  double precision, intent(inout) :: A(LDA,n)
+  integer                         :: LWORK, INFO, nTAU, ii, jj
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision                :: Adorgqr(LDA,n)
+
+  allocate (TAU(min(m,n)), WORK(1))
+
+  LWORK=-1
+  call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+  LWORK=max(n,int(WORK(1)))
+
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  if(INFO.ne.0 ) then
+   print*, 'problem in DGEQRF'
+  endif
+
+  nTAU = size(TAU)
+  do jj = 1, n
+    do ii = 1, LDA
+      Adorgqr(ii,jj) = A(ii,jj)
+    enddo
+  enddo
+
+  LWORK=-1
+  call dorgqr(m, n, nTAU, Adorgqr, LDA, TAU, WORK, LWORK, INFO)
+  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+  LWORK=max(n,int(WORK(1)))
+
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  call dorgqr(m, n, nTAU, A, LDA, TAU, WORK, LWORK, INFO)
+  if(INFO.ne.0 ) then
+   print*, 'problem in DORGQR'
+  endif
+
+
+  deallocate(WORK,TAU)
+end
--- a/devel/svdwf/perform_RSVD.py
+++ b/devel/svdwf/perform_RSVD.py
@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+
+import os, sys
+
+#QP_PATH=os.environ["QMCCHEM_PATH"]
+#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/")
+
+import scipy
+from   scipy import linalg
+
+from   ezfio import ezfio
+from   datetime import datetime
+import numpy as np
+import time 
+
+
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+def get_Aref():
+
+    Aref = np.zeros( (n_alpha, n_beta) )
+    for k in range(n_det):
+        i         = A_rows[k] - 1
+        j         = A_cols[k] - 1
+        Aref[i,j] = A_vals[0][k]
+
+    return( Aref )
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+def powit_RSVD(X, n_TSVD, nb_powit, nb_oversamp):
+
+    print(" --- begin powit_RSVD --- ")
+    print(" n_TSVD = {}".format(n_TSVD))
+    print(" pow it = {} & nb oversampling = {}".
+          format(nb_powit,nb_oversamp))
+
+    G  = np.random.randn(X.shape[1], n_TSVD+nb_oversamp)
+    Q  = QR_fact( np.dot(X,G) )
+
+    for i in range(nb_powit):
+        ti = time.time()
+        print(" start pow it = {}".format(i))
+
+        Q = QR_fact( np.dot(X.T,Q) )
+        Q = QR_fact( np.dot(X,Q) )
+
+        tf = time.time()
+        dt = (tf-ti)/60.
+        print(" end pow it = {} after {} min".format(i,dt))
+
+    Y = np.dot(Q.T,X)
+
+    U, S, VT = np.linalg.svd(Y, full_matrices=1)
+    U = np.dot(Q,U)
+
+    print( " --- end powit_RSVD --- \n")
+    return U, S, VT
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+def QR_fact(X):
+
+    Q, _ = linalg.qr(X, mode="full")
+    #Q,R      = np.linalg.qr(X, mode="complete")
+    #D        = np.diag( np.sign( np.diag(R) ) )
+    Qunique  = Q #np.dot(Q,D)
+    #Runique = np.dot(D,R)
+
+    return(Qunique)
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+def TSVD_save_EZFIO():
+
+    U_toEZFIO = np.zeros( ( 1, U.shape[1], U.shape[0] ) )
+    V_toEZFIO = np.zeros( ( 1, V.shape[1], V.shape[0] ) )
+    U_toEZFIO[0,:,:] = U_TSVD.T
+    V_toEZFIO[0,:,:] = V_TSVD.T
+
+    ezfio.set_spindeterminants_n_svd_coefs( n_TSVD )
+    ezfio.set_spindeterminants_psi_svd_alpha( U_toEZFIO )
+    ezfio.set_spindeterminants_psi_svd_beta ( V_toEZFIO )
+    ezfio.set_spindeterminants_psi_svd_coefs( S_RSVD )
+
+    print(' SVD vectors & coeff are saved to EZFIO ')
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+
+
+if __name__ == "__main__":
+
+
+    print("")
+    print(" Today's date:", datetime.now() )
+
+    # EZFIO file
+    #EZFIO_file = "/home/aammar/qp2/src/svdwf/h2o_work/FN_test/cc_pCVDZ/h2o_dz"
+    EZFIO_file = "/home/aammar/qp2/src/svdwf/h2o_work/FN_test/cipsi_calcul/h2o_dz_fci"
+    ezfio.set_file(EZFIO_file)
+    print(" EZFIO = {}\n".format(EZFIO_file))
+
+    #read_wf = True
+    #ezfio.read_wf = True
+    #TOUCH read_wf
+
+
+    n_det = ezfio.get_spindeterminants_n_det()
+    print(' n_det = {}'.format(n_det))
+
+    n_alpha = ezfio.get_spindeterminants_n_det_alpha()
+    n_beta  = ezfio.get_spindeterminants_n_det_beta()
+    print(' matrix dimensions = {} x {} = {} \n'.format(n_alpha, n_beta, n_alpha*n_beta))
+
+    A_rows = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows())
+    A_cols = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns())
+    A_vals = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values())
+    Aref   = get_Aref()
+    A_norm = np.linalg.norm(Aref, ord='fro')
+
+
+    npow        = 15
+    nb_oversamp = 10
+    n_TSVD      = 100 #min(n_alpha,n_beta)
+
+    t_beg = time.time()
+    U, S_RSVD, Vt = powit_RSVD(Aref, n_TSVD, npow, nb_oversamp)
+    print(' powit_RSVD time = {}\n'.format((time.time()-t_beg)/60.))
+    
+    S_mat = np.zeros((n_alpha,n_beta))
+    for i in range(n_TSVD):
+        S_mat[i,i] = S_RSVD[i]
+    err_SVD = 100. * np.linalg.norm( Aref - np.dot(U,np.dot(S_mat,Vt)), ord="fro") / A_norm
+    print(' powit_RSVD error (%) = {} \n'.format(err_SVD))
+#______________________________________________________________________________________________________________________
--- a/devel/svdwf/psiSVD_naiv1by1_v1.irp.f
+++ b/devel/svdwf/psiSVD_naiv1by1_v1.irp.f
@ -0,0 +1,527 @@
+program psiSVD_naiv1by1_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2, Em, norm
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, jj, ia, ib, ja, jb
+  double precision, allocatable :: Hdiag(:), H0_1d(:), H0_tmp(:,:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: n_selected, n_toselect, n_tmp, na_max, nb_max
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+  !  ***   PARAMETERS   ***  !
+  na_max = n_det_alpha_unique
+  nb_max = n_det_beta_unique
+  !  ***     *****      ***  !
+
+  print *, ' na_max  = ', na_max
+  print *, ' nb_max  = ', nb_max
+
+  ! ---------------------------------------------------------------------------------------
+  !                            initial wavefunction: psi_0
+
+  n_selected = 1
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+
+  numalpha_selected(1) = 1
+  numbeta_selected (1) = 1
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !         construnc the initial basis to select phi_1 from the FSVD
+
+  n_toselect = min(na_max,nb_max) - n_selected
+  print *, ' toselect = ', n_toselect
+  print *, ' to trun  = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                        read < u_k v_l | H | u_k v_l > for all vectors 
+
+  allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) )
+ 
+  n_tmp = n_det_alpha_unique * n_det_beta_unique - 1
+
+  open( unit=11, FILE="klHkl_v1.dat", ACTION="READ")
+  !open( unit=11, FILE="klHkl_v2.dat", ACTION="READ")
+
+    read(11,*) i, i, E0
+    H0(1,1) = E0
+
+    do i = 1, n_tmp
+      read(11,*) ia, ib, ctmp 
+      if( ia .eq. ib ) then
+        ii        = ia - 1
+        Hdiag(ii) = ctmp
+        !print *, ia, ib , Hdiag(ia-1)
+      endif
+    enddo
+
+  close(11)
+
+  ! ---------------------------------------------------------------------------------------
+
+  E0 = E0 + nuclear_repulsion
+  Em = E0 
+  print*, ' space dimen     = ', n_selected
+  print*, ' ground state Em = ', Em
+  print*, ' ground state E0 = ', E0
+
+  na_new  = 1
+  nb_new  = 1
+  ind_new = 0
+
+  !________________________________________________________________________________________________________
+  !
+  !                              increase the size of psi0 iteratively
+  !________________________________________________________________________________________________________
+
+  !  ***   PARAMETERS   ***  !
+  rank_max = min( na_max , nb_max ) - 1
+  !  ***     *****      ***  !
+
+  if( rank_max .gt. n_toselect ) then
+    print *, " rank_max should be less then n_toselect"
+    stop
+  endif
+
+
+  do while( n_selected .lt. rank_max )
+
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    print*, '                         '
+    print*, '       new iteration     '
+
+    if( n_toselect .lt. 1 ) then
+
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      print*, ' no more vectors to construct a new basis '
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      stop
+
+    else
+
+      ! ---------------------------------------------------------------------------------------
+      !                                select a new vector
+
+      na_new  += 1 
+      nb_new  += 1
+      ind_new += 1
+      print *, ' best vector', na_new, nb_new
+
+      ! < psi_old | H | delta_psi > 
+      allocate( H0_1d(n_selected) )
+      call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                      new psi
+
+      allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected) )
+      allocate( H0_tmp(n_selected,n_selected)  )
+
+      numalpha_tmp(:) = numalpha_selected(:)
+      numbeta_tmp (:) = numbeta_selected (:)
+      H0_tmp    (:,:) = H0             (:,:)
+
+      deallocate( numalpha_selected, numbeta_selected, H0 )
+
+      n_tmp      = n_selected
+      n_selected = n_selected + 1
+
+      allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+      allocate( H0(n_selected,n_selected)  )
+      H0(:,:) = 0.d0
+
+      do l = 1, n_tmp
+        numalpha_selected(l) = numalpha_tmp(l)
+        numbeta_selected (l) = numbeta_tmp (l)
+      enddo
+      H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp)
+
+      deallocate( numalpha_tmp, numbeta_tmp, H0_tmp )
+
+      numalpha_selected(n_selected) = na_new
+      numbeta_selected (n_selected) = nb_new
+
+      H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp)
+      H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp)
+      deallocate( H0_1d )
+      H0(n_selected,n_selected) = Hdiag(ind_new) 
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                  energy without diag
+
+      ! < psi | psi > 
+      norm = 0.d0
+      do j = 1, n_selected
+        ja = numalpha_selected(j)
+        jb = numbeta_selected (j)
+        if(ja.eq.jb)  norm = norm + Dref(ja)*Dref(jb)
+      enddo
+
+      ! < psi | H | psi > 
+      Em = 0.d0
+      do j = 1, n_selected
+        ja = numalpha_selected(j)
+        jb = numbeta_selected (j)
+        if(ja.eq.jb) then
+          do i = 1, n_selected
+            ia = numalpha_selected(i)
+            ib = numbeta_selected (i)
+            if(ia.eq.ib)  Em = Em + Dref(ja) * H0(j,i) * Dref(ia)
+          enddo
+        endif
+      enddo
+
+      ! Em = < psi | H | psi > / < psi | psi > 
+      Em = Em / norm + nuclear_repulsion
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                  energy with diag
+
+      allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+      call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+      ! get the postsvd ground state
+      allocate( check_ov(n_selected) )
+      do l = 1, n_selected
+        overlop = 0.d0   
+        do i = 1, n_selected
+          ia = numalpha_selected(i)
+          ib = numbeta_selected (i)
+          if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia)
+        enddo
+        check_ov(l) = dabs(overlop)
+      enddo
+      ind_gs       = MAXLOC( check_ov, DIM=1 )
+      overlop      = check_ov(ind_gs)
+      E0           = eigval0(ind_gs)+nuclear_repulsion
+      coeff_psi(:) = eigvec0(:,ind_gs)
+
+      deallocate( check_ov, eigval0, eigvec0 )
+ 
+      ! ---------------------------------------------------------------------------------------
+
+
+      print*, ' space dimen = ', n_selected
+      print*, ' E bef diag  = ', Em
+      print*, ' E aft diag  = ', E0
+      print*, ' overlop     = ', overlop
+      print*, ' index       = ', ind_gs
+
+      write(211, '( 3(I5,3X), 4(F15.8,3X) )') n_selected, na_new, nb_new, Em, E0, overlop
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                      remove selected pair | na_new   nb_new >
+
+      n_toselect = n_toselect - 1
+      print*, ' rank to select  = ', n_toselect
+
+      ! ---------------------------------------------------------------------------------------
+
+
+    endif 
+
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    print*, " "
+    print*, "  elapsed time (min) = ", W_tot_time_it/60.d0
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+
+  deallocate( Dref )
+  deallocate( Uref, Vref )
+
+deallocate( coeff_psi )
+  deallocate( numalpha_selected, numbeta_selected )
+  deallocate( H0, Hdiag )
+
+end
+
+
+
+
+subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+  implicit none
+
+  integer, intent(in)           :: na_new, nb_new, na_max, nb_max, n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0_1d(n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m
+  double precision              :: h12
+
+  double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:)
+  double precision, allocatable :: U1d(:), V1d(:) 
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+
+  double precision              :: ti, tf
+
+  print *, ""
+  print *, " start const_H0_1d"
+  call wall_time(ti)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( U1d(na) , V1d(nb) )
+  U1d(1:na) = Uref(1:na,na_new)
+  V1d(1:nb) = Vref(1:nb,nb_new)
+
+  allocate( tmp1(na,nb) )
+  tmp1 = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                          &
+ !$OMP          PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2)             &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,U1d,V1d,tmp1)
+
+  allocate( tmp2(na,nb) )
+  tmp2 = 0.d0
+
+ !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          tmp2(i,j) += h12 * U1d(k) * V1d(l)
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP CRITICAL
+  do j = 1, nb
+    do i = 1, na
+      tmp1(i,j) += tmp2(i,j)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate( tmp2 )
+ !$OMP END PARALLEL
+
+  deallocate( U1d , V1d )
+
+  ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m)
+  allocate( Utmp(na,na_max) )
+  Utmp(1:na,1:na_max) = Uref(1:na,1:na_max)
+
+  allocate( tmp2(nb,na_max) )
+  call DGEMM('T', 'N', nb, na_max, na, 1.d0,   &
+       tmp1, size(tmp1,1), Utmp, size(Utmp,1), &
+       0.d0, tmp2, size(tmp2,1)                )
+  deallocate( tmp1 )
+  deallocate( Utmp )
+
+  ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n)
+  allocate( Vtmp(nb,nb_max) )
+  Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max)
+
+  allocate( Hmat_kl(na_max,nb_max) )
+  call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, &
+       tmp2, size(tmp2,1), Vtmp, size(Vtmp,1),   &
+       0.d0, Hmat_kl, size(Hmat_kl,1)            )
+  deallocate( tmp2 )
+  deallocate( Vtmp )
+
+  do m = 1, n_selected
+    ii       = numalpha_selected(m)
+    jj       = numbeta_selected (m)
+    H0_1d(m) = Hmat_kl(ii,jj)
+  enddo
+  deallocate( Hmat_kl )
+
+  call wall_time(tf)
+  print *, " end const_H0_1d after (min) ", (tf-ti)/60.
+  print *, ""
+
+  return
+end subroutine const_H0_1d
+
+
+
--- a/devel/svdwf/psiSVD_naivBbyB_v0.irp.f
+++ b/devel/svdwf/psiSVD_naivBbyB_v0.irp.f
@ -0,0 +1,358 @@
+program psiSVD_naivBbyB_v0
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max, n_TSVD, n_selected
+  double precision              :: E0, overlop
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:) 
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+
+  integer                       :: ii
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det    :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                            initial wavefunction: psi_0
+
+  n_TSVD     = 1
+  n_selected = 1
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  numalpha_selected(1) = 1
+  numbeta_selected (1) = 1
+
+  ! get E0 = < psi_0 | H | psi_0 >
+  allocate( H0(n_selected,n_selected) )
+  call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+  E0 = H0(1,1) + nuclear_repulsion
+  print*, ' ground state E0 = ', E0
+
+  deallocate( H0 )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                              increase the size of psi0 iteratively
+  !________________________________________________________________________________________________________
+
+  
+  rank_max = n_det_alpha_unique*n_det_beta_unique  ! 15*15
+
+  do while( n_selected .lt. rank_max )
+
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+    print*, '                         '
+    print*, '       new iteration     '
+
+
+    deallocate( numalpha_selected, numbeta_selected )
+
+    n_TSVD     = n_TSVD + 1
+    n_selected = n_TSVD * n_TSVD
+    allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+    l = 0
+    do i = 1, n_TSVD
+      do j = 1, n_TSVD
+        l = l + 1
+        numalpha_selected(l) = i
+        numbeta_selected (l) = j
+      enddo
+    enddo
+    if( l.ne.n_selected) then
+      print *, "error in numbering"
+      stop
+    endif
+
+    ! construct and diagonalise the hamiltonian < psi_selected | H | psi_selected >
+    allocate( H0(n_selected,n_selected) )
+    call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+    allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+    call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+    ! get the postsvd ground state
+    allocate( check_ov(n_selected) )
+    do l = 1, n_selected
+      overlop = 0.d0   
+      do i = 1, n_TSVD
+        ii = i + (i-1)*n_TSVD
+        overlop = overlop + eigvec0(ii,l) * Dref(i)
+      enddo
+      check_ov(l) = dabs(overlop)
+    enddo
+    ind_gs  = MAXLOC( check_ov, DIM=1 )
+    overlop = check_ov(ind_gs)
+    E0      = eigval0(ind_gs)+nuclear_repulsion
+    print*, ' space dimen = ', n_selected
+    print*, ' diag energy = ', E0
+    print*, ' overlop    = ', overlop
+
+    deallocate( H0 )
+    deallocate( check_ov, eigval0, eigvec0 )
+    
+    ! ---------------------------------------------------------------------------------------
+
+    write(220, *) n_selected, E0
+
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    print*, " "
+    print*, "  elapsed time (min) = ", W_tot_time_it/60.d0
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+  deallocate( Dref )
+  deallocate( Uref, Vref )
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+subroutine const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected,n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l
+  integer                       :: iin, jjn, iim, jjm, n, m
+  double precision              :: h12, x
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+  double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+  H0(:,:) = 0.d0
+
+  allocate( tmp1(nb,nb,n_TSVD,n_TSVD) )
+  tmp1(:,:,:,:) = 0.d0
+
+  allocate( Utmp(n_TSVD,na) )
+  do i = 1, na
+    do iin = 1, n_TSVD
+      Utmp(iin,i) = Uref(i,iin)
+    enddo 
+  enddo 
+
+  do l = 1, nb
+    do j = 1, nb
+      det2(:,2) = psi_det_beta_unique(:,l)
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          do iin = 1, n_TSVD
+            x = Utmp(iin,i) * h12
+            if( x == 0.d0 ) cycle
+            do iim = 1, n_TSVD
+              tmp1(j,l,iim,iin) += Utmp(iim,k) * x
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( Utmp )
+
+  allocate( Vtmp(nb,n_TSVD) )
+  do iin = 1, n_TSVD
+    do i = 1, nb
+      Vtmp(i,iin) = Vref(i,iin)
+    enddo 
+  enddo 
+
+  allocate( tmp2(nb,n_TSVD,n_TSVD,n_TSVD) )
+  call DGEMM('T','N', nb*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0          &
+             , tmp1, size(tmp1,1)                                 &
+             , Vtmp, size(Vtmp,1)                                 &
+             , 0.d0, tmp2, size(tmp2,1)*size(tmp2,2)*size(tmp2,3) )
+
+  deallocate(tmp1)  
+  allocate( tmp1(n_TSVD,n_TSVD,n_TSVD,n_TSVD) )
+  call DGEMM('T','N', n_TSVD*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0      &
+             , tmp2, size(tmp2,1)                                 &
+             , Vtmp, size(Vtmp,1)                                 &
+             , 0.d0, tmp1, size(tmp1,1)*size(tmp1,2)*size(tmp1,3) )
+  deallocate( tmp2, Vtmp )
+
+  do n = 1, n_selected
+    iin = numalpha_selected(n)
+    jjn = numbeta_selected (n)
+
+    do m = 1, n_selected
+      iim = numalpha_selected(m)
+      jjm = numbeta_selected (m)
+
+      H0(m,n) = tmp1(iin,iim,jjn,jjm)
+
+    enddo
+  enddo
+
+  deallocate( tmp1 )
+
+  return
+end subroutine const_psiHpsi
+
+
--- a/devel/svdwf/psiSVD_naivBbyB_v1.irp.f
+++ b/devel/svdwf/psiSVD_naivBbyB_v1.irp.f
@ -0,0 +1,383 @@
+program psiSVD_naivBbyB_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max, n_TSVD, n_selected
+  double precision              :: E0, overlop
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:) 
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+
+  integer                       :: ii
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det    :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                            initial wavefunction: psi_0
+
+  n_TSVD     = 1
+  n_selected = 1
+  allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+  numalpha_selected(1) = 1
+  numbeta_selected (1) = 1
+
+  ! get E0 = < psi_0 | H | psi_0 >
+  allocate( H0(n_selected,n_selected) )
+  call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+  E0 = H0(1,1) + nuclear_repulsion
+  print*, ' ground state E0 = ', E0
+
+  deallocate( H0 )
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+
+  !________________________________________________________________________________________________________
+  !
+  !                              increase the size of psi0 iteratively
+  !________________________________________________________________________________________________________
+
+  
+  rank_max = n_det_alpha_unique*n_det_beta_unique  ! 15*15
+
+  do while( n_selected .lt. rank_max )
+
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+    print*, '                         '
+    print*, '       new iteration     '
+
+
+    deallocate( numalpha_selected, numbeta_selected )
+
+    n_TSVD     = n_TSVD + 1
+    n_selected = n_TSVD * n_TSVD
+    allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
+    l = 0
+    do i = 1, n_TSVD
+      do j = 1, n_TSVD
+        l = l + 1
+        numalpha_selected(l) = i
+        numbeta_selected (l) = j
+      enddo
+    enddo
+    if( l.ne.n_selected) then
+      print *, "error in numbering"
+      stop
+    endif
+
+    ! construct and diagonalise the hamiltonian < psi_selected | H | psi_selected >
+    allocate( H0(n_selected,n_selected) )
+    call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+    allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+    call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+    ! get the postsvd ground state
+    allocate( check_ov(n_selected) )
+    do l = 1, n_selected
+      overlop = 0.d0   
+      do i = 1, n_TSVD
+        ii = i + (i-1)*n_TSVD
+        overlop = overlop + eigvec0(ii,l) * Dref(i)
+      enddo
+      check_ov(l) = dabs(overlop)
+    enddo
+    ind_gs  = MAXLOC( check_ov, DIM=1 )
+    overlop = check_ov(ind_gs)
+    E0      = eigval0(ind_gs)+nuclear_repulsion
+    print*, ' space dimen = ', n_selected
+    print*, ' diag energy = ', E0
+    print*, ' overlop    = ', overlop
+
+    deallocate( H0 )
+    deallocate( check_ov, eigval0, eigvec0 )
+    
+    ! ---------------------------------------------------------------------------------------
+
+    write(221, *) n_selected, E0
+
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    print*, " "
+    print*, "  elapsed time (min) = ", W_tot_time_it/60.d0
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+  deallocate( Dref )
+  deallocate( Uref, Vref )
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+end
+
+
+
+
+
+
+
+subroutine const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0)
+
+  implicit none
+
+  integer, intent(in)           :: n_TSVD, n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0(n_selected,n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l
+  integer                       :: iin, jjn, iim, jjm, n, m
+  double precision              :: h12, x
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+  double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+  H0(:,:) = 0.d0
+
+  allocate( tmp1(nb,nb,n_TSVD,n_TSVD) )
+  tmp1(:,:,:,:) = 0.d0
+
+  allocate( Utmp(n_TSVD,na) )
+  do i = 1, na
+    do iin = 1, n_TSVD
+      Utmp(iin,i) = Uref(i,iin)
+    enddo 
+  enddo 
+
+ !$OMP PARALLEL DEFAULT(NONE)                                           &
+ !$OMP          PRIVATE(iin,iim,i,j,k,l,h12,x,det1,det2,degree,tmp2)    &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,  &
+ !$OMP                 N_int,n_TSVD,Utmp,tmp1)
+
+  allocate( tmp2(nb,nb,n_TSVD,n_TSVD) )
+  tmp2(:,:,:,:) = 0.d0
+
+ !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
+  do l = 1, nb
+    do j = 1, nb
+      det2(:,2) = psi_det_beta_unique(:,l)
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          do iin = 1, n_TSVD
+            x = Utmp(iin,i) * h12
+            if( x == 0.d0 ) cycle
+            do iim = 1, n_TSVD
+              tmp2(j,l,iim,iin) += Utmp(iim,k) * x
+            enddo
+          enddo
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP CRITICAL
+  do iin = 1, n_TSVD
+    do iim = 1, n_TSVD
+      do l = 1, nb
+        do j = 1, nb
+          tmp1(j,l,iim,iin) += tmp2(j,l,iim,iin)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END CRITICAL
+  deallocate( tmp2 )
+
+ !$OMP END PARALLEL
+  
+  deallocate( Utmp )
+
+  allocate( Vtmp(nb,n_TSVD) )
+  do iin = 1, n_TSVD
+    do i = 1, nb
+      Vtmp(i,iin) = Vref(i,iin)
+    enddo 
+  enddo 
+
+  allocate( tmp2(nb,n_TSVD,n_TSVD,n_TSVD) )
+  call DGEMM('T','N', nb*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0          &
+             , tmp1, size(tmp1,1)                                 &
+             , Vtmp, size(Vtmp,1)                                 &
+             , 0.d0, tmp2, size(tmp2,1)*size(tmp2,2)*size(tmp2,3) )
+
+  deallocate(tmp1)  
+  allocate( tmp1(n_TSVD,n_TSVD,n_TSVD,n_TSVD) )
+  call DGEMM('T','N', n_TSVD*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0      &
+             , tmp2, size(tmp2,1)                                 &
+             , Vtmp, size(Vtmp,1)                                 &
+             , 0.d0, tmp1, size(tmp1,1)*size(tmp1,2)*size(tmp1,3) )
+  deallocate( tmp2, Vtmp )
+
+  do n = 1, n_selected
+    iin = numalpha_selected(n)
+    jjn = numbeta_selected (n)
+
+    do m = 1, n_selected
+      iim = numalpha_selected(m)
+      jjm = numbeta_selected (m)
+
+      H0(m,n) = tmp1(iin,iim,jjn,jjm)
+
+    enddo
+  enddo
+
+  deallocate( tmp1 )
+
+  return
+end subroutine const_psiHpsi
+
+
--- a/devel/svdwf/psiSVD_pt2_v0.irp.f
+++ b/devel/svdwf/psiSVD_pt2_v0.irp.f
@ -0,0 +1,691 @@
+program psiSVD_pt2_v0
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, jj, ia, ib
+  double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:)
+  double precision, allocatable :: H0_1d(:), H0_tmp(:,:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: n_selected, n_toselect, n_tmp, na_max, nb_max
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer                       :: cantor_pairing_ij, cantor_pairing_new
+  integer, allocatable          :: cantor_pairing(:), cantor_pairing_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+  !  ***   PARAMETERS   ***  !
+  na_max = n_det_alpha_unique
+  nb_max = n_det_beta_unique
+  !  ***     *****      ***  !
+
+  print *, ' na_max  = ', na_max
+  print *, ' nb_max  = ', nb_max
+
+  ! ---------------------------------------------------------------------------------------
+  !                            initial wavefunction: psi_0
+
+  n_selected = 1
+  allocate(numalpha_selected(n_selected), numbeta_selected(n_selected), cantor_pairing(n_selected))
+
+  numalpha_selected(1) = 1
+  numbeta_selected (1) = 1
+  cantor_pairing   (1) = 4 !int( 0.5*(1+1)*(1+1+1) ) + 1
+
+  allocate( coeff_psi(n_selected) )
+  coeff_psi(1) = 1.d0
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !         construnc the initial basis to select phi_1 from the FSVD
+
+  n_toselect = na_max * nb_max - n_selected
+  print *, ' toselect = ', n_toselect
+  print *, ' to trun  = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max
+
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  k = 0
+  do i = 1, na_max
+    do j = 1, nb_max
+
+      cantor_pairing_ij = int( 0.5*(i+j)*(i+j+1) ) + j
+      if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle
+
+      k = k + 1
+      numalpha_toselect(k) = i
+      numbeta_toselect (k) = j
+
+    enddo
+  enddo
+  if( k.ne.n_toselect ) then
+    print *, " error in chosing vectors toselect"
+    print *, " n_toselect =", n_toselect
+    print *, " k          =", k
+    stop
+  endif
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                        read < u_k v_l | H | u_k v_l > for all vectors 
+
+  allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) )
+ 
+  open( unit=11, FILE="klHkl_v0.dat", ACTION="READ")
+
+    read(11,*) i, i, E0
+    H0(1,1) = E0
+
+    do i = 1, n_toselect
+      read(11,*) ia, ib, ctmp 
+      if( (numalpha_toselect(i).ne.ia) .or. (numbeta_toselect(i).ne.ib) ) then
+        print *, ' error in reading klHkl_v0 '
+        print *, ia, ib
+        print *, numalpha_toselect(i), numbeta_toselect(i)
+        stop
+      endif
+      Hdiag(i) = ctmp
+    enddo
+
+  close(11)
+
+  ! ---------------------------------------------------------------------------------------
+
+  E0 = E0 + nuclear_repulsion
+  print*, ' space dimen     = ', n_selected
+  print*, ' ground state E0 = ', E0
+
+  na_new = 1
+  nb_new = 1
+
+  !________________________________________________________________________________________________________
+  !
+  !                              increase the size of psi0 iteratively
+  !________________________________________________________________________________________________________
+
+  !  ***   PARAMETERS   ***  !
+  rank_max = na_max * nb_max
+  !rank_max = 50 * 50
+  !  ***     *****      ***  !
+
+  if( rank_max .gt. (na_max*nb_max) ) then
+    print *, " rank_max should be less then na_max x nb_max"
+    stop
+  endif
+
+
+  allocate( Hkl_save(n_toselect,n_selected) )
+
+  do while( n_selected .lt. rank_max )
+
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    print*, '                         '
+    print*, '       new iteration     '
+
+    if( n_toselect .lt. 1 ) then
+
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      print*, ' no more vectors to construct a new basis '
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      stop
+
+    else
+
+      ! ---------------------------------------------------------------------------------------
+      !                                select a new vector
+
+      allocate( Hkl_1d(n_toselect)  )
+      call const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d)
+      Hkl_save(1:n_toselect,n_selected) = Hkl_1d(1:n_toselect)
+      deallocate( Hkl_1d )
+
+      ! choose the best vector
+      allocate( epsil(n_toselect) , epsil_energ(n_toselect) ) 
+      do ii = 1, n_toselect
+
+        ctmp = 0.d0 
+        do l = 1, n_selected
+          ctmp = ctmp + coeff_psi(l) * Hkl_save(ii,l)
+        enddo
+        epsil(ii) = ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) )
+
+        epsil_energ(ii) = epsil(ii)
+        epsil(ii)       = dabs( epsil(ii) ) 
+      enddo
+
+      ind_new = MAXLOC( epsil, DIM=1 )
+
+      ept2 = epsil_energ(ind_new) 
+      if( ept2 .gt. 0.d0 ) then
+        print *, ' ept2 > 0 !!!!!!!!!! '
+        print *, na_new, nb_new, ept2
+        stop
+      endif
+
+      na_new             = numalpha_toselect(ind_new) 
+      nb_new             = numbeta_toselect (ind_new) 
+      cantor_pairing_new = int( 0.5 * (na_new+nb_new) * (na_new+nb_new+1) ) + nb_new
+
+      print *, ' best vector', na_new, nb_new, ept2
+      deallocate(epsil,epsil_energ)
+
+      ! new coefficient
+      coeff_new = 0.d0
+      do l = 1, n_selected
+        coeff_new += coeff_psi(l) * Hkl_save(ind_new,l)
+      enddo
+      coeff_new = coeff_new / ( E0 - (Hdiag(ind_new)+nuclear_repulsion) )
+      print *, ' new coeff = ', coeff_new
+
+      ! < psi_old | H | delta_psi > 
+      allocate( H0_1d(n_selected) )
+      call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                      new psi
+
+      allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected), coeff_tmp(n_selected) )
+      allocate( cantor_pairing_tmp(n_selected) )
+      allocate( H0_tmp(n_selected,n_selected)  )
+
+      coeff_tmp         (:) = coeff_psi        (:)
+      numalpha_tmp      (:) = numalpha_selected(:)
+      numbeta_tmp       (:) = numbeta_selected (:)
+      cantor_pairing_tmp(:) = cantor_pairing   (:)
+      H0_tmp          (:,:) = H0             (:,:)
+
+      deallocate( numalpha_selected, numbeta_selected, coeff_psi, cantor_pairing, H0 )
+
+      n_tmp      = n_selected
+      n_selected = n_selected + 1
+
+      allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) , coeff_psi(n_selected) )
+      allocate( cantor_pairing(n_selected) )
+      allocate( H0(n_selected,n_selected)  )
+      H0(:,:) = 0.d0
+
+      do l = 1, n_tmp
+        coeff_psi        (l) = coeff_tmp         (l)
+        numalpha_selected(l) = numalpha_tmp      (l)
+        numbeta_selected (l) = numbeta_tmp       (l)
+        cantor_pairing   (l) = cantor_pairing_tmp(l)
+      enddo
+      H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp)
+
+      deallocate( numalpha_tmp, numbeta_tmp, coeff_tmp, cantor_pairing_tmp, H0_tmp )
+
+      coeff_psi        (n_selected) = coeff_new
+      numalpha_selected(n_selected) = na_new
+      numbeta_selected (n_selected) = nb_new
+      cantor_pairing   (n_selected) = cantor_pairing_new
+
+      H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp)
+      H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp)
+      deallocate( H0_1d )
+      H0(n_selected,n_selected) = Hdiag(ind_new) 
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                      new energy
+
+      allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+      call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+      ! get the postsvd ground state
+      allocate( check_ov(n_selected) )
+      do l = 1, n_selected
+        overlop = 0.d0   
+        do i = 1, n_selected
+          ia = numalpha_selected(i)
+          ib = numbeta_selected (i)
+          if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia)
+          !overlop = overlop + eigvec0(i,l) * coeff_psi(i)
+        enddo
+        check_ov(l) = dabs(overlop)
+      enddo
+      ind_gs       = MAXLOC( check_ov, DIM=1 )
+      overlop      = check_ov(ind_gs)
+      E0           = eigval0(ind_gs)+nuclear_repulsion
+      coeff_psi(:) = eigvec0(:,ind_gs)
+
+      deallocate( check_ov, eigval0, eigvec0 )
+
+      print*, ' space dimen = ', n_selected
+      print*, ' diag energy = ', E0
+      print*, ' overlop     = ', overlop
+      print*, ' index       = ', ind_gs
+ 
+      ! ---------------------------------------------------------------------------------------
+
+
+  write(2110, '( 3(I5,3X), 3(F15.8,3X) )') n_selected, na_new, nb_new, ept2, E0, overlop
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                      remove selected pair | na_new   nb_new >
+
+      allocate( numalpha_tmp(n_toselect), numbeta_tmp(n_toselect), Hdiag_tmp(n_toselect) )
+      numalpha_tmp(:) = numalpha_toselect(:)
+      numbeta_tmp (:) = numbeta_toselect (:)
+      Hdiag_tmp   (:) = Hdiag            (:)
+
+      ii = n_selected - 1
+      allocate( Hkl_tmp(n_toselect,ii) )
+      Hkl_tmp(1:n_toselect,1:ii) = Hkl_save(1:n_toselect,1:ii)
+
+      deallocate( numalpha_toselect , numbeta_toselect, Hkl_save, Hdiag )
+
+      n_tmp      = n_toselect
+      n_toselect = n_toselect - 1
+      print*, ' rank to select  = ', n_toselect
+
+      allocate(numalpha_toselect(n_toselect), numbeta_toselect(n_toselect), Hkl_save(n_toselect,n_selected))
+      allocate(Hdiag(n_toselect))
+
+      Hkl_save = 0.d0
+      l = 0
+      do k = 1, n_tmp
+
+        ia = numalpha_tmp(k) 
+        ib = numbeta_tmp (k)
+        cantor_pairing_ij = int( 0.5*(ia+ib)*(ia+ib+1) ) + ib
+        if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle
+
+        l = l + 1
+        numalpha_toselect(l) = numalpha_tmp(k)
+        numbeta_toselect (l) = numbeta_tmp (k)
+        Hdiag            (l) = Hdiag_tmp   (k)
+
+        Hkl_save(l,1:ii)     = Hkl_tmp(k,1:ii)
+
+      enddo
+      if( l .ne. n_toselect) then
+        print *, " error in updating to select vectors"
+        print *, " l          = ", l
+        print *, " n_toselect = ", n_toselect
+        stop
+      endif
+
+      deallocate( numalpha_tmp , numbeta_tmp , Hkl_tmp, Hdiag_tmp )
+
+    ! ---------------------------------------------------------------------------------------
+
+
+    endif 
+
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    print*, " "
+    print*, "  elapsed time (min) = ", W_tot_time_it/60.d0
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+
+  deallocate( Dref )
+  deallocate( Uref, Vref )
+  
+  deallocate( psi_coef )
+  deallocate( numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect )
+  deallocate( H0, Hdiag, Hkl_save )
+
+end
+
+
+
+
+subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+  implicit none
+
+  integer, intent(in)           :: na_new, nb_new, na_max, nb_max, n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0_1d(n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m
+  double precision              :: h12
+
+  double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:)
+  double precision, allocatable :: U1d(:), V1d(:) 
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+
+  double precision              :: ti, tf
+
+  print *, ""
+  print *, " start const_H0_1d"
+  call wall_time(ti)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( U1d(na) , V1d(nb) )
+  U1d(1:na) = Uref(1:na,na_new)
+  V1d(1:nb) = Vref(1:nb,nb_new)
+
+  allocate( tmp1(na,nb) )
+  tmp1 = 0.d0
+
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          tmp1(i,j) += h12 * U1d(k) * V1d(l)
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( U1d , V1d )
+
+  ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m)
+  allocate( Utmp(na,na_max) )
+  Utmp(1:na,1:na_max) = Uref(1:na,1:na_max)
+
+  allocate( tmp2(nb,na_max) )
+  call DGEMM('T', 'N', nb, na_max, na, 1.d0,   &
+       tmp1, size(tmp1,1), Utmp, size(Utmp,1), &
+       0.d0, tmp2, size(tmp2,1)                )
+  deallocate( tmp1 )
+  deallocate( Utmp )
+
+  ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n)
+  allocate( Vtmp(nb,nb_max) )
+  Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max)
+
+  allocate( Hmat_kl(na_max,nb_max) )
+  call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, &
+       tmp2, size(tmp2,1), Vtmp, size(Vtmp,1),   &
+       0.d0, Hmat_kl, size(Hmat_kl,1)            )
+  deallocate( tmp2 )
+  deallocate( Vtmp )
+
+  do m = 1, n_selected
+    ii       = numalpha_selected(m)
+    jj       = numbeta_selected (m)
+    H0_1d(m) = Hmat_kl(ii,jj)
+  enddo
+  deallocate( Hmat_kl )
+
+  call wall_time(tf)
+  print *, " end const_H0_1d after (min) ", (tf-ti)/60.
+  print *, ""
+
+  return
+end subroutine const_H0_1d
+
+
+
+
+
+subroutine const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d)
+
+  implicit none
+
+  integer, intent(in)           :: na_new, nb_new, na_max, nb_max, n_toselect
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hkl_1d(n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m
+  double precision              :: h12
+
+  double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:)
+  double precision, allocatable :: U1d(:), V1d(:) 
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+
+  double precision              :: ti, tf
+
+  print *, ""
+  print *, " start const_Hkl_1d"
+  call wall_time(ti)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( U1d(na) , V1d(nb) )
+  U1d(1:na) = Uref(1:na,na_new)
+  V1d(1:nb) = Vref(1:nb,nb_new)
+
+  allocate( tmp1(na,nb) )
+  tmp1 = 0.d0
+
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          tmp1(i,j) += h12 * U1d(k) * V1d(l)
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( U1d , V1d )
+
+  ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m)
+  allocate( Utmp(na,na_max) )
+  Utmp(1:na,1:na_max) = Uref(1:na,1:na_max)
+
+  allocate( tmp2(nb,na_max) )
+  call DGEMM('T', 'N', nb, na_max, na, 1.d0,   &
+       tmp1, size(tmp1,1), Utmp, size(Utmp,1), &
+       0.d0, tmp2, size(tmp2,1)                )
+  deallocate( tmp1 , Utmp )
+
+  ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n)
+  allocate( Vtmp(nb,nb_max) )
+  Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max)
+
+  allocate( Hmat_kl(na_max,nb_max) )
+  call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, &
+       tmp2, size(tmp2,1), Vtmp, size(Vtmp,1),   &
+       0.d0, Hmat_kl, size(Hmat_kl,1)            )
+  deallocate( tmp2 )
+  deallocate( Vtmp )
+
+  do m = 1, n_toselect
+    ii        = numalpha_toselect(m)
+    jj        = numbeta_toselect (m)
+    Hkl_1d(m) = Hmat_kl(ii,jj)
+  enddo
+  deallocate( Hmat_kl )
+
+  call wall_time(tf)
+  print *, " end const_Hkl_1d after (min) ", (tf-ti)/60.
+  print *, ""
+
+  return
+end subroutine const_Hkl_1d
+
--- a/devel/svdwf/psiSVD_pt2_v1.irp.f
+++ b/devel/svdwf/psiSVD_pt2_v1.irp.f
@ -0,0 +1,734 @@
+program psiSVD_pt2_v1
+
+  implicit none
+
+  BEGIN_DOC
+  ! perturbative approach to build psi_postsvd
+  END_DOC
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  PROVIDE N_int
+
+  call run()
+end
+
+
+subroutine run
+
+  USE OMP_LIB
+
+  implicit none
+
+  integer(bit_kind)             :: det1(N_int,2), det2(N_int,2)
+  integer                       :: degree, i_state
+
+  integer                       :: i, j, k, l, m, n
+  double precision              :: x, y, h12
+
+  double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
+
+  integer                       :: rank_max
+  double precision              :: E0, overlop, Ept2
+  double precision, allocatable :: H0(:,:)
+  double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:)
+
+  integer                       :: ii, jj, ia, ib
+  double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:)
+  double precision, allocatable :: H0_1d(:), H0_tmp(:,:)
+
+  integer                       :: na_new, nb_new, ind_new, ind_gs
+  double precision              :: ctmp, coeff_new
+  double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:)
+
+  double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
+
+  integer                       :: n_selected, n_toselect, n_tmp, na_max, nb_max
+  integer,          allocatable :: numalpha_selected(:), numbeta_selected(:)
+  integer,          allocatable :: numalpha_toselect(:), numbeta_toselect(:)
+  integer,          allocatable :: numalpha_tmp(:), numbeta_tmp(:)
+
+  integer                       :: cantor_pairing_ij, cantor_pairing_new
+  integer, allocatable          :: cantor_pairing(:), cantor_pairing_tmp(:)
+
+  double precision              :: t_beg, t_end
+  integer(kind=8)               :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
+  real(kind=8)                  :: W_tot_time, W_tot_time_it
+  integer                       :: nb_taches
+
+  !$OMP PARALLEL
+  nb_taches = OMP_GET_NUM_THREADS()
+  !$OMP END PARALLEL
+
+  call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
+
+  i_state = 1
+
+  det1(:,1) = psi_det_alpha_unique(:,1)
+  det2(:,1) = psi_det_alpha_unique(:,1)
+  det1(:,2) = psi_det_beta_unique(:,1)
+  det2(:,2) = psi_det_beta_unique(:,1)
+  call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int)
+  call get_excitation_degree(det1,det2,degree,N_int)
+  call i_H_j(det1, det2, N_int, h12)
+
+  ! ---------------------------------------------------------------------------------------
+  !                      construct the initial CISD matrix
+
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+  print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
+  print *, ' N det      :', N_det
+  print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
+
+  allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
+  Aref(:,:) = 0.d0
+  do k = 1, N_det
+    i         = psi_bilinear_matrix_rows(k)
+    j         = psi_bilinear_matrix_columns(k)
+    Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
+  enddo
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                                  perform a Full SVD
+
+  allocate( Uref(n_det_alpha_unique,n_det_alpha_unique)     )
+  allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
+  allocate( Vtref(n_det_beta_unique,n_det_beta_unique)      )
+  call cpu_time(t_beg)
+  call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref &
+              , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique)
+  call cpu_time(t_end)
+  print *, " SVD is performed after (min)", (t_end-t_beg)/60.
+
+  allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
+  do l = 1, n_det_beta_unique
+    do i = 1, n_det_beta_unique
+      Vref(i,l) = Vtref(l,i)
+    enddo
+  enddo
+  deallocate( Vtref )
+  deallocate( Aref )
+
+  ! ---------------------------------------------------------------------------------------
+
+  !  ***   PARAMETERS   ***  !
+  na_max = n_det_alpha_unique
+  nb_max = n_det_beta_unique
+  !  ***     *****      ***  !
+
+  print *, ' na_max  = ', na_max
+  print *, ' nb_max  = ', nb_max
+
+  ! ---------------------------------------------------------------------------------------
+  !                            initial wavefunction: psi_0
+
+  n_selected = 1
+  allocate(numalpha_selected(n_selected), numbeta_selected(n_selected), cantor_pairing(n_selected))
+
+  numalpha_selected(1) = 1
+  numbeta_selected (1) = 1
+  cantor_pairing   (1) = 4 !int( 0.5*(1+1)*(1+1+1) ) + 1
+
+  allocate( coeff_psi(n_selected) )
+  coeff_psi(1) = 1.d0
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !         construnc the initial basis to select phi_1 from the FSVD
+
+  n_toselect = na_max * nb_max - n_selected
+  print *, ' toselect = ', n_toselect
+  print *, ' to trun  = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max
+
+  allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
+  k = 0
+  do i = 1, na_max
+    do j = 1, nb_max
+
+      cantor_pairing_ij = int( 0.5*(i+j)*(i+j+1) ) + j
+      if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle
+
+      k = k + 1
+      numalpha_toselect(k) = i
+      numbeta_toselect (k) = j
+
+    enddo
+  enddo
+  if( k.ne.n_toselect ) then
+    print *, " error in chosing vectors toselect"
+    print *, " n_toselect =", n_toselect
+    print *, " k          =", k
+    stop
+  endif
+
+  ! ---------------------------------------------------------------------------------------
+
+
+
+  ! ---------------------------------------------------------------------------------------
+  !                        read < u_k v_l | H | u_k v_l > for all vectors 
+
+  allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) )
+ 
+  open( unit=11, FILE="klHkl_v1.dat", ACTION="READ")
+
+    read(11,*) i, i, E0
+    H0(1,1) = E0
+
+    do i = 1, n_toselect
+      read(11,*) ia, ib, ctmp 
+      !print *, ' ia , ib :', ia, ib
+      if( (numalpha_toselect(i).ne.ia) .or. (numbeta_toselect(i).ne.ib) ) then
+        print *, ' error in reading klHkl_v1 '
+        print *, ' ia , ib :', ia, ib
+        print *, numalpha_toselect(i) , numbeta_toselect(i)
+        stop
+      endif
+      Hdiag(i) = ctmp
+    enddo
+
+  close(11)
+
+  ! ---------------------------------------------------------------------------------------
+
+  E0 = E0 + nuclear_repulsion
+  print*, ' space dimen     = ', n_selected
+  print*, ' ground state E0 = ', E0
+
+  na_new = 1
+  nb_new = 1
+
+  !________________________________________________________________________________________________________
+  !
+  !                              increase the size of psi0 iteratively
+  !________________________________________________________________________________________________________
+
+  !  ***   PARAMETERS   ***  !
+  rank_max = na_max * nb_max
+  !  ***     *****      ***  !
+
+  if( rank_max .gt. (na_max*nb_max) ) then
+    print *, " rank_max should be less then na_max x nb_max"
+    stop
+  endif
+
+
+  allocate( Hkl_save(n_toselect,n_selected) )
+
+  do while( n_selected .lt. rank_max )
+
+    call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) 
+
+    print*, '                         '
+    print*, '       new iteration     '
+
+    if( n_toselect .lt. 1 ) then
+
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      print*, ' no more vectors to construct a new basis '
+      print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
+      stop
+
+    else
+
+      ! ---------------------------------------------------------------------------------------
+      !                                select a new vector
+
+      allocate( Hkl_1d(n_toselect)  )
+      call const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d)
+      Hkl_save(1:n_toselect,n_selected) = Hkl_1d(1:n_toselect)
+      deallocate( Hkl_1d )
+
+      ! choose the best vector
+      allocate( epsil(n_toselect) , epsil_energ(n_toselect) ) 
+      do ii = 1, n_toselect
+
+        ctmp = 0.d0 
+        do l = 1, n_selected
+          ctmp = ctmp + coeff_psi(l) * Hkl_save(ii,l)
+        enddo
+        epsil(ii) = ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) )
+
+        epsil_energ(ii) = epsil(ii)
+        epsil(ii)       = dabs( epsil(ii) ) 
+      enddo
+
+      ind_new = MAXLOC( epsil, DIM=1 )
+
+      ept2 = epsil_energ(ind_new) 
+      if( ept2 .gt. 0.d0 ) then
+        print *, ' ept2 > 0 !!!!!!!!!! '
+        print *, na_new, nb_new, ept2
+        stop
+      endif
+
+      na_new             = numalpha_toselect(ind_new) 
+      nb_new             = numbeta_toselect (ind_new) 
+      cantor_pairing_new = int( 0.5 * (na_new+nb_new) * (na_new+nb_new+1) ) + nb_new
+
+      print *, ' ind_new    ', ind_new
+      print *, ' best vector', na_new, nb_new, ept2
+      deallocate(epsil,epsil_energ)
+
+      ! new coefficient
+      coeff_new = 0.d0
+      do l = 1, n_selected
+        coeff_new += coeff_psi(l) * Hkl_save(ind_new,l)
+      enddo
+      coeff_new = coeff_new / ( E0 - (Hdiag(ind_new)+nuclear_repulsion) )
+      print *, ' new coeff = ', coeff_new
+      print *, ' Hdiag     = ', Hdiag(ind_new)
+
+      ! < psi_old | H | delta_psi > 
+      allocate( H0_1d(n_selected) )
+      call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                      new psi
+
+      allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected), coeff_tmp(n_selected) )
+      allocate( cantor_pairing_tmp(n_selected) )
+      allocate( H0_tmp(n_selected,n_selected)  )
+
+      coeff_tmp         (:) = coeff_psi        (:)
+      numalpha_tmp      (:) = numalpha_selected(:)
+      numbeta_tmp       (:) = numbeta_selected (:)
+      cantor_pairing_tmp(:) = cantor_pairing   (:)
+      H0_tmp          (:,:) = H0             (:,:)
+
+      deallocate( numalpha_selected, numbeta_selected, coeff_psi, cantor_pairing, H0 )
+
+      n_tmp      = n_selected
+      n_selected = n_selected + 1
+
+      allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) , coeff_psi(n_selected) )
+      allocate( cantor_pairing(n_selected) )
+      allocate( H0(n_selected,n_selected)  )
+      H0(:,:) = 0.d0
+
+      do l = 1, n_tmp
+        coeff_psi        (l) = coeff_tmp         (l)
+        numalpha_selected(l) = numalpha_tmp      (l)
+        numbeta_selected (l) = numbeta_tmp       (l)
+        cantor_pairing   (l) = cantor_pairing_tmp(l)
+      enddo
+      H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp)
+
+      deallocate( numalpha_tmp, numbeta_tmp, coeff_tmp, cantor_pairing_tmp, H0_tmp )
+
+      coeff_psi        (n_selected) = coeff_new
+      numalpha_selected(n_selected) = na_new
+      numbeta_selected (n_selected) = nb_new
+      cantor_pairing   (n_selected) = cantor_pairing_new
+
+      H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp)
+      H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp)
+      deallocate( H0_1d )
+      H0(n_selected,n_selected) = Hdiag(ind_new) 
+
+      ! ---------------------------------------------------------------------------------------
+
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                                      new energy
+
+      allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
+      call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
+
+      ! get the postsvd ground state
+      allocate( check_ov(n_selected) )
+      do l = 1, n_selected
+        overlop = 0.d0   
+        do i = 1, n_selected
+          ia = numalpha_selected(i)
+          ib = numbeta_selected (i)
+          if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia)
+          !overlop = overlop + eigvec0(i,l) * coeff_psi(i)
+        enddo
+        check_ov(l) = dabs(overlop)
+      enddo
+      ind_gs       = MAXLOC( check_ov, DIM=1 )
+      overlop      = check_ov(ind_gs)
+      E0           = eigval0(ind_gs)+nuclear_repulsion
+      coeff_psi(:) = eigvec0(:,ind_gs)
+
+      deallocate( check_ov, eigval0, eigvec0 )
+
+      print*, ' space dimen = ', n_selected
+      print*, ' diag energy = ', E0
+      print*, ' overlop     = ', overlop
+      print*, ' index       = ', ind_gs
+ 
+      ! ---------------------------------------------------------------------------------------
+
+
+  write(2111, '( 3(I5,3X), 3(F15.8,3X) )') n_selected, na_new, nb_new, ept2, E0, overlop
+
+
+      ! ---------------------------------------------------------------------------------------
+      !                      remove selected pair | na_new   nb_new >
+
+      allocate( numalpha_tmp(n_toselect), numbeta_tmp(n_toselect), Hdiag_tmp(n_toselect) )
+      numalpha_tmp(:) = numalpha_toselect(:)
+      numbeta_tmp (:) = numbeta_toselect (:)
+      Hdiag_tmp   (:) = Hdiag            (:)
+
+      ii = n_selected - 1
+      allocate( Hkl_tmp(n_toselect,ii) )
+      Hkl_tmp(1:n_toselect,1:ii) = Hkl_save(1:n_toselect,1:ii)
+
+      deallocate( numalpha_toselect , numbeta_toselect, Hkl_save, Hdiag )
+
+      n_tmp      = n_toselect
+      n_toselect = n_toselect - 1
+      print*, ' rank to select  = ', n_toselect
+
+      allocate(numalpha_toselect(n_toselect), numbeta_toselect(n_toselect), Hkl_save(n_toselect,n_selected))
+      allocate(Hdiag(n_toselect))
+
+      Hkl_save = 0.d0
+      l = 0
+      do k = 1, n_tmp
+
+        ia = numalpha_tmp(k) 
+        ib = numbeta_tmp (k)
+        cantor_pairing_ij = int( 0.5*(ia+ib)*(ia+ib+1) ) + ib
+        if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle
+
+        l = l + 1
+        numalpha_toselect(l) = numalpha_tmp(k)
+        numbeta_toselect (l) = numbeta_tmp (k)
+        Hdiag            (l) = Hdiag_tmp   (k)
+
+        Hkl_save(l,1:ii)     = Hkl_tmp(k,1:ii)
+
+      enddo
+      if( l .ne. n_toselect) then
+        print *, " error in updating to select vectors"
+        print *, " l          = ", l
+        print *, " n_toselect = ", n_toselect
+        stop
+      endif
+
+      deallocate( numalpha_tmp , numbeta_tmp , Hkl_tmp, Hdiag_tmp )
+
+    ! ---------------------------------------------------------------------------------------
+
+
+    endif 
+
+    call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
+    W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
+    print*, " "
+    print*, "  elapsed time (min) = ", W_tot_time_it/60.d0
+ 
+  end do
+  !________________________________________________________________________________________________________
+  !________________________________________________________________________________________________________
+
+
+
+  ! ***************************************************************************************************
+  ! save to ezfion
+  !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) )
+  !do l = 1, rank0
+  !  Dezfio(l,1)   = coeff_psi(l)
+  !  Uezfio(:,l,1) = U0(:,l)
+  !  Vezfio(:,l,1) = V0(:,l)
+  !enddo
+  !call ezfio_set_spindeterminants_n_det(N_det)
+  !call ezfio_set_spindeterminants_n_states(N_states)
+  !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
+  !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
+  !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+
+  !call ezfio_set_spindeterminants_n_svd_coefs(rank0)
+  !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
+  !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
+  !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
+  !deallocate( Uezfio, Dezfio, Vezfio )
+  ! ***************************************************************************************************
+
+  call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
+  W_tot_time   = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
+  print *, ' ___________________________________________________________________'
+  print *, ' '
+  print *, " Execution avec ", nb_taches, " threads"
+  print *, " total elapsed time (min) = ", W_tot_time/60.d0
+  print *, ' ___________________________________________________________________'
+
+
+
+  deallocate( Dref )
+  deallocate( Uref, Vref )
+  
+  deallocate( psi_coef )
+  deallocate( numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect )
+  deallocate( H0, Hdiag, Hkl_save )
+
+end
+
+
+
+
+subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d)
+
+  implicit none
+
+  integer, intent(in)           :: na_new, nb_new, na_max, nb_max, n_selected
+  integer, intent(in)           :: numalpha_selected(n_selected), numbeta_selected(n_selected)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: H0_1d(n_selected)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m
+  double precision              :: h12
+
+  double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:)
+  double precision, allocatable :: U1d(:), V1d(:) 
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+
+  double precision              :: ti, tf
+
+  print *, ""
+  print *, " start const_H0_1d"
+  call wall_time(ti)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( U1d(na) , V1d(nb) )
+  U1d(1:na) = Uref(1:na,na_new)
+  V1d(1:nb) = Vref(1:nb,nb_new)
+
+  allocate( tmp1(na,nb) )
+  tmp1 = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                          &
+ !$OMP          PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2)             &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,U1d,V1d,tmp1)
+
+  allocate( tmp2(na,nb) )
+  tmp2 = 0.d0
+
+ !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          tmp2(i,j) += h12 * U1d(k) * V1d(l)
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP CRITICAL
+  do j = 1, nb
+    do i = 1, na
+      tmp1(i,j) += tmp2(i,j)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate( tmp2 )
+ !$OMP END PARALLEL
+
+  deallocate( U1d , V1d )
+
+  ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m)
+  allocate( Utmp(na,na_max) )
+  Utmp(1:na,1:na_max) = Uref(1:na,1:na_max)
+
+  allocate( tmp2(nb,na_max) )
+  call DGEMM('T', 'N', nb, na_max, na, 1.d0,   &
+       tmp1, size(tmp1,1), Utmp, size(Utmp,1), &
+       0.d0, tmp2, size(tmp2,1)                )
+  deallocate( tmp1 )
+  deallocate( Utmp )
+
+  ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n)
+  allocate( Vtmp(nb,nb_max) )
+  Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max)
+
+  allocate( Hmat_kl(na_max,nb_max) )
+  call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, &
+       tmp2, size(tmp2,1), Vtmp, size(Vtmp,1),   &
+       0.d0, Hmat_kl, size(Hmat_kl,1)            )
+  deallocate( tmp2 )
+  deallocate( Vtmp )
+
+  do m = 1, n_selected
+    ii       = numalpha_selected(m)
+    jj       = numbeta_selected (m)
+    H0_1d(m) = Hmat_kl(ii,jj)
+  enddo
+  deallocate( Hmat_kl )
+
+  call wall_time(tf)
+  print *, " end const_H0_1d after (min) ", (tf-ti)/60.
+  print *, ""
+
+  return
+end subroutine const_H0_1d
+
+
+
+
+
+subroutine const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d)
+
+  implicit none
+
+  integer, intent(in)           :: na_new, nb_new, na_max, nb_max, n_toselect
+  integer, intent(in)           :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
+  double precision, intent(in)  :: Uref(n_det_alpha_unique,n_det_alpha_unique)
+  double precision, intent(in)  :: Vref(n_det_beta_unique ,n_det_beta_unique)
+  double precision, intent(out) :: Hkl_1d(n_toselect)
+
+  integer(bit_kind)             :: det1(N_int,2)
+  integer(bit_kind)             :: det2(N_int,2)
+  integer                       :: degree, na, nb
+
+  integer                       :: i, j, k, l, ii, jj, m
+  double precision              :: h12
+
+  double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:)
+  double precision, allocatable :: U1d(:), V1d(:) 
+  double precision, allocatable :: Utmp(:,:), Vtmp(:,:)
+
+  double precision              :: ti, tf
+
+  print *, ""
+  print *, " start const_Hkl_1d"
+  call wall_time(ti)
+
+  na = n_det_alpha_unique
+  nb = n_det_beta_unique
+
+  allocate( U1d(na) , V1d(nb) )
+  U1d(1:na) = Uref(1:na,na_new)
+  V1d(1:nb) = Vref(1:nb,nb_new)
+
+  allocate( tmp1(na,nb) )
+  tmp1 = 0.d0
+
+ !$OMP PARALLEL DEFAULT(NONE)                                          &
+ !$OMP          PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2)             &
+ !$OMP          SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
+ !$OMP                 N_int,U1d,V1d,tmp1)
+
+  allocate( tmp2(na,nb) )
+  tmp2 = 0.d0
+ !$OMP DO
+  do l = 1, nb
+    det2(:,2) = psi_det_beta_unique(:,l)
+    do j = 1, nb
+      det1(:,2) = psi_det_beta_unique(:,j)
+
+      call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
+      if(degree .gt. 2) cycle
+
+      do k = 1, na
+        det2(:,1) = psi_det_alpha_unique(:,k)
+        do i = 1, na
+          det1(:,1) = psi_det_alpha_unique(:,i)
+
+          call get_excitation_degree(det1,det2,degree,N_int)
+          if(degree .gt. 2) cycle
+
+          call i_H_j(det1, det2, N_int, h12)
+          if( h12 .eq. 0.d0) cycle
+
+          tmp2(i,j) += h12 * U1d(k) * V1d(l)
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP CRITICAL
+  do j = 1, nb
+    do i = 1, na
+      tmp1(i,j) += tmp2(i,j)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate( tmp2 )
+ !$OMP END PARALLEL
+
+  deallocate( U1d , V1d )
+
+  ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m)
+  allocate( Utmp(na,na_max) )
+  Utmp(1:na,1:na_max) = Uref(1:na,1:na_max)
+
+  allocate( tmp2(nb,na_max) )
+  call DGEMM('T', 'N', nb, na_max, na, 1.d0,   &
+       tmp1, size(tmp1,1), Utmp, size(Utmp,1), &
+       0.d0, tmp2, size(tmp2,1)                )
+  deallocate( tmp1 , Utmp )
+
+  ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n)
+  allocate( Vtmp(nb,nb_max) )
+  Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max)
+
+  allocate( Hmat_kl(na_max,nb_max) )
+  call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, &
+       tmp2, size(tmp2,1), Vtmp, size(Vtmp,1),   &
+       0.d0, Hmat_kl, size(Hmat_kl,1)            )
+  deallocate( tmp2 )
+  deallocate( Vtmp )
+
+  do m = 1, n_toselect
+    ii        = numalpha_toselect(m)
+    jj        = numbeta_toselect (m)
+    Hkl_1d(m) = Hmat_kl(ii,jj)
+  enddo
+  deallocate( Hmat_kl )
+
+  call wall_time(tf)
+  print *, " end const_Hkl_1d after (min) ", (tf-ti)/60.
+  print *, ""
+
+  return
+end subroutine const_Hkl_1d
+
--- a/devel/svdwf/pyth_RSVD.py
+++ b/devel/svdwf/pyth_RSVD.py
@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# !!!
+import os, sys
+# !!!
+#QP_PATH=os.environ["QMCCHEM_PATH"]
+#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/")
+# !!!
+from ezfio import ezfio
+from datetime import datetime
+import numpy as np
+from scipy.sparse.linalg import svds
+from R3SVD_LiYu  	       import R3SVD_LiYu
+from RSVD                import powit_RSVD  
+from R3SVD_AMMAR         import R3SVD_AMMAR
+import time 
+# !!!
+fmt = '%5d' + 2 * ' %e'
+# !!!
+if __name__ == "__main__":
+    # !!!
+    if len(sys.argv) != 2:
+        print("Usage: %s <EZFIO_DIRECTORY>"%sys.argv[0])
+        sys.exit(1)
+    filename = sys.argv[1]
+    ezfio.set_file(filename)
+    # !!!
+    N_det  = ezfio.get_spindeterminants_n_det()
+    A_rows = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows())
+    A_cols = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns())
+    A_vals = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values())
+    nrows, ncols  = ezfio.get_spindeterminants_n_det_alpha(), ezfio.get_spindeterminants_n_det_beta()
+    Y = np.zeros( (nrows, ncols) )
+    for k in range(N_det):
+        i = A_rows[k] - 1
+        j = A_cols[k] - 1
+        Y[i,j] = A_vals[0][k]
+    print("# # # # # # # # # # # # # # # # # # # # # #")
+    print('matrix dimensions = {} x {}'.format(nrows, ncols))
+    print("# # # # # # # # # # # # # # # # # # # # # # \n")
+    normY = np.linalg.norm(Y, ord='fro')
+    print( normY )
+    # !!!
+    print('Full SVD:')
+    t_beg = time.time()
+    U, S_FSVD, VT = np.linalg.svd(Y, full_matrices=0)
+    t_end         = time.time()
+    rank          = S_FSVD.shape[0]
+    energy        = np.sum(np.square(S_FSVD)) / normY**2
+    err_SVD       = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_FSVD),VT)), ord='fro') / normY
+    print('rank = {}, energy = {}, error = {}%, CPU time = {} \n'.format(rank, energy, err_SVD, t_end-t_beg))
+    # !!!
+    np.savetxt('results_python/h2o_pseudo/SingValues_FullSVD.txt', np.transpose([ np.array(range(rank))+1, S_FSVD  ]), fmt='%5d' + ' %e', delimiter=' ')
+    # !!!
+    t       = 50
+    delta_t = 10
+    npow    = 15
+    err_thr = 1e-3
+    maxit   = 10
+    # !!!
+    print('RRR SVD Li & Yu:')
+    t_beg          = time.time()
+    U, S_R3SVD, VT = R3SVD_LiYu(Y, t, delta_t, npow, err_thr, maxit)
+    t_end          = time.time()
+    rank           = S_R3SVD.shape[0]
+    energy         = np.sum( np.square(S_R3SVD) ) / normY**2
+    err_SVD        = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_R3SVD),VT)), ord='fro') / normY
+    print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg))
+    # !!!
+    err_R3SVD = np.zeros( (rank) )
+    for i in range(rank):
+        err_R3SVD[i] = 100.0 * abs( S_FSVD[i] - S_R3SVD[i] ) / S_FSVD[i]
+    np.savetxt('results_python/h2o_pseudo/SingValues_R3SVD.txt', np.transpose([ np.array(range(rank))+1, S_R3SVD, err_R3SVD ]), fmt=fmt, delimiter=' ')
+    # !!!
+    nb_oversamp = 10
+    tol_SVD     = 1e-10
+    print('RRR SVD my version:')
+    t_beg          = time.time()
+    U, S_MRSVD, VT = R3SVD_AMMAR(Y, t, delta_t, npow, nb_oversamp, err_thr, maxit, tol_SVD)
+    t_end          = time.time()
+    rank           = S_MRSVD.shape[0]
+    energy         = np.sum( np.square(S_MRSVD) ) / normY**2
+    err_SVD        = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_MRSVD),VT)), ord='fro') / normY
+    print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg))
+    # !!!
+    err_MRSVD = np.zeros( (rank) )
+    for i in range(rank):
+        err_MRSVD[i] = 100.0 * abs( S_FSVD[i] - S_MRSVD[i] ) / S_FSVD[i]
+    np.savetxt('results_python/h2o_pseudo/SingValues_MRSVD.txt', np.transpose([ np.array(range(rank))+1, S_MRSVD, err_MRSVD ]), fmt=fmt, delimiter=' ')
+    # !!!
+    trank = rank
+    print("Truncated RSVD (pre-fixed rank = {} & oversampling parameter = {}):".format(trank,nb_oversamp))
+    t_beg         = time.time()
+    U, S_RSVD, VT = powit_RSVD(Y, trank, npow, nb_oversamp)
+    t_end         = time.time()
+    rank          = S_RSVD.shape[0]
+    energy        = np.sum( np.square(S_RSVD) ) / normY**2
+    err_SVD       = 100. * np.linalg.norm( Y - np.dot(U,np.dot(np.diag(S_RSVD),VT)), ord="fro") / normY
+    print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg))
+    # !!!
+    err_RSVD = np.zeros( (rank) )
+    for i in range(rank):
+        err_RSVD[i] = 100.0 * abs( S_FSVD[i] - S_RSVD[i] ) / S_FSVD[i]
+    np.savetxt('results_python/h2o_pseudo/SingValues_RSVD.txt', np.transpose([ np.array(range(rank))+1, S_RSVD, err_RSVD ]), fmt=fmt, delimiter=' ')
+    # !!!
+    print("Truncated SVD (scipy):")
+    t_beg         = time.time()
+    U, S_TSVD, VT = svds(Y, min(trank, min(Y.shape[0],Y.shape[1])-1 ), which='LM')
+    t_end         = time.time()
+    rank          = S_TSVD.shape[0]
+    energy        = np.sum( np.square(S_TSVD) ) / normY**2
+    err_SVD       = 100. * np.linalg.norm( Y - np.dot(U, np.dot(np.diag(S_TSVD),VT) ), ord="fro") / normY
+    print('rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(rank, energy, err_SVD, t_end-t_beg))
+    # !!!
+    err_TSVD = np.zeros( (rank) )
+    for i in range(rank-1):
+        for j in range(i+1,rank):
+            if( S_TSVD[j] > S_TSVD[i]):
+                S_TSVD[i], S_TSVD[j] = S_TSVD[j], S_TSVD[i]
+    for i in range(rank):
+        err_TSVD[i] = 100.0 * abs( S_FSVD[i] - S_TSVD[i] ) / S_FSVD[i]
+    np.savetxt('results_python/h2o_pseudo/SingValues_TSVD.txt', np.transpose([ np.array(range(rank))+1, S_TSVD, err_TSVD ]), fmt=fmt, delimiter=' ')
+    # !!!
+# !!!
--- a/devel/svdwf/set_QP_svd.py
+++ b/devel/svdwf/set_QP_svd.py
@ -0,0 +1,223 @@
+# !!!
+import sys, os
+#QMCCHEM_PATH=os.environ["QMCCHEM_PATH"]
+#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/")
+# !!!
+from ezfio import ezfio
+from math import sqrt
+from datetime import datetime
+import time
+import numpy as np
+import subprocess
+from scipy.linalg import eig, eigh
+from RSVD import powit_RSVD
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+def get_Hsvd_QP(Hsvd_qp_txt):
+    Hsvd_qp      = np.zeros( (n_svd,n_svd) )
+    Hsvd_qp_file = open(Hsvd_qp_txt, 'r')
+    for line in Hsvd_qp_file:
+        line         = line.split()
+        i            = int(line[0]) - 1
+        j            = int(line[1]) - 1
+        Hsvd_qp[i,j] = float(line[2])
+    return(Hsvd_qp)
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+
+
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+def get_Hpostsvd_QP(Hpostsvd_qp_txt):
+    Hpostsvd_qp      = np.zeros( (n_svd*n_svd,n_svd*n_svd) )
+    Hpostsvd_qp_file = open(Hpostsvd_qp_txt, 'r')
+    for line in Hpostsvd_qp_file:
+        line             = line.split()
+        i                = int(line[0]) - 1
+        j                = int(line[1]) - 1
+        Hpostsvd_qp[i,j] = float(line[2])
+    return(Hpostsvd_qp)
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+
+
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+def get_Esvd_QP(Hsvd_qp):
+    # symmetrise and diagonalise
+    aa = Hsvd_qp
+    aa = 0.5*( aa + aa.T )
+    bb = np.identity(n_svd)
+    #eigvals_svd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True,
+    eigvals_svd, vr = eig(aa, left=False, right=True, overwrite_a=True, overwrite_b=True,
+            check_finite=True, homogeneous_eigvals=False)
+    recouvre_svd = np.abs(psi_svd_coeff @ vr)
+    ind_gssvd = np.argmax(recouvre_svd)
+    E_svd = eigvals_svd[ind_gssvd] + E_toadd
+    return( E_svd, vr[:,ind_gssvd] )
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+def get_Epostsvd_QP(Hpostsvd_qp):
+    # symmetrise and diagonalise
+    aa = Hpostsvd_qp
+    aa = 0.5*( aa + aa.T )
+    bb = np.identity(n_svd*n_svd)
+    eigvals_postsvd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True,
+            check_finite=True, homogeneous_eigvals=False)
+    d_postsvd = np.diagflat(psi_svd_coeff)
+    d_postsvd = d_postsvd.reshape( (1,n_svd*n_svd) )
+    recouvre_postsvd = np.abs(d_postsvd @ vr)
+    ind_gspostsvd = np.argmax(recouvre_postsvd)
+    # !!!
+    E_postsvd = eigvals_postsvd[ind_gspostsvd] + E_toadd
+    # !!!
+    return( E_postsvd,  vr[:,ind_gspostsvd] )
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+
+
+
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+def SVD_postsvd(sigma_postsvd):
+    # !!!
+    print(' performing new SVD for the post SVD eigenvector:' )
+    # !!!
+    sigma_postsvd_mat = np.zeros( (n_svd,n_svd) )
+    for indc in range(1, n_svd**2+1):
+        if( ( indc % n_svd ) !=0 ):
+            kp = indc % n_svd
+        else:
+            kp = n_svd
+        indc1 = int( ( indc - kp ) / n_svd )
+        k     = indc1 % n_svd + 1
+        irow  = kp + (k-1)*n_svd - 1
+        sigma_postsvd_mat[kp-1][k-1] = sigma_postsvd[irow]
+    sigma_postsvd = sigma_postsvd_mat
+    print(sigma_postsvd[0:n_svd,0:n_svd])
+    # !!!
+    # construct the new matrix Y
+    Y     = np.dot( U_svd , np.dot(sigma_postsvd , V_svd.T) )
+    normY = np.linalg.norm(Y, ord='fro')
+    # !!!
+    # parameters of RSVD
+    rank        = n_svd
+    npow        = 10
+    nb_oversamp = 10
+    # !!!
+    # call RSV
+    U_postSVD, sigma_postsvd_diag, VT_postsvd = powit_RSVD(Y, rank, npow, nb_oversamp)
+    # !!!
+    # check precision
+    Y_SVD = np.dot( U_postSVD , np.dot( np.diag(sigma_postsvd_diag) , VT_postsvd ) )
+    energy = np.sum( np.square(sigma_postsvd_diag) ) / normY**2
+    err_SVD = 100. * np.linalg.norm( Y - Y_SVD, ord="fro") / normY
+    print(' energy = {}, error = {}\n'.format(energy, err_SVD))
+    # !!!
+    return(U_postSVD, sigma_postsvd_diag, VT_postsvd)
+    # !!!
+# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~
+
+
+
+
+
+if __name__ == '__main__':
+    t0 = time.time()
+    # !!!
+    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
+    EZFIO_file      = "/home/aammar/qp2/src/svdwf/2h2_cisd_nsvd20"
+    Hsvd_qp_txt     = 'H_QP_svd_2h2_nsvd20.txt'
+    Hpostsvd_qp_txt = 'H_QP_postsvd_2h2_nsvd20.txt'
+    # h2o
+    #E_toadd         =  9.194966082434476 #6.983610961797779
+    # 2h2
+    E_toadd         =  1.711353545183182
+    # f2
+    #E_toadd         =  30.35863309325590
+    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
+    # !!!
+    ezfio.set_file(EZFIO_file)
+    n_svd         = ezfio.get_spindeterminants_n_svd_coefs()
+    psi_svd_coeff = np.array(ezfio.get_spindeterminants_psi_svd_coefs())
+    U_svd         = np.array(ezfio.get_spindeterminants_psi_svd_alpha())
+    V_svd         = np.array(ezfio.get_spindeterminants_psi_svd_beta())
+    # !!!
+    U_svd = U_svd[0,:,:].T
+    V_svd = V_svd[0,:,:].T
+    # !!!
+    print(" Today's date:", datetime.now() )
+    print(" EZFIO file = {}".format(EZFIO_file))
+    print(" n_svd = {}\n".format(n_svd) )
+    # !!!
+    print(' initial svd coeff = {}\n'.format(psi_svd_coeff))
+    C_old   = np.dot( U_svd , np.dot( np.diagflat(psi_svd_coeff) , V_svd.T ) )
+    norm_C  = np.linalg.norm(C_old, ord="fro")
+    # !!!
+    read_QPsvd = 'y'
+    if( read_QPsvd == 'y' ):
+        Hsvd_qp      = get_Hsvd_QP(Hsvd_qp_txt)
+        E_svd_QP, sigma_svd_QP = get_Esvd_QP(Hsvd_qp)
+        print(' QP svd enegry = {}'.format(E_svd_QP) )
+        sigma_svd_QP = sigma_svd_QP * np.sign(sigma_svd_QP[0])
+        print(' QP svd coeff  = {}\n'.format(sigma_svd_QP))
+        # compare C_new and C_old
+        C_new   = np.dot( U_svd , np.dot( np.diagflat(sigma_svd_QP) , V_svd.T ) )
+        delta_C = 100. * np.linalg.norm(C_old-C_new, ord="fro") / norm_C
+        print(' Difference between C_old and C_new svd = {} %\n'.format(delta_C))
+        # !!!
+    read_QPpostsvd = 'y'
+    if( read_QPpostsvd == 'y' ):
+        Hpostsvd_qp      = get_Hpostsvd_QP(Hpostsvd_qp_txt)
+        E_postsvd_QP, sigma_postsvd_QP = get_Epostsvd_QP(Hpostsvd_qp)
+        print(' QP postsvd enegry = {}'.format(E_postsvd_QP) )
+        U_postSVD, sigma_postsvd_diag, Vt_postSVD = SVD_postsvd(sigma_postsvd_QP)
+        V_postSVD                                 = Vt_postSVD.T
+        print(' QP postsvd coeff  = {}\n'.format(sigma_postsvd_diag))
+        # compare C_new and C_old
+        C_new   = np.dot( U_postSVD , np.dot( np.diag(sigma_postsvd_diag) , Vt_postSVD ) )
+        delta_C_m = 100. * np.linalg.norm(C_old-C_new, ord="fro") / norm_C
+        delta_C_p = 100. * np.linalg.norm(C_old+C_new, ord="fro") / norm_C
+        delta_C   = min(delta_C_m,delta_C_p)
+        print(' Difference between C_old and C_new postsvd = {} %'.format(delta_C))
+        # !!!
+    # !!!
+    # ___________________________________________________________________________
+    # ___________________________________________________________________________
+    # 
+    save_to_EZFIO = ''
+    #
+    if( save_to_EZFIO == 'svd'):
+        ezfio.set_spindeterminants_psi_svd_coefs( sigma_svd_QP )
+        direc_svdcoeff = EZFIO_file + '/spindeterminants/psi_svd_coefs.gz'
+        print(' {} is modified'.format(direc_svdcoeff) )
+        # # #
+    elif( save_to_EZFIO == 'postsvd' ):
+        U_postSVD_toEZFIO        = np.zeros( ( 1, U_postSVD.shape[1], U_postSVD.shape[0] ) )
+        V_postSVD_toEZFIO        = np.zeros( ( 1, V_postSVD.shape[1], V_postSVD.shape[0] ) )
+        U_postSVD_toEZFIO[0,:,:] = U_postSVD.T
+        V_postSVD_toEZFIO[0,:,:] = V_postSVD.T
+        #
+        ezfio.set_spindeterminants_psi_svd_alpha( U_postSVD_toEZFIO )
+        ezfio.set_spindeterminants_psi_svd_coefs( sigma_postsvd_diag )
+        ezfio.set_spindeterminants_psi_svd_beta( V_postSVD_toEZFIO )
+        # 
+        direc_svdcoeff = EZFIO_file + '/spindeterminants/psi_svd_coefs.gz'
+        direc_svdalpha = EZFIO_file + '/spindeterminants/psi_svd_alpha.gz'
+        direc_svdbeta  = EZFIO_file + '/spindeterminants/psi_svd_beta.gz'
+        print(' {} is modified'.format(direc_svdcoeff) )
+        print(' {} is modified'.format(direc_svdalpha) )
+        print(' {} is modified'.format(direc_svdbeta ) )
+    else:
+        pass
+    # ___________________________________________________________________________
+    # ___________________________________________________________________________
+    # 
+    print("end after {:.3f} minutes".format((time.time()-t0)/60.) )
+    # !!!
+# !!!