diff --git a/devel/casscf/EZFIO.cfg b/devel/casscf/EZFIO.cfg index 95b032d..2a1f192 100644 --- a/devel/casscf/EZFIO.cfg +++ b/devel/casscf/EZFIO.cfg @@ -11,23 +11,23 @@ interface: ezfio size: (determinants.n_states) [state_following_casscf] -type: logical +type: logical doc: If |true|, the CASSCF will try to follow the guess CI vector and orbitals interface: ezfio,provider,ocaml default: False [diag_hess_cas] -type: logical +type: logical doc: If |true|, only the DIAGONAL part of the hessian is retained for the CASSCF interface: ezfio,provider,ocaml default: False [hess_cv_cv] -type: logical -doc: If |true|, the core-virtual - core-virtual part of the hessian is computed +type: logical +doc: If |true|, the core-virtual - core-virtual part of the hessian is computed interface: ezfio,provider,ocaml -default: True +default: True [level_shift_casscf] @@ -41,11 +41,35 @@ default: 0.005 type: logical doc: If true, the two-rdm are computed with a fast algo interface: ezfio,provider,ocaml -default: True +default: True [criterion_casscf] type: character*(32) -doc: choice of the criterion for the convergence of the casscf: can be energy or gradients +doc: choice of the criterion for the convergence of the casscf: can be energy or gradients or e_pt2 interface: ezfio, provider, ocaml -default: energy +default: e_pt2 +[thresh_casscf] +type: Threshold +doc: Threshold on the convergence of the CASCF energy. +interface: ezfio,provider,ocaml +default: 1.e-06 + + +[pt2_min_casscf] +type: Threshold +doc: Minimum value of the pt2_max parameter for the CIPSI in the CASSCF iterations. +interface: ezfio,provider,ocaml +default: 1.e-04 + +[n_big_act_orb] +type: integer +doc: Number of active orbitals from which the active space is considered as large, and therefore pt2_min_casscf is activated. +interface: ezfio,provider,ocaml +default: 16 + +[adaptive_pt2_max] +type: logical +doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder +interface: ezfio,provider,ocaml +default: True diff --git a/devel/casscf/casscf.irp.f b/devel/casscf/casscf.irp.f index 24f2f2f..39dc94d 100644 --- a/devel/casscf/casscf.irp.f +++ b/devel/casscf/casscf.irp.f @@ -6,17 +6,17 @@ program casscf call reorder_orbitals_for_casscf ! no_vvvv_integrals = .True. ! touch no_vvvv_integrals - pt2_max = 0.005 - SOFT_TOUCH pt2_max n_det_max_full = 500 touch n_det_max_full + pt2_relative_error = 0.02 + touch pt2_relative_error call run_stochastic_cipsi call run end subroutine run implicit none - double precision :: energy_old, energy + double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E logical :: converged,state_following_casscf_save integer :: iteration converged = .False. @@ -27,42 +27,73 @@ subroutine run state_following_casscf_save = state_following_casscf state_following_casscf = .True. touch state_following_casscf + ept2_before = 0.d0 + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif do while (.not.converged) call run_stochastic_cipsi energy_old = energy energy = eone+etwo+ecore + pt2_max_before = pt2_max call write_time(6) call write_int(6,iteration,'CAS-SCF iteration = ') call write_double(6,energy,'CAS-SCF energy = ') + if(n_states == 1)then + double precision :: E_PT2, PT2 + call ezfio_get_casscf_energy_pt2(E_PT2) + call ezfio_get_casscf_energy(PT2) + PT2 -= E_PT2 + call write_double(6,E_PT2,'E + PT2 energy = ') + call write_double(6,PT2,' PT2 = ') + call write_double(6,pt2_max,' PT2_MAX = ') + endif + + print*,'' call write_double(6,norm_grad_vec2,'Norm of gradients = ') call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' call write_double(6,energy_improvement, 'Predicted energy improvement = ') if(criterion_casscf == "energy")then converged = dabs(energy_improvement) < thresh_scf else if (criterion_casscf == "gradients")then converged = norm_grad_vec2 < thresh_scf - else - converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = dabs(E_PT2 - ept2_before) + converged = dabs(delta_E) < thresh_casscf endif - pt2_max = dabs(energy_improvement / pt2_relative_error) + ept2_before = E_PT2 + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif + endif + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') mo_coef = NewOrbs - mo_occ = occnum + mo_occ = occnum call save_mos if(.not.converged)then iteration += 1 - N_det = max(N_det/2 ,N_states) + N_det = max(N_det/8 ,N_states) psi_det = psi_det_sorted psi_coef = psi_coef_sorted read_wf = .True. call clear_mo_map - SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif if(iteration .gt. 3)then - state_following_casscf = state_following_casscf_save + state_following_casscf = state_following_casscf_save soft_touch state_following_casscf endif endif @@ -70,3 +101,5 @@ subroutine run enddo end + + diff --git a/devel/dmc_dress/EZFIO.cfg b/devel/dmc_dress/EZFIO.cfg new file mode 100644 index 0000000..88bc4ff --- /dev/null +++ b/devel/dmc_dress/EZFIO.cfg @@ -0,0 +1,6 @@ +[dmc_delta_h] +type: double precision +doc: Dressing matrix obtained from DMC +size: (determinants.n_det) +interface: ezfio, provider + diff --git a/devel/dmc_dress/NEED b/devel/dmc_dress/NEED new file mode 100644 index 0000000..657d6cf --- /dev/null +++ b/devel/dmc_dress/NEED @@ -0,0 +1,3 @@ +selectors_full +generators_full +davidson_dressed diff --git a/devel/dmc_dress/README.rst b/devel/dmc_dress/README.rst new file mode 100644 index 0000000..5b0de0c --- /dev/null +++ b/devel/dmc_dress/README.rst @@ -0,0 +1,4 @@ +========= +dmc_dress +========= + diff --git a/devel/dmc_dress/dmc_dress.irp.f b/devel/dmc_dress/dmc_dress.irp.f new file mode 100644 index 0000000..72a78b4 --- /dev/null +++ b/devel/dmc_dress/dmc_dress.irp.f @@ -0,0 +1,36 @@ +program dmc_dress + implicit none + BEGIN_DOC +! Program that extracts the lowest states of the Hamiltonian dressed by the QMC +! dressing vector stored in :option:`dmc_dressing dmc_delta_h` +! + END_DOC + read_wf = .True. + touch read_wf + call pre + call routine + call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted) +end + +subroutine pre + implicit none + double precision, allocatable :: left(:,:), right(:,:), tmp(:,:), res(:,:) + integer :: i + allocate (left(1,1:N_det), right(1:N_det,1), tmp(1:N_det,1), res(1,1)) + left(1,1:N_det) = psi_coef(1:N_det,1) + right(1:N_det,1) = psi_coef(1:N_det,1) + + tmp(1:N_det,1:1) = matmul(h_matrix_dressed(1:N_det,1:N_det), right(1:N_det,1:1)) + res(1:1,1:1) = matmul(left(1:1,1:N_det), tmp(1:N_det,1:1)) + print *, 'E_in = ', res(1,1) + do i=1,N_det + print *, 'HPsi/c0 = ', tmp(i,1)/psi_coef(i,1) + enddo +end +subroutine routine + implicit none + psi_coef(1:N_det,1) = ci_eigenvectors_dressed(1:N_det,1) + print*,'N_det = ',N_det + print *, 'E = ', ci_energy_dressed(1) + nuclear_repulsion + SOFT_TOUCH psi_coef +end diff --git a/devel/dmc_dress/dress.py b/devel/dmc_dress/dress.py new file mode 100755 index 0000000..53f585b --- /dev/null +++ b/devel/dmc_dress/dress.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import numpy as np +import subprocess +import sys +import os + +QP_PATH=os.environ["QP_ROOT"]+"/external/ezfio/Python/" + +sys.path.insert(0,QP_PATH) + +from ezfio import ezfio + +def read_hamiltonian(inp): + text = subprocess.run(["qmcchem", "result", inp], capture_output=True).stdout + inside = None + h = [] + s = [] + norm = None + for line in text.splitlines(): + line = str(line) + print (line) + if "Psi_norm :" in line: + norm = float(line.split()[3]) + if "]" in line: + inside = None + if inside == "H": + data = line.split() + h.append(float(data[3])) + elif "Ci_dress" in line: + inside = "H" + h = np.array(h)/norm + + return h + + +h = read_hamiltonian(sys.argv[1]) +ezfio.set_file(sys.argv[1]) +ezfio.set_dmc_dress_dmc_delta_h(h) +print(h) diff --git a/devel/dmc_dress/dressing_vector.irp.f b/devel/dmc_dress/dressing_vector.irp.f new file mode 100644 index 0000000..71c59c8 --- /dev/null +++ b/devel/dmc_dress/dressing_vector.irp.f @@ -0,0 +1,33 @@ + BEGIN_PROVIDER [ double precision, dressing_column_h, (N_det,N_states) ] +&BEGIN_PROVIDER [ double precision, dressing_column_s, (N_det,N_states) ] + implicit none + BEGIN_DOC + ! \Delta_{state-specific}. \Psi + ! Diagonal element is divided by 2 because Delta = D + D^t + END_DOC + + integer :: i,ii,k,j, l + double precision :: f, tmp + double precision, allocatable :: delta(:) + + allocate(delta(N_det)) + delta(1:N_det) = dmc_delta_h(1:N_det) + + call dset_order(delta,psi_bilinear_matrix_order_reverse,N_det) + + dressing_column_h(:,:) = 0.d0 + dressing_column_s(:,:) = 0.d0 + + l = dressed_column_idx(1) + do j = 1, n_det + if (j == l) cycle + dressing_column_h(j,1) = delta(j) + dressing_column_h(l,1) -= psi_coef(j,1) * delta(j) / psi_coef(l,1) + enddo + dressing_column_h(l,1) += delta(l) + dressing_column_h(l,1) *= 0.5d0 + +END_PROVIDER + + + diff --git a/devel/fci_complete/generate_fci.irp.f b/devel/fci_complete/generate_fci.irp.f index d66988b..5ea3fc2 100644 --- a/devel/fci_complete/generate_fci.irp.f +++ b/devel/fci_complete/generate_fci.irp.f @@ -8,9 +8,6 @@ subroutine generate_fci_space integer(bit_kind) :: o(N_int,2) integer(bit_kind) :: u, coremask - if (mo_num > 64) then - stop 'No more than 64 MOs' - endif ncore = 0 coremask = 0_bit_kind @@ -23,13 +20,26 @@ subroutine generate_fci_space o(1,1) = iand(full_ijkl_bitmask(1),not(coremask)) o(1,2) = 0_bit_kind - call configuration_to_dets_size(o,n_det_alpha_unique,elec_alpha_num-ncore,N_int) + integer :: norb + norb = mo_num + do i=1,mo_num + if (trim(mo_class(i)) == 'Deleted') then + norb -= 1 + o(1,1) = ibclr(o(1,1) ,i-1) + endif + enddo + + if (norb > 64) then + stop 'No more than 64 MOs' + endif + + call configuration_to_dets_size(act_bitmask,n_det_alpha_unique,elec_alpha_num-ncore,N_int) TOUCH n_det_alpha_unique integer :: k,n,m, t, t1, t2 k=0 n = elec_alpha_num - m = mo_num - n + m = norb - n n = n u = shiftl(1_bit_kind,n) -1 @@ -49,12 +59,12 @@ IRP_ENDIF enddo - call configuration_to_dets_size(o,n_det_beta_unique,elec_beta_num-ncore,N_int) + call configuration_to_dets_size(act_bitmask,n_det_beta_unique,elec_beta_num-ncore,N_int) TOUCH n_det_beta_unique k=0 n = elec_beta_num - m = mo_num - n + m = norb - n u = shiftl(1_bit_kind,n) -1 do while (u < shiftl(1_bit_kind,n+m)) if (iand(coremask, u) == coremask) then diff --git a/devel/svdwf/Evar_TruncSVD.irp.f b/devel/svdwf/Evar_TruncSVD.irp.f index 62befde..4a43592 100644 --- a/devel/svdwf/Evar_TruncSVD.irp.f +++ b/devel/svdwf/Evar_TruncSVD.irp.f @@ -9,158 +9,443 @@ program Evar_TruncSVD call run() ! !!! end - - - - +! !!! subroutine run + ! !!! implicit none include 'constants.include.F' - double precision, allocatable :: A(:,:), U(:,:), V(:,:), D(:) - integer :: r, i, j, k, l, m, n, iter, iter_max - double precision, allocatable :: Z(:,:), P(:,:), Yt(:,:), UYt(:,:), r1(:,:) ! !!! - m = n_det_alpha_unique - n = n_det_beta_unique - r = n + integer :: m, n, i_state + double precision :: error_thr, error_RRRSVD, norm_psi, norm_SVD, err_verif, err_tmp + integer :: i, j, k, l, It, PowerIt + integer :: It_max, PowerIt_max, nb_oversamp + integer :: r_init, delta_r, low_rank + double precision, allocatable :: B_old(:,:), Q_old(:,:) + double precision, allocatable :: UB(:,:), D(:), Vt(:,:), U(:,:) + double precision, allocatable :: P(:,:), r1(:,:) + double precision, allocatable :: Q_new(:,:), Q_tmp(:,:), Q_mult(:,:) + double precision, allocatable :: B_new(:,:), B_tmp(:,:) + double precision, allocatable :: URSVD(:,:), DRSVD(:), VtRSVD(:,:) + double precision, allocatable :: Uverif(:,:), Dverif(:), Vtverif(:,:), Averif(:,:) + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + double precision :: tmp + ! !!! + i_state = 1 + m = n_det_alpha_unique + n = n_det_beta_unique + ! !!! + !open(111, file = 'data_to_python.txt', action = 'WRITE' ) + ! do k = 1, N_det + ! write(111, '(I8, 5X, I8, 5X, E15.7)' ) psi_bilinear_matrix_rows(k), psi_bilinear_matrix_columns(k), psi_bilinear_matrix_values(k,i_state) + ! end do + !close(111) + ! !!! + ! !!! print *, 'matrix:', m,'x',n print *, 'N det:', N_det - print *, 'rank = ', r - iter_max = 20 ! !!! - allocate( Z(m,r) , P(n,r) ) ! Z(m,r) = A(m,n) @ P(n,r) - Z(:,:) = 0.d0 + r_init = 78 + delta_r = 5 ! !!! - ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! - ! first we apply a RSVD for a pre-fixed rank (r) - ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,r1) - allocate(r1(N_det,2)) + PowerIt_max = 10 + nb_oversamp = 10 + ! !!! + It_max = 10 + error_thr = 1.d-3 !! don't touche it but rather increase r_init + ! !!! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! ~ Rank Revealing Randomized SVD ~ !!! ! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) * psi_bilinear_matrix_values(k,i_state) + enddo + ! !!! + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! + ! !!! build initial QB decomposition !!! ! + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! + ! !!! + allocate( Q_old(m, r_init) ) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,k,l,r1) + allocate( r1(N_det,2) ) !$OMP DO - do l=1,r - call random_number(r1) - r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1))) - r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2)) - do k=1,N_det - i = psi_bilinear_matrix_rows(k) - j = psi_bilinear_matrix_columns(k) - Z(i,l) = Z(i,l) + psi_bilinear_matrix_values(k,1) * r1(k,1) - enddo + do l = 1, r_init + Q_old(:,l) = 0.d0 + call random_number(r1) + r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1))) + r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2)) + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + Q_old(i,l) = Q_old(i,l) + psi_bilinear_matrix_values(k,i_state) * r1(k,1) + enddo enddo !$OMP END DO deallocate(r1) !$OMP END PARALLEL ! !!! - ! Power iterations + ! power scheme ! !!! - do iter=1,iter_max - ! !!! - print *, 'Power iteration ', iter, '/', 20 - ! !!! - ! P(n,r) = At(n,m) @ Z(m,r) - ! !!! - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l) - !$OMP DO - do l=1,r - P(:,l) = 0.d0 - do k=1,N_det - i = psi_bilinear_matrix_rows(k) - j = psi_bilinear_matrix_columns(k) - P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,1) * Z(i,l) + allocate( P(n, r_init) ) + do PowerIt = 1, PowerIt_max + ! !!! + call my_ortho_qr(Q_old, size(Q_old,1), m, r_init) + ! !!! + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l) + !$OMP DO + do l = 1, r_init + P(:,l) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,i_state) * Q_old(i,l) + enddo enddo - enddo - !$OMP END DO - ! !!! - ! Z(m,r) = A(m,n) @ P(n,r) - ! !!! - !$OMP BARRIER - !$OMP DO - do l=1,r - Z(:,l) = 0.d0 - do k=1,N_det - i = psi_bilinear_matrix_rows(k) - j = psi_bilinear_matrix_columns(k) - Z(i,l) = Z(i,l) + psi_bilinear_matrix_values(k,1) * P(j,l) + !$OMP END DO + !$OMP END PARALLEL + ! !!! + call my_ortho_qr(P, size(P,1), n, r_init) + ! !!! + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l) + !$OMP DO + do l = 1, r_init + Q_old(:,l) = 0.d0 + do k = 1 , N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Q_old(i,l) = Q_old(i,l) + psi_bilinear_matrix_values(k,i_state) * P(j,l) + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - ! !!! - ! Compute QR: at return: Q is Z(m,r) - ! !!! - call ortho_qr(Z,size(Z,1),m,r) - ! !!! + !$OMP END DO + !$OMP END PARALLEL + ! !!! enddo + deallocate( P ) ! !!! - ! Y(r,n) = Zt(r,m) @ A(m,n) or Yt(n,r) = At(n,m) @ Z(m,r) + call my_ortho_qr(Q_old, size(Q_old,1), m, r_init) ! !!! - allocate(Yt(n,r)) + allocate( B_old(r_init,n) ) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l) !$OMP DO - do l=1,r - do k=1,n - Yt(k,l) = 0.d0 + do l = 1, r_init + B_old(l,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + B_old(l,j) = B_old(l,j) + Q_old(i,l) * psi_bilinear_matrix_values(k,i_state) enddo - do k=1,N_det - i = psi_bilinear_matrix_rows(k) - j = psi_bilinear_matrix_columns(k) - Yt(j,l) = Yt(j,l) + Z(i,l) * psi_bilinear_matrix_values(k,1) - enddo enddo !$OMP END DO !$OMP END PARALLEL + norm_SVD = 0.d0 + do j = 1, n + do l = 1, r_init + norm_SVD = norm_SVD + B_old(l,j) * B_old(l,j) + enddo + enddo + error_RRRSVD = dabs( norm_psi - norm_SVD ) / norm_psi + It = 1 + low_rank = r_init + print *, It, low_rank, error_RRRSVD ! !!! - ! Y = UY @ D @ Vt or Yt = V @ Dt @ UYt + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! + ! !!! incrementally build up QB decomposition !!! ! + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! ! !!! - allocate(D(r),V(n,r), UYt(r,r)) - ! !!! - call svd(Yt,size(Yt,1),V,size(V,1),D,UYt,size(UYt,1),n,r) - deallocate(Yt) - ! !!! - ! U(m,r) = Z(m,r) @ UY(r,r) or U = Z @ (UYt).T - ! !!! - allocate(U(m,r)) - call dgemm('N','T',m,r,r,1.d0,Z,size(Z,1),UYt,size(UYt,1),0.d0,U,size(U,1)) - deallocate(UYt,Z) - ! !!! - !do i=1,r - ! print *, i, real(D(i)), real(D(i)**2), real(sum(D(1:i)**2)) - ! if (D(i) < 1.d-15) then - ! k = i - ! exit - ! endif - !enddo - !print *, 'threshold: ', 2.858 * D(k/2) - ! !!! - ! Build the new determinant: U @ D @ Vt - ! !!! - !!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l) - !!$OMP DO - !! - !print *, 'ok 1' - !N_det = m * n - !print *, 'ok 11' - !TOUCH N_det - !psi_bilinear_matrix_values(:,1) = 0.d0 - !TOUCH psi_bilinear_matrix_values - ! print *, size(psi_bilinear_matrix_values,1), size(D), size(U,1), size(U,2), size(V,1), size(V,2) - print*, PSI_energy(1) + nuclear_repulsion - psi_bilinear_matrix(:,:,:) = 0.d0 - do r = 1, n - call generate_all_alpha_beta_det_products - do i = 1, N_det_beta_unique - do j = 1, N_det_alpha_unique - psi_bilinear_matrix(j,i,1) = 0.d0 - do l = 1, r - psi_bilinear_matrix(j,i,1) = psi_bilinear_matrix(j,i,1) + D(l) * U(j,l) * V(i,l) - enddo + do while( ( error_RRRSVD.gt.error_thr ).and.( It.lt.It_max ).and.( low_rank.lt.(min(m,n)-delta_r) ) ) + ! !!! + allocate( Q_new(m,delta_r) ) + allocate( r1(N_det, 2) ) + do l = 1, delta_r + Q_new(:,l) = 0.d0 + call random_number(r1) + r1(:,1) = dsqrt(-2.d0*dlog(r1(:,1))) + r1(:,1) = r1(:,1) * dcos(dtwo_pi*r1(:,2)) + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + Q_new(i,l) = Q_new(i,l) + psi_bilinear_matrix_values(k,i_state) * r1(k,1) enddo enddo - TOUCH psi_bilinear_matrix - call update_wf_of_psi_bilinear_matrix(.False.) - print*, r, PSI_energy(1) + nuclear_repulsion, s2_values(1) !CI_energy(1) - call save_wavefunction() + deallocate(r1) + ! !!! + ! orthogonalization with Q_old: Q_new = Q_new - Q_old @ Q_old.T @ Q_new + ! !!! + !allocate( Q_mult(m,m) ) + !call dgemm( 'N', 'T', m, m, low_rank, +1.d0, Q_old, size(Q_old,1), Q_old, size(Q_old,1), 0.d0, Q_mult, size(Q_mult,1) ) + !!do i = 1, m + !! do j = 1, m + !! Q_mult(j,i) = 0.d0 + !! do l = 1, low_rank + !! Q_mult(j,i) = Q_mult(j,i) + Q_old(i,l) * Q_old(j,l) + !! enddo + !! enddo + !!enddo + !!call dgemm( 'N', 'N', m, delta_r, m, -1.d0, Q_mult, size(Q_mult,1), Q_new, size(Q_new,1), 1.d0, Q_new, size(Q_new,1) ) + !do l = 1, delta_r + ! do i = 1, m + ! tmp = 0.d0 + ! do j = 1, m + ! tmp = tmp + Q_mult(i,j) * Q_new(j,l) + ! enddo + ! Q_new(i,l) = Q_new(i,l) - tmp + ! enddo + !enddo + !deallocate( Q_mult ) + ! !!! + ! power scheme + ! !!! + allocate( P(n, delta_r) ) + do PowerIt = 1, PowerIt_max + ! !!! + call my_ortho_qr(Q_new, size(Q_new,1), m, delta_r) + ! !!! + do l = 1, delta_r + P(:,l) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + P(j,l) = P(j,l) + psi_bilinear_matrix_values(k,i_state) * Q_new(i,l) + enddo + enddo + ! !!! + call my_ortho_qr(P, size(P,1), n, delta_r) + ! !!! + do l = 1, delta_r + Q_new(:,l) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Q_new(i,l) = Q_new(i,l) + psi_bilinear_matrix_values(k,i_state) * P(j,l) + enddo + enddo + ! !!! + enddo + deallocate( P ) + ! !!! + ! orthogonalization with Q_old: Q_new = Q_new - Q_old @ Q_old.T @ Q_new + ! !!! + allocate( Q_mult(m,m) ) + call dgemm( 'N', 'T', m, m, low_rank, +1.d0, Q_old, size(Q_old,1), Q_old, size(Q_old,1), 0.d0, Q_mult, size(Q_mult,1) ) + !do i = 1, m + ! do j = 1, m + ! Q_mult(j,i) = 0.d0 + ! do l = 1, low_rank + ! Q_mult(j,i) = Q_mult(j,i) + Q_old(i,l) * Q_old(j,l) + ! enddo + ! enddo + !enddo + !call dgemm( 'N', 'N', m, delta_r, m, -1.d0, Q_mult, size(Q_mult,1), Q_new, size(Q_new,1), 1.d0, Q_new, size(Q_new,1) ) + do l = 1, delta_r + do i = 1, m + tmp = 0.d0 + do j = 1, m + tmp = tmp + Q_mult(i,j) * Q_new(j,l) + enddo + Q_new(i,l) = Q_new(i,l) - tmp + enddo + enddo + deallocate( Q_mult ) + ! !!! + call my_ortho_qr(Q_new, size(Q_new,1), m, delta_r) + ! !!! + allocate( B_new(delta_r,n) ) + do l = 1 , delta_r + B_new(l,:) = 0.d0 + do k = 1 , N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + B_new(l,j) = B_new(l,j) + Q_new(i,l) * psi_bilinear_matrix_values(k,i_state) + enddo + enddo + ! !!! + do j = 1, n + do l = 1, delta_r + norm_SVD = norm_SVD + B_new(l,j) * B_new(l,j) + enddo + enddo + ! !!! + error_RRRSVD = dabs( norm_psi - norm_SVD ) / norm_psi + It = It + 1 + low_rank = low_rank + delta_r + ! !!! + ! build up approximate basis: + ! Q_old = np.append(Q_new, Q_old, axis=1) + ! B = np.append(B_new, B, axis=0) + ! !!! + allocate( Q_tmp(m,low_rank) , B_tmp(low_rank,n) ) + ! !!! + !do l = 1, delta_r + ! do i = 1, m + ! Q_tmp(i,l) = Q_new(i,l) + ! enddo + !enddo + !do l = 1 , low_rank-delta_r + ! do i = 1, m + ! Q_tmp(i,l+delta_r) = Q_old(i,l) + ! enddo + !enddo + !do i = 1, n + ! do l = 1 , delta_r + ! B_tmp(l,i) = B_new(l,i) + ! enddo + !enddo + !do i = 1, n + ! do l = 1 , low_rank-delta_r + ! B_tmp(l+delta_r,i) = B_old(l,i) + ! enddo + !enddo + ! !!! + do i = 1, m + do l = 1, low_rank-delta_r + Q_tmp(i,l) = Q_old(i,l) + enddo + do l = 1, delta_r + Q_tmp(i,l+low_rank-delta_r) = Q_new(i,l) + enddo + enddo + do i = 1, n + do l = 1 , low_rank-delta_r + B_tmp(l,i) = B_old(l,i) + enddo + do l = 1 , delta_r + B_tmp(l+low_rank-delta_r,i) = B_new(l,i) + enddo + enddo + ! !!! + deallocate( Q_old, B_old, Q_new, B_new ) + allocate( Q_old(m,low_rank) , B_old(low_rank,n) ) + ! !!! + do l = 1, low_rank + do i = 1, m + Q_old(i,l) = Q_tmp(i,l) + enddo + enddo + do l = 1, n + do i = 1, low_rank + B_old(i,l) = B_tmp(i,l) + enddo + enddo + deallocate( Q_tmp , B_tmp ) + ! !!! + print *, It, low_rank, error_RRRSVD + ! !!! enddo - deallocate(U,D,V) + ! !!! + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! + ! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! ! + ! !!! + allocate( UB(low_rank,low_rank), D(low_rank), Vt(low_rank,n) ) + call svd_s(B_old, size(B_old,1), UB, size(UB,1), D, Vt, size(Vt,1), low_rank, n) + deallocate(B_old) + print*, 'ok 1' + ! !!! + allocate( U(m,low_rank) ) + call dgemm('N', 'N', m, low_rank, low_rank, 1.d0, Q_old, size(Q_old,1), UB, size(UB,1), 0.d0, U, size(U,1)) + deallocate( Q_old,UB ) + print*, 'ok 2' + ! !!! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! + allocate( URSVD(m,low_rank), DRSVD(low_rank), VtRSVD(low_rank,n) ) + call RSVD( i_state, low_rank, PowerIt_max, nb_oversamp, URSVD, DRSVD, VtRSVD ) + print*, 'ok 3' + ! !!! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! + allocate( Averif(m,n), Uverif(m,m), Dverif(min(m,n)), Vtverif(n,n) ) + do i = 1, n + Averif(:,i) = 1d-16 + enddo + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Averif(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + call svd_s( Averif, size(Averif,1), Uverif, size(Uverif,1), Dverif, Vtverif, size(Vtverif,1), m, n) + print*, 'ok 4' + ! !!! + err_verif = 0.d0 + do j = 1, n + do i = 1, m + err_tmp = 0.d0 + do l = 1, low_rank + err_tmp = err_tmp + Dverif(l) * Uverif(i,l) * Vtverif(l,j) + enddo + err_verif = err_verif + ( Averif(i,j) - err_tmp )**2.d0 + enddo + enddo + print*, 'err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi) + ! !!! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ~ !!! ! + ! !!! + open(111, file = 'singular_values.txt', action = 'WRITE' ) + do i = 1, low_rank + write(111, '(I8, 5X, E15.7, 2(5X, E15.7, E15.7) )' ) i, Dverif(i), D(i), 100.d0*dabs(D(i)-Dverif(i))/Dverif(i), DRSVD(i), 100.d0*dabs(DRSVD(i)-Dverif(i))/Dverif(i) + end do + close(111) + ! !!! + !deallocate( Averif, Uverif, Dverif, Vtverif ) + ! !!! + low_rank = 10 + ! !!! + err_verif = 0.d0 + do j = 1, n + do i = 1, m + err_tmp = 0.d0 + do l = 1, low_rank + err_tmp = err_tmp + Dverif(l) * Uverif(i,l) * Vtverif(l,j) + enddo + err_verif = err_verif + ( Averif(i,j) - err_tmp )**2.d0 + enddo + enddo + print*, 'err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi) + ! !!! + print*, 'low_rank =', low_rank + allocate(Uezfio(m,low_rank,1), Dezfio(low_rank,1), Vezfio(n,low_rank,1)) + do l = 1, low_rank + Dezfio(l,1) = Dverif(l) + do j = 1, m + Uezfio(j,l,1) = Uverif(j,l) + enddo + do j = 1, n + Vezfio(j,l,1) = Vtverif(l,j) + enddo + enddo + deallocate( U, D, Vt ) + ! !!! + call ezfio_set_spindeterminants_n_det(N_det) + call ezfio_set_spindeterminants_n_states(N_states) + call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + ! !!! + call ezfio_set_spindeterminants_n_svd_coefs(low_rank) + call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + deallocate(Uezfio, Dezfio, Vezfio) + ! !!! + !print*, PSI_energy(1) + nuclear_repulsion + !psi_bilinear_matrix(:,:,:) = 0.d0 + !do low_rank = n, n + ! call generate_all_alpha_beta_det_products + ! do i = 1, N_det_beta_unique + ! do j = 1, N_det_alpha_unique + ! psi_bilinear_matrix(j,i,1) = 0.d0 + ! do l = 1, r + ! psi_bilinear_matrix(j,i,1) = psi_bilinear_matrix(j,i,1) + D(l) * U(j,l) * V(i,l) + ! enddo + ! enddo + ! enddo + ! TOUCH psi_bilinear_matrix + ! call update_wf_of_psi_bilinear_matrix(.False.) + ! print*, low_rank, PSI_energy(1) + nuclear_repulsion !CI_energy(1) + !enddo + !deallocate(U,D,V) ! !!! end diff --git a/devel/svdwf/FSVD_trunc.irp.f b/devel/svdwf/FSVD_trunc.irp.f new file mode 100644 index 0000000..3abf6a8 --- /dev/null +++ b/devel/svdwf/FSVD_trunc.irp.f @@ -0,0 +1,97 @@ + +program FSVD_trunc + implicit none + BEGIN_DOC + ! study precision variation with truncated SVD + END_DOC + read_wf = .True. + TOUCH read_wf + ! !!! + call run() + ! !!! +end + + +subroutine run + + implicit none + + integer :: mm, nn, i_state, low_rank, lrank_min, lrank_max + integer :: i, j, k, l + double precision :: norm_psi, err_verif, err_tmp + double precision, allocatable :: U_FSVD(:,:), D_FSVD(:), Vt_FSVD(:,:), A_FSVD(:,:) + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + i_state = 1 + mm = n_det_alpha_unique + nn = n_det_beta_unique + + print *, ' matrix dimensions:', mm,'x',nn + print *, ' N det:', N_det + + allocate( A_FSVD(mm,nn), U_FSVD(mm,mm), D_FSVD(min(mm,nn)), Vt_FSVD(nn,nn) ) + + norm_psi = 0.d0 + A_FSVD(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + A_FSVD(i,j) = psi_bilinear_matrix_values(k,i_state) + norm_psi += psi_bilinear_matrix_values(k,i_state) * psi_bilinear_matrix_values(k,i_state) + enddo + + call svd_s( A_FSVD, size(A_FSVD,1), U_FSVD, size(U_FSVD,1), D_FSVD, Vt_FSVD, size(Vt_FSVD,1), mm, nn) + print *, ' --- Full SVD: ok --- ' + + !lrank_min = 100 + !lrank_max = nn + !do low_rank = lrank_min, lrank_max, 1 + ! err_verif = 0.d0 + ! do j = 1, nn + ! do i = 1, mm + ! err_tmp = 0.d0 + ! do l = 1, low_rank + ! err_tmp = err_tmp + D_FSVD(l) * U_FSVD(i,l) * Vt_FSVD(l,j) + ! enddo + ! err_verif = err_verif + (A_FSVD(i,j)-err_tmp) * (A_FSVD(i,j)-err_tmp) + ! enddo + ! enddo + ! print*, ' low rank = ', low_rank + ! print*, ' err verif (%) = ', 100.d0 * dsqrt(err_verif/norm_psi) + !enddo + + ! ------------------------------------------------------------------------------------------------ + ! set to EZFIO for a fixed low rank + + low_rank = min(mm,nn) + allocate( Uezfio(mm,low_rank,1), Dezfio(low_rank,1), Vezfio(nn,low_rank,1)) + + do l = 1, low_rank + Dezfio(l,1) = D_FSVD(l) + do j = 1, mm + Uezfio(j,l,1) = U_FSVD(j,l) + enddo + do j = 1, nn + Vezfio(j,l,1) = Vt_FSVD(l,j) + enddo + enddo + + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + call ezfio_set_spindeterminants_n_svd_coefs(low_rank) + call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + + ! ------------------------------------------------------------------------------------------------ + + deallocate( Uezfio, Dezfio, Vezfio ) + deallocate( U_FSVD, D_FSVD, Vt_FSVD ) + +end diff --git a/devel/svdwf/NEED b/devel/svdwf/NEED index 8d89a45..49488c9 100644 --- a/devel/svdwf/NEED +++ b/devel/svdwf/NEED @@ -1,2 +1,2 @@ determinants -davidson_undressed +davidson_undressed diff --git a/devel/svdwf/QR.py b/devel/svdwf/QR.py new file mode 100644 index 0000000..262ee4e --- /dev/null +++ b/devel/svdwf/QR.py @@ -0,0 +1,10 @@ +# !!! +import numpy as np +# !!! +def QR_fact(X): + Q, R = np.linalg.qr(X, mode="reduced") + D = np.diag( np.sign( np.diag(R) ) ) + Qunique = np.dot(Q,D) + #Runique = np.dot(D,R) + return(Qunique) +# !!! \ No newline at end of file diff --git a/devel/svdwf/R3SVD_AMMAR.py b/devel/svdwf/R3SVD_AMMAR.py new file mode 100644 index 0000000..6a719b7 --- /dev/null +++ b/devel/svdwf/R3SVD_AMMAR.py @@ -0,0 +1,68 @@ +# !!! +import numpy as np +from QR import QR_fact +from RSVD import powit_RSVD +# !!! +def R3SVD_AMMAR(A, t, delta_t, npow, nb_oversamp, err_thr, maxit, tol): + # !!! + # build initial QB decomposition + # !!! + n = A.shape[1] + G = np.random.randn(n, t) + normA = np.linalg.norm(A, ord='fro')**2 + i_it = 0 + rank = 0 + Y = np.dot(A,G) + # The power scheme + for j in range(npow): + Q = QR_fact(Y) + Q = QR_fact( np.dot(A.T,Q) ) + Y = np.dot(A,Q) + # orthogonalization process + Q_old = QR_fact(Y) + B = np.dot(Q_old.T,A) + normB = np.linalg.norm(B, ord='fro')**2 + # error percentage + errpr = abs( normA - normB ) / normA + rank += t + i_it += 1 + print("iteration = {}, rank = {}, error = {}".format(i_it, rank, errpr)) + # !!! + # incrementally build up QB decomposition + # !!! + while ( (errpr>err_thr) and (i_iterr_thr) and (i_it + norm_psi = 0.d0 + do i = 1, n_selected + norm_psi += Dref(i) * Dref(i) + enddo + norm_psi = 1.d0 / dsqrt(norm_psi) + do i = 1, n_selected + coeff_psi_selected(i) = Dref(i) * norm_psi + enddo + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + print *, '' + print *, '' + print *, '' + print *, '-- Compute H --' + allocate( H0(n_selected,n_selected) ) + call const_psiHpsi(n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + ! avant SVD + ! E0 = < psi_0 | H | psi_0 > / < psi_0 | psi_0 > + E0 = 0.d0 + do i = 1, n_selected + ii = numalpha_selected(i) + htmp = 0.d0 + do j = 1, n_selected + jj = numalpha_selected(j) + htmp = htmp + coeff_psi_selected(j) * H0(jj,ii) + enddo + E0 = E0 + htmp * coeff_psi_selected(i) + enddo + E0 = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0 + + deallocate( H0 ) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! nondiagonal elements + + print *, ' --- Perturbation --- ' + + allocate( Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + call const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref & + , numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + + ! evaluate the coefficients for all the vectors + allocate( coeff_psi_toselect(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do l = 1, n_selected + ctmp += coeff_psi_selected(l) * Hkl(l,ii) + enddo + coeff_psi_toselect(ii) = ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0 + ept2 + deallocate( Hdiag, Hkl) + + print *, ' perturb energy = ', E0pt2, ept2 + print*, ' delta E0 = ', E0pt2 - E0_old + tol_energy = 100.d0 * dabs(E0pt2-E0_old)/dabs(E0pt2) + E0_old = E0pt2 + + ! normalize the new psi and perform a new SVD + norm_psi = 0.d0 + do l = 1, n_toselect + norm_psi = norm_psi + coeff_psi_toselect(l)*coeff_psi_toselect(l) + enddo + norm_psi = norm_psi + 1.d0 + norm_psi = 1.d0 / dsqrt(norm_psi) + do i = 1, n_toselect + coeff_psi_toselect(i) = coeff_psi_toselect(i) * norm_psi + enddo + do i = 1, n_selected + coeff_psi_selected(i) = coeff_psi_selected(i) * norm_psi + enddo + + print *, ' --- SVD --- ' + call perform_newSVD(n_selected, n_toselect, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, coeff_psi_selected, coeff_psi_toselect & + , Uref, Vref, Dref ) + + ! --------------------------------------------------------------------------------------- + + deallocate( coeff_psi_toselect ) + deallocate( coeff_psi_selected ) + + + write(55,'(i5,4x,4(f22.15,2x))') it_svd, E0, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + !print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + !print*, ' ' + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print *,' ___________________________________________________________________' + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,//)', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + + +end + + + + + + + +subroutine const_psiHpsi(n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: i, j, k, l + integer :: ii0, jj0, ii, jj, n, m, np, mp + double precision :: h12, x + + H0(:,:) = 0.d0 + + do i = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + do k = 1, n_det_alpha_unique + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! !!! + ! ~~~ H0 ~~~ + do n = 1, n_selected + ii0 = numalpha_selected(n) + jj0 = numbeta_selected (n) + x = Uref(k,ii0) * Vref(l,jj0) * h12 + do m = 1, n_selected + ii = numalpha_selected(m) + jj = numbeta_selected (m) + H0(m,n) += Uref(i,ii) * Vref(j,jj) * x + enddo + enddo + ! ~~~ ~~~~~~ ~~~ + enddo + enddo + enddo + enddo + +end subroutine const_psiHpsi + + + + + +subroutine const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref & + , numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected,n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + integer :: i, j, k, l + integer :: ii0, jj0, ii, jj, n, m, np, mp + double precision :: h12, y + double precision, allocatable :: Hdiag_tmp(:), Hkl_tmp(:,:) + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(n,ii0,jj0,y,m,ii,jj,i,j,k,l,h12,det1,det2,Hdiag_tmp,Hkl_tmp,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_toselect,Uref,Vref,numalpha_toselect,numbeta_toselect, & + !$OMP numalpha_selected, numbeta_selected,Hkl,Hdiag ) + allocate( Hdiag_tmp(n_toselect), Hkl_tmp(n_selected,n_toselect) ) + Hdiag_tmp(:) = 0.d0 + Hkl_tmp(:,:) = 0.d0 + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! ~ ~ ~ H ~ ~ ~ + do n = 1, n_toselect + ii0 = numalpha_toselect(n) + jj0 = numbeta_toselect (n) + y = Uref(k,ii0) * Vref(l,jj0) * h12 + ! Hdiag + Hdiag_tmp(n) += Uref(i,ii0) * Vref(j,jj0) * y + do m = 1, n_selected + ii = numalpha_selected(m) + jj = numbeta_selected (m) + ! Hkl + Hkl_tmp(m,n) += Uref(i,ii) * Vref(j,jj) * y + enddo + enddo + ! ~ ~ ~ ! ! ! ~ ~ ~ + enddo + enddo + ! !!! + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, n_toselect + Hdiag(n) += Hdiag_tmp(n) + do m = 1, n_selected + Hkl(m,n) += Hkl_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( Hdiag_tmp,Hkl_tmp ) + !$OMP END PARALLEL + +end subroutine const_Hdiag_Hkl + + + + + + +subroutine perform_newSVD(n_selected, n_toselect, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, coeff_psi_selected, coeff_psi_toselect & + , Uref, Vref, Dref ) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected, n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: coeff_psi_selected(n_selected), coeff_psi_toselect(n_toselect) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj + double precision :: err0, err_norm, err_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(n_det_alpha_unique,n_det_beta_unique) ) + allocate( V_svd(n_det_beta_unique ,n_det_beta_unique) ) + allocate( S_mat(n_det_beta_unique ,n_det_beta_unique) ) + + U_svd(:,:) = Uref(:,:) + V_svd(:,:) = Vref(:,:) + + S_mat(:,:) = 0.d0 + do l = 1, n_selected + ii = numalpha_selected(l) + jj = numbeta_selected (l) + S_mat(ii,jj) = coeff_psi_selected(l) + enddo + do l = 1, n_toselect + ii = numalpha_toselect(l) + jj = numbeta_toselect (l) + S_mat(ii,jj) = coeff_psi_toselect(l) + enddo + + ! construct the new matrix: U_svd x S_mat x transpose(V_svd) + ! (NaxNb) (NbxNb) transpose(NbxNb) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(nn,nn) ) + call dgemm( 'N', 'T', nn, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, nn, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd & + , Vt_newsvd, size(Vt_newsvd,1), mm, nn) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' +++ new SVD is performed +++ ' + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, mm + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, nn + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newSVD + diff --git a/devel/svdwf/buildpsi_diagSVDit.py b/devel/svdwf/buildpsi_diagSVDit.py new file mode 100644 index 0000000..3119aa8 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit.py @@ -0,0 +1,614 @@ +import sys, os +QMCCHEM_PATH=os.environ["QMCCHEM_PATH"] +sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/") + +from ezfio import ezfio +from datetime import datetime +import time +import numpy as np +import subprocess +from scipy.linalg import eig, eigh + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! +def get_energy(): + buffer = subprocess.check_output( ['qmcchem', 'result', '-e', 'e_loc', filename] + , encoding='UTF-8' ) + if buffer.strip() != "": + buffer = buffer.splitlines()[-1] + _, energy, error = [float(x) for x in buffer.split()] + return energy, error + else: + return None, None +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def run_qmc(): + return subprocess.check_output(['qmcchem', 'run', filename]) + +def stop_qmc(): + subprocess.check_output(['qmcchem', 'stop', filename]) + +def set_vmc_params(): + #subprocess.check_output(['qmcchem', 'edit', '-c', '-j', 'Simple', + # '-m', 'VMC', + # '-l', str(20), + # '--time-step=0.3', + # '--stop-time=36000', + # '--norm=1.e-5', + # '-w', '10', + # filename]) + subprocess.check_output(['qmcchem', 'edit', '-c' + , '-j', 'None' + , '-l', str(block_time) + , '-t', str(total_time) + , filename]) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_Aref(): + + Aref = np.zeros( (n_alpha, n_beta) ) + for k in range(n_det): + i = A_rows[k] - 1 + j = A_cols[k] - 1 + Aref[i,j] = A_vals[0][k] + return( Aref ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def check_svd_error( A, U, S, V ): + + # || A - U x S x transpose(V) || + + # to normalize + norm_A = np.linalg.norm(A, ord='fro') + + # vector S ==> matrix S + _, na = U.shape + _, nb = V.shape + S_mat = np.zeros( (na,nb) ) + for i in range( min(na,nb) ): + S_mat[i,i] = S[i] + + #A_SVD = np.linalg.multi_dot([ U, np.diag(S), Vt ]) + A_SVD = np.linalg.multi_dot([ U, S_mat, V.T ]) + err_SVD = 100. * np.linalg.norm( A - A_SVD, ord="fro") / norm_A + + print(' error between A_SVD and Aref = {} %\n'.format(err_SVD) ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def FSVD_save_EZFIO(): + + U_toEZFIO = np.zeros( ( 1, U_FSVD.shape[1], U_FSVD.shape[0] ) ) + V_toEZFIO = np.zeros( ( 1, V_FSVD.shape[1], V_FSVD.shape[0] ) ) + U_toEZFIO[0,:,:] = U_FSVD.T + V_toEZFIO[0,:,:] = V_FSVD.T + + ezfio.set_spindeterminants_psi_svd_alpha_unique( U_toEZFIO ) + ezfio.set_spindeterminants_psi_svd_beta_unique ( V_toEZFIO ) + ezfio.set_spindeterminants_psi_svd_coefs_unique( S_FSVD ) + + print(' Full SVD unique vectors & coeff are saved to EZFIO ') + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def numerote_selected_vectors(): + + numalpha_selected = [] + numbeta_selected = [] + + for i in range(n_TSVD): + for j in range(n_TSVD): + numalpha_selected.append(j+1) + numbeta_selected.append(i+1) + + if( (len(numalpha_selected)!=n_selected) or (len(numbeta_selected)!=n_selected) ) : + print(' error in numerating selectod vectors') + print(' {} != {} != {}'.format(n_selected,len(numalpha_selected),len(numbeta_selected)) ) + else: + ezfio.set_spindeterminants_psi_svd_alpha_numselected(numalpha_selected) + ezfio.set_spindeterminants_psi_svd_beta_numselected (numbeta_selected) + print(' selected vectors are numeroted in EZFIO') + + return( numalpha_selected,numbeta_selected ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def numerote_toselect_vectors( choix ): + + numalpha_toselect = [] + numbeta_toselect = [] + + if( choix == 3 ): + # nondiagonal blocs + for i in range(n_TSVD): + for j in range(n_TSVD,n_beta): + numalpha_toselect.append(i+1) + numbeta_toselect.append(j+1) + for i in range(n_TSVD,n_alpha): + for j in range(n_TSVD): + numalpha_toselect.append(i+1) + numbeta_toselect.append(j+1) + # diagonal bloc + for i in range(n_TSVD,n_alpha): + for j in range(n_TSVD,n_beta): + numalpha_toselect.append(i+1) + numbeta_toselect.append(j+1) + elif( choix == 2 ): + # nondiagonal blocs + for i in range(n_TSVD): + for j in range(n_TSVD,n_beta): + numalpha_toselect.append(i+1) + numbeta_toselect.append(j+1) + for i in range(n_TSVD,n_alpha): + for j in range(n_TSVD): + numalpha_toselect.append(i+1) + numbeta_toselect.append(j+1) + else: + print(' choix = 2 ou 3' ) + exit() + + if( (len(numalpha_toselect)!=n_toselect) or (len(numbeta_toselect)!=n_toselect) ) : + print(' error in numerating vectors to select') + print(' {} != {} != {}'.format(n_toselect,len(numalpha_toselect),len(numbeta_toselect)) ) + else: + ezfio.set_spindeterminants_psi_svd_alpha_numtoselect(numalpha_toselect) + ezfio.set_spindeterminants_psi_svd_beta_numtoselect (numbeta_toselect) + print(' vectors to select are numeroted in EZFIO') + + return( numalpha_toselect,numbeta_toselect ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def normalize_S_TSVD(): + + global S_TSVD + + norm_S_TSVD = np.linalg.norm(S_TSVD, ord='fro') + S_TSVD = S_TSVD / norm_S_TSVD + + return() + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_h_selected_matrix(): + + h_selected_matrix = np.zeros( (n_selected,n_selected) ) + h_selected_stater = np.zeros( (n_selected,n_selected) ) + + beg_h_selected_matrix = results.find('h_selected_matrix : [ ') + len('h_selected_matrix : [ ') + end_h_selected_matrix = len(results) + h_selected_matrix_buf = results[beg_h_selected_matrix:end_h_selected_matrix] + h_selected_matrix_buf = h_selected_matrix_buf.split( '\n' ) + + for i in range(1,n_selected+1): + ii0 = (i-1) * n_selected + for j in range(1,n_selected+1): + iline = ii0 + j + + line = h_selected_matrix_buf[iline].split() + indc = int ( line[0] ) + + if( indc != iline ): + print('Error in get_h_selected_matrix') + exit() + else: + h_selected_matrix[i-1][j-1] = float( line[2] ) + h_selected_stater[i-1][j-1] = float( line[4] ) + + return(h_selected_matrix,h_selected_stater) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_o_selected_matrix(): + + o_selected_matrix = np.zeros( (n_selected,n_selected) ) + o_selected_stater = np.zeros( (n_selected,n_selected) ) + + beg_o_selected_matrix = results.find('overlop_selected_matrix : [ ') + len('overlop_selected_matrix : [ ') + end_o_selected_matrix = len(results) + o_selected_matrix_buf = results[beg_o_selected_matrix:end_o_selected_matrix] + o_selected_matrix_buf = o_selected_matrix_buf.split( '\n' ) + + for i in range(1,n_selected+1): + ii0 = (i-1) * n_selected + for j in range(1,n_selected+1): + iline = ii0 + j + + line = o_selected_matrix_buf[iline].split() + indc = int ( line[0] ) + + if( indc != iline ): + print('Error in get_o_selected_matrix') + exit() + else: + o_selected_matrix[i-1][j-1] = float( line[2] ) + o_selected_stater[i-1][j-1] = float( line[4] ) + + return(o_selected_matrix,o_selected_stater) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_Epostsvd(): + + # symmetrise and diagonalise + aa = h_selected_matrix + aa = 0.5*( aa + aa.T ) + bb = o_selected_matrix + eigvals_postsvd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True, + check_finite=True, homogeneous_eigvals=False) + + d_postsvd = np.diagflat(S_TSVD) + d_postsvd = d_postsvd.reshape( (1,n_selected*n_selected) ) + recouvre_postsvd = np.abs(d_postsvd @ vr) + ind_gspostsvd = np.argmax(recouvre_postsvd) + + E_postsvd = eigvals_postsvd[ind_gspostsvd] + + return( E_postsvd, vr[:,ind_gspostsvd] ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def Hqmc_svd_diag(): + + # read CI_SVD matrices + Ci_h_matrix_svd = get_Ci_h_matrix_svd() + Ci_overlap_matrix_svd = get_Ci_overlap_matrix_svd() + + # symmetrise + aa = Ci_h_matrix_svd + aa = 0.5*( aa + aa.T ) + + bb = Ci_overlap_matrix_svd + + # diagonalise + eigvals_svd, vr = eig( aa, bb, left=False, right=True + , overwrite_a=True, overwrite_b=True + , check_finite=True, homogeneous_eigvals=False) + + recouvre_svd = np.abs( np.dot( S_FSVD, vr) ) + ind_gssvd = np.argmax(recouvre_svd) + E_svd = eigvals_svd[ind_gssvd] + nuc_energy + + return( E_svd, vr[:,ind_gssvd] ) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def SVD_postsvd(): + + # reshape sigma_postsvd + sigma_postsvd_mat = np.zeros( (n_selected,n_selected) ) + for i in range(n_TSVD): + ii = i*n_TSVD + for j in range(n_TSVD): + jj = i*n_TSVD + j + sigma_postsvd_mat[i][j] = sigma_postsvd[jj] + + # construct the new matrix Y & perform a new SVD + Y = np.dot( U_TSVD , np.dot(sigma_postsvd_mat , Vt_TSVD) ) + U, S, Vt = np.linalg.svd(Y, full_matrices=True) + + return(U, S, Vt) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_hij_fm(): + + hij_fm = np.zeros( (n_toselect) ) + hij_fm_stater = np.zeros( (n_toselect) ) + + beg_hij = results.find('hij_fm : [ ') + len('hij_fm : [ ') + end_hij = len(results) + hij_buf = results[beg_hij:end_hij] + hij_buf = hij_buf.split( '\n' ) + + for iline in range(1,n_toselect+1): + line = hij_buf[iline].split() + indc = int( line[0] ) + if( indc != iline ): + print('Error in get_hij_fm') + exit() + else: + hij_fm [iline-1] = float( line[2] ) + hij_fm_stater[iline-1] = float( line[4] ) + + return(hij_fm, hij_fm_stater) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_hij_sm(): + + hij_sm = np.zeros( (n_toselect) ) + hij_sm_stater = np.zeros( (n_toselect) ) + + beg_hij = results.find('hij_sm : [ ') + len('hij_sm : [ ') + end_hij = len(results) + hij_buf = results[beg_hij:end_hij] + hij_buf = hij_buf.split( '\n' ) + + for iline in range(1,n_toselect+1): + line = hij_buf[iline].split() + indc = int( line[0] ) + if( indc != iline ): + print('Error in get_hij_sm') + exit() + else: + hij_sm [iline-1] = float( line[2] ) + hij_sm_stater[iline-1] = float( line[4] ) + + return(hij_sm, hij_sm_stater) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + +def get_xij_diag(): + + xij_diag = np.zeros( (n_toselect) ) + xij_diag_stater = np.zeros( (n_toselect) ) + + beg_xij = results.find('xij_diag : [ ') + len('xij_diag : [ ') + end_xij = len(results) + xij_buf = results[beg_xij:end_xij] + xij_buf = xij_buf.split( '\n' ) + + for iline in range(1,n_toselect+1): + line = xij_buf[iline].split() + indc = int( line[0] ) + if( indc != iline ): + print('Error in get_xij_diag') + exit() + else: + xij_diag [iline-1] = float( line[2] ) + xij_diag_stater[iline-1] = float( line[4] ) + + return(xij_diag, xij_diag_stater) + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! + + + + + +if __name__ == '__main__': + + t0 = time.time() + print("Today's date:", datetime.now() ) + + # EZFIO file + filename = "/home/aammar/qp2/src/svdwf/2h2_work/2h2_cisd" + ezfio.set_file(filename) + print("filename = {}".format(filename)) + + # parameters + energ_nuc = 1.711353545183182 # for 2h2 + + # get spindeterminant data from EZFIO + n_alpha = ezfio.get_spindeterminants_n_det_alpha() + n_beta = ezfio.get_spindeterminants_n_det_beta() + A_rows = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows() ) + A_cols = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns()) + A_vals = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values() ) + n_det = A_rows.shape[0] + + print('~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~') + print(' matrix: {} x {}'.format(n_alpha,n_beta)) + print(' n_det = {}'.format(n_det)) + print('~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~') + + # construct intial dense matrix + Aref = get_Aref() + + # perform initial Full SVD + print('') + print(' ----- Performing Full SVD ----- ') + U_FSVD, S_FSVD, Vt_FSVD = np.linalg.svd(Aref, full_matrices=True) + V_FSVD = Vt_FSVD.T + + # check Full SVD error + check_svd_error( Aref, U_FSVD, S_FSVD, V_FSVD ) + + # save SVD vectors & coefficients in EZFIO + ezfio.set_spindeterminants_n_svd_coefs_unique(min(n_alpha,n_beta)) + FSVD_save_EZFIO() + + # truncated SVD + n_TSVD = 15 + ezfio.set_spindeterminants_n_svd_coefs(n_TSVD) + U_TSVD = U_FSVD[:,:n_TSVD] + V_TSVD = V_FSVD[:,:n_TSVD] + S_TSVD = S_FSVD[:n_TSVD] + + # check truncataed SVD error + check_svd_error( Aref, U_TSVD, S_TSVD, V_TSVD ) + + # numerote selected vectors & save to EZFIO + n_selected = n_TSVD * n_TSVD + print(' n_selected = {}'.format(n_selected)) + ezfio.set_spindeterminants_n_svd_selected(n_selected) + numalpha_selected,numbeta_selected = numerote_selected_vectors() + + # numerote vectors to select & save to EZFIO + n_toselect = n_alpha * n_beta - n_selected + print(' n_toselect = {}'.format(n_toselect)) + ezfio.set_spindeterminants_n_svd_toselect(n_toselect) + numalpha_toselect,numbeta_toselect = numerote_toselect_vectors(choix=3) + + + #_________________________________________________________________________________________ + # + # loop over SVD iterations + #_________________________________________________________________________________________ + + it_svd = 0 + it_svd_max = 1 + + while( it_svd < it_svd_max ): + + it_svd = it_svd + 1 + + # normalize sigular values in the truncated space + #normalize_S_TSVD() + + # run QMC to get H_postsvd + block_time = 300 # in sec + total_time = 1800 # in sec + set_vmc_params() + ezfio.set_properties_hij_fm(False) + ezfio.set_properties_hij_sm(False) + ezfio.set_properties_xij_diag(False) + ezfio.set_properties_h_selected_matrix(True) + ezfio.set_properties_overlop_selected_matrix(True) + run_qmc() + + # read QMC=CHEM results + t_read = time.time() + print(' getting QMCCHEM results from {}'.format(EZFIO_file) ) + results = subprocess.check_output(['qmcchem', 'result', EZFIO_file, '>> results.dat'], encoding='UTF-8') + print(' getting results after {} minutes \n'.format( (time.time()-t_read)/60. )) + + # < E_loc > + Eloc, ErrEloc = get_energy() + print(' Eloc = {} +/- {}'.format(Eloc, ErrEloc)) + + # get H and overlop from QMC=CHEM + h_selected_matrix, h_selected_stater = get_h_selected_matrix() + o_selected_matrix, o_selected_stater = get_o_selected_matrix() + + # ground state from H S = E S + E_postsvd, sigma_postsvd = get_Epostsvd() + print(' post svd energy = {}'.format(E_postsvd+energ_nuc) ) + + # perform new SVD: U x sigma_postsvd x Vt --> U' x S' x Vt' + U_FSVD, S_FSVD, Vt_FSVD = SVD_postsvd() + V_FSVD = Vt_FSVD.T + + # save in EZFIO + FSVD_save_EZFIO() + + # run QMC to get hij, e and xij_diag from QMC=CHEM + block_time = 300 # in sec + total_time = 1800 # in sec + set_vmc_params() + ezfio.set_properties_h_selected_matrix(False) + ezfio.set_properties_overlop_selected_matrix(False) + ezfio.set_properties_hij_fm(True) + ezfio.set_properties_hij_sm(True) + ezfio.set_properties_xij_diag(True) + run_qmc() + + # read QMC=CHEM results + t_read = time.time() + print(' getting QMCCHEM results from {}'.format(EZFIO_file) ) + results = subprocess.check_output(['qmcchem', 'result', EZFIO_file, '>> results.dat'], encoding='UTF-8') + print(' getting results after {} minutes \n'.format( (time.time()-t_read)/60. )) + + # < E_loc > + Eloc, ErrEloc = get_energy() + print(' Eloc = {} +/- {}'.format(Eloc, ErrEloc)) + + # hij = < psi_svd J | H | J l l' > / < psi_svd J | psi_svd J > + # = < H (J l l')/(psi_svd J) > ( first method: fm ) + # = < E_loc (l l') / psi_svd > ( second method: sm ) + hij_fm, hif_fm_stater = get_hij_fm() + hij_sm, hif_sm_stater = get_hij_sm() + + # get xij_diag + xij_diag, xij_diag_stater = get_xij_diag() + + # first method + dij_fm = np.zeros( (n_toselect) ) + dij_fm_stater = np.zeros( (n_toselect) ) + for i in range(n_toselect): + dij_fm[i] = hij_fm[i] / ( Eloc - xij_diag[i] ) + # statistic error + a = ( hij_fm_stater[i]*hij_fm_stater[i] ) / ( hij_fm[i]*hij_fm[i] ) + b = ( ErrEloc*ErrEloc + xij_diag_stater[i]*xij_diag_stater[i] ) / ( (Eloc-xij_diag[i])*(Eloc-xij_diag[i]) ) + dij_fm_stater[i] = abs(dij_fm[i]) * np.sqrt( a + b ) + + cc = np.concatenate((dij_fm,dij_fm_stater),axis=1) + np.savetxt('dij_fm.txt',cc) + + # second method + dij_sm = np.zeros( (n_toselect) ) + dij_sm_stater = np.zeros( (n_toselect) ) + for i in range(n_toselect): + dij_sm[i] = hij_sm[i] / ( Eloc - xij_diag[i] ) + # statistic error + a = ( hij_fm_stater[i]*hij_fm_stater[i] ) / ( hij_sm[i]*hij_sm[i] ) + b = ( ErrEloc*ErrEloc + xij_diag_stater[i]*xij_diag_stater[i] ) / ( (Eloc-xij_diag[i])*(Eloc-xij_diag[i]) ) + dij_sm_stater[i] = abs(dij_sm[i]) * np.sqrt( a + b ) + + cc = np.concatenate((dij_sm,dij_sm_stater),axis=1) + np.savetxt('dij_sm.txt',cc) + + # TODO + # choose fm or sm & perform a new SVD + + + #_________________________________________________________________________________________ + + print("end after {:.3f} minutes".format((time.time()-t0)/60.) ) + diff --git a/devel/svdwf/buildpsi_diagSVDit_Anthony_v0.irp.f b/devel/svdwf/buildpsi_diagSVDit_Anthony_v0.irp.f new file mode 100644 index 0000000..68727f8 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v0.irp.f @@ -0,0 +1,612 @@ +program buildpsi_diagSVDit_Anthony_v0 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, ii, jj, na, nb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0_av, E0_ap, E0pt2 + double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlop_postsvd + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi + + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:) + double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:) + + integer :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max + integer :: n_selected2 + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it + real(kind=8) :: CPU_tot_time, CPU_tot_time_it + real(kind=8) :: speedup, speedup_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + i_state = 1 + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_beta_unique) ) + allocate( Dref(n_det_beta_unique) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' --- First SVD: ok --- ' + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! numerote vectors + + ! Truncated rank + n_selected = 20 + print*, ' initial psi space rank = ', n_selected + + ! check SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_selected + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + E0_old = 0.d0 + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + + allocate( H(n_det_beta_unique,n_det_beta_unique,n_det_beta_unique,n_det_beta_unique) ) + allocate( psi_postsvd(n_det_beta_unique,n_det_beta_unique) ) + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' iteration', it_svd + + double precision :: norm + norm = 0.d0 + do j = 1, n_selected + norm = norm + Dref(j)*Dref(j) + enddo + Dref = Dref / dsqrt(norm) + + print *, '' + print *, '' + print *, '' + print *, '-- Compute H --' + call const_H_uv(Uref, Vref, H) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + do j = 1, n_selected + do i = 1, n_selected + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + + double precision, allocatable :: eigval0(:) + double precision, allocatable :: eigvec0(:,:,:) + double precision, allocatable :: H_tmp(:,:,:,:) + + allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) ) + do l=1,n_selected + do k=1,n_selected + do j=1,n_selected + do i=1,n_selected + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2)) + eigvec0 = 0.d0 + + print *, ' --- Diag post-SVD --- ' + call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2) + E0_postsvd = eigval0(1)+nuclear_repulsion + print*, ' postsvd energy = ', E0_postsvd + deallocate(H_tmp, eigval0) + + print *, ' --- SVD --- ' + Dref = 0.d0 + call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref) + deallocate(eigvec0) + + print *, ' --- Compute H --- ' + call const_H_uv(Uref, Vref, H) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + norm = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + norm = norm + Dref(j)*Dref(j) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + !print *,' norm =', norm + + print *, ' --- Perturbation --- ' + psi_postsvd = 0.d0 + do i=1,n_selected + psi_postsvd(i,i) = Dref(i) + enddo + + !do j=1,n_selected + ! do i=n_selected+1,n_det_beta_unique + ! print *, i,j, H(i,j,i,j) + ! enddo + !enddo + !do j=n_selected+1,n_det_beta_unique + ! do i=1,n_selected + ! print *, i,j, H(i,j,i,j) + ! enddo + !enddo + !do j=n_selected+1,n_det_beta_unique + ! do i=n_selected+1,n_det_beta_unique + ! print *, i,j, H(i,j,i,j) + ! enddo + !enddo + + Ept2 = 0.d0 + do j=1,n_selected + do i=n_selected+1,n_det_beta_unique + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + + do j=n_selected+1,n_det_beta_unique + do i=1,n_selected + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + do j=n_selected+1,n_det_beta_unique + do i=n_selected+1,n_det_beta_unique + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0_ap - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + E0pt2 = E0_ap + Ept2 + print *, ' perturb energy = ', E0pt2, Ept2 + + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + print *, ' --- SVD --- ' + call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref) + + write(22,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + end do + + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + deallocate( H, psi_postsvd ) + deallocate( Uref, Vref, Dref ) + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + + + + + + + + + + + + + + + + +subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_selected + double precision, intent(in) :: psi_postsvd(n_selected,n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) ) + + U_svd(:,:) = Uref(:,1:n_selected) + V_svd(:,:) = Vref(:,1:n_selected) + + S_mat(:,:) = 0.d0 + do j = 1, n_selected + do i = 1, n_selected + S_mat(i,j) = psi_postsvd(i,j) + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_selected,nn) ) + call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new perturbative SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, n_selected + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + + do l = 1, n_selected + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo + ! print *, Dref(:) + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newpostSVD + + + +subroutine const_H_uv(Uref, Vref, H) + + USE OMP_LIB + + implicit none + + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_det_beta_unique,n_det_beta_unique, n_det_beta_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + + + + allocate( H0(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique, n_det_beta_unique) ) + allocate( H1(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique, n_det_beta_unique) ) + + H0(:,:,:,:) = 0.d0 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,Uref,Vref,H0,H1,H) + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det2, det1, N_int, H0(k,l,i,j) ) + ! !!! + enddo + enddo + enddo + enddo + !$OMP END DO + + + + !$OMP SINGLE + H1 = 0.d0 + !$OMP END SINGLE + !$OMP DO + do s = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + H1(i,j,k,s) = H1(i,j,k,s) + H0(i,j,k,l) * Vref(l,s) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + H0 = 0.d0 + !$OMP END SINGLE + !$OMP DO + do s = 1, n_det_beta_unique + do r = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + H0(i,j,r,s) = H0(i,j,r,s) + H1(i,j,k,s) * Uref(k,r) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + H1 = 0.d0 + !$OMP END SINGLE + !$OMP DO + do s = 1, n_det_beta_unique + do j = 1, n_det_beta_unique + do r = 1, n_det_alpha_unique + do q = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + H1(i,q,r,s) = H1(i,q,r,s) + H0(i,j,r,s) * Vref(j,q) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + + !$OMP SINGLE + H = 0.d0 + !$OMP END SINGLE + !$OMP DO + do s = 1, n_det_beta_unique + do r = 1, n_det_beta_unique + do q = 1, n_det_beta_unique + do p = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + H(p,q,r,s) = H(p,q,r,s) + H1(i,q,r,s) * Uref(i,p) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + deallocate(H1,H0) + + return +end + + + + + diff --git a/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f b/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f new file mode 100644 index 0000000..fe4f056 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v1.irp.f @@ -0,0 +1,454 @@ +program buildpsi_diagSVDit_Anthony_v1 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, ii, jj, na, nb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlap_postsvd, E_prev + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi + + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:) + double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:) + + integer :: n_TSVD, n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max + integer :: n_selected2 + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it + real(kind=8) :: CPU_tot_time, CPU_tot_time_it + real(kind=8) :: speedup, speedup_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! Truncated rank + n_TSVD = 20 + n_selected = n_TSVD + call write_int(6,n_TSVD, 'Rank of psi') + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + E_prev = 0.d0 + + allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique)) + allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' ' + print*, ' ' + print*, ' iteration', it_svd + + double precision :: norm + norm = 0.d0 + do j = 1, n_selected + norm = norm + Dref(j)*Dref(j) + enddo + Dref = Dref / dsqrt(norm) + + call const_H_uv(Uref, Vref, H) + + E0 = 0.d0 + do j = 1, n_selected + do i = 1, n_selected + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + enddo + double precision :: E0_av, E0_ap, E0pt2 + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + print *, '' + + double precision, allocatable :: eigval0(:) + double precision, allocatable :: eigvec0(:,:,:) + double precision, allocatable :: H_tmp(:,:,:,:) + + allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) ) + do l=1,n_selected + do k=1,n_selected + do j=1,n_selected + do i=1,n_selected + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2)) + eigvec0 = 0.d0 + + call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2) + E0_postsvd = eigval0(1) + nuclear_repulsion + print*, ' postsvd energy = ', E0_postsvd + deallocate(H_tmp, eigval0) + + Dref = 0.d0 + call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref) + deallocate(eigvec0) + + print *, ' --- Compute H --- ' + call const_H_uv(Uref, Vref, H) + + E0 = 0.d0 + norm = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + norm = norm + Dref(j)*Dref(j) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + psi_postsvd = 0.d0 + do i=1,n_selected + psi_postsvd(i,i) = Dref(i) + enddo + + E0 = E0_ap + Ept2 = 0.d0 + do j=1,n_selected + do i=n_selected+1,n_det_alpha_unique + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + do j=n_selected+1,n_det_beta_unique + do i=1,n_selected + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + do j=n_selected+1,n_det_beta_unique + do i=n_selected+1,n_det_alpha_unique + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + + tol_energy = dabs(E_prev - E0_ap) + E_prev = E0_ap + + call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref) + + write(44,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0_ap+Ept2 + + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + + end do + + + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + +subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_selected + double precision, intent(in) :: psi_postsvd(n_selected,n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlapU_mat, overlapV_mat, overlapU, overlapV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) ) + + U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected) + V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected) + S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_selected,nn) ) + call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), & + U_newsvd, size(U_newsvd,1), & + D_newsvd, & + Vt_newsvd, size(Vt_newsvd,1), & + mm, nn) + deallocate(A_newsvd) + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + + !do l = 1, n_selected + ! Dref(l) = D_newsvd(l) + ! Uref(1:mm,l) = U_newsvd(1:mm,l) + ! Vref(1:nn,l) = V_newsvd(1:nn,l) + !enddo + Dref(1:n_selected) = D_newsvd(1:n_selected) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + +end subroutine perform_newpostSVD + + + +subroutine const_H_uv(Uref, Vref, H) + + USE OMP_LIB + + implicit none + + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, na, nb, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( H0(na,nb,na,nb) ) + allocate( H1(nb,na,nb,na) ) + + H0 = 0.d0 + call wall_time(t0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,Uref,Vref,H0,H1,H) + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + call i_H_j(det1, det2, N_int, h12) + H0(i,j,k,l) = h12 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call wall_time(t1) + ! (i,j,k,l) -> (j,k,l,p) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (j,k,l,p) -> (k,l,p,q) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3)) + + ! (k,l,p,q) -> (l,p,q,r) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + call wall_time(t2) + print *, t1-t0, t2-t1 + double precision :: t0, t1, t2 + + deallocate(H1,H0) + +end + + + + + diff --git a/devel/svdwf/buildpsi_diagSVDit_Anthony_v2.irp.f b/devel/svdwf/buildpsi_diagSVDit_Anthony_v2.irp.f new file mode 100644 index 0000000..6f4062a --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v2.irp.f @@ -0,0 +1,754 @@ +program buildpsi_diagSVDit_Anthony_v2 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, na, nb + + double precision :: norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: err0, err_tmp, e_tmp, E0, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlap_postsvd, E_prev + + double precision, allocatable :: H_diag(:,:), Hkl(:,:), H0(:,:), H(:,:,:,:) + double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:) + + integer :: n_TSVD, it_svd, it_svd_max + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call i_H_j(det1, det2, N_int, h12) + + + i_state = 1 + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! Truncated rank + !n_TSVD = 100 + !call write_int(6,n_TSVD, 'Rank of psi') + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + E_prev = 0.d0 + + print *, ci_energy(1) + + allocate(H_diag(n_det_alpha_unique,n_det_beta_unique)) + allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) ) + + it_svd = it_svd + 1 + + ! Truncated rank + n_TSVD = min(n_det_alpha_unique,n_det_beta_unique) + do i = min(n_det_alpha_unique,n_det_beta_unique), 10, -1 + if( dabs(Dref(i)) .lt. 1d-2 ) then + n_TSVD = n_TSVD - 1 + else + exit + endif + enddo + !do i = 1, min(n_det_alpha_unique,n_det_beta_unique) + ! print *, i, Dref(i) + !enddo + call write_int(6,n_TSVD, 'Rank of psi') + n_TSVD = min(n_TSVD,100) + call write_int(6,n_TSVD, 'Rank of psi') + + allocate(H(n_TSVD,n_TSVD,n_det_alpha_unique,n_det_beta_unique)) + + double precision :: norm + norm = 0.d0 + do j = 1, n_TSVD + norm = norm + Dref(j)*Dref(j) + enddo + Dref = Dref / dsqrt(norm) + + print *, '-- Compute H --' + !call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + call const_H_uv(Uref, Vref, H, H_diag, n_TSVD) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + do j = 1, n_TSVD + do i = 1, n_TSVD + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + enddo + E0 = E0 + nuclear_repulsion + print *,' E0 =', E0 + + double precision, allocatable :: eigval0(:) + double precision, allocatable :: eigvec0(:,:,:) + double precision, allocatable :: H_tmp(:,:,:,:) + + allocate( H_tmp(n_TSVD,n_TSVD,n_TSVD,n_TSVD) ) + do l=1,n_TSVD + do k=1,n_TSVD + do j=1,n_TSVD + do i=1,n_TSVD + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + allocate( eigval0(n_TSVD**2),eigvec0(n_TSVD,n_TSVD,n_TSVD**2)) + eigvec0 = 0.d0 + + print *, ' --- Diag post-SVD --- ' + call lapack_diag(eigval0, eigvec0, H_tmp, n_TSVD**2, n_TSVD**2) + print *, 'eig =', eigval0(1) + nuclear_repulsion + deallocate(H_tmp, eigval0) + + print *, ' --- SVD --- ' + Dref = 0.d0 + call perform_newpostSVD(n_TSVD, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref) + deallocate(eigvec0) + + print *, ' --- Compute H --- ' + !call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + call const_H_uv(Uref, Vref, H, H_diag, n_TSVD) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + norm = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_TSVD + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + norm = norm + Dref(j)*Dref(j) + enddo + E0 = E0 + nuclear_repulsion + print *,' E0 =', E0 +! print *,' norm =', norm + + print *, ' --- Perturbation --- ' + psi_postsvd = 0.d0 + !do i=1,n_TSVD + ! psi_postsvd(i,i) = Dref(i) + !enddo + + double precision :: lambda + + lambda = 1.d0 + + Ept2 = 0.d0 + do j=1,n_TSVD + do i=n_TSVD+1,n_det_alpha_unique + ctmp = 0.d0 + do k=1,n_TSVD + ctmp = ctmp + H(k,k,i,j) * Dref(k) + enddo + psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + enddo + enddo + + do j=n_TSVD+1,n_det_beta_unique + do i=1,n_TSVD + ctmp = 0.d0 + do k=1,n_TSVD + ctmp = ctmp + H(k,k,i,j) * Dref(k) + enddo + psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + enddo + enddo + + norm = 0.d0 + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + norm = norm + psi_postsvd(k,l)**2 + enddo + enddo + + norm = dsqrt(norm) + print *, norm + if( norm .gt. 0.01d0 ) then + psi_postsvd = 0.01d0 * psi_postsvd / norm + endif + + do i=1,n_TSVD + psi_postsvd(i,i) = Dref(i) + enddo + + norm = 0.d0 + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + norm = norm + psi_postsvd(k,l)**2 + enddo + enddo + psi_postsvd = psi_postsvd / dsqrt(norm) + + + tol_energy = dabs(E_prev - E0) + print '(I5, 2X, I5, 3(3X, F20.10))', it_svd, n_TSVD, E0, E0 + Ept2, tol_energy + write(114,'(I5, 2X, I5, 3(3X, F20.10))') it_svd,n_TSVD, E0, E0 + Ept2, tol_energy + E_prev = E0 + + E0 = 0.d0 + do j = 1, n_TSVD + do i = 1, n_TSVD + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + E0 = E0 + psi_postsvd(i,j) * H(i,j,k,l) * psi_postsvd(k,l) + enddo + enddo + enddo + enddo + norm = 0.d0 + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + norm = norm + psi_postsvd(k,l)**2 + enddo + enddo + + E0 = E0/norm + nuclear_repulsion + print *,' E0 avant =', E0 + + !print *, ' --- SVD --- ' + !call perform_newpostSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref) + call perform_newSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref) + + + + deallocate( H ) + + end do + + +end + + + +subroutine perform_newpostSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_TSVD, LDP + double precision, intent(in) :: psi_postsvd(LDP,n_TSVD) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + !double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique)) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, l, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) + + U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD) + V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD) + + S_mat(1:n_TSVD,1:n_TSVD) = psi_postsvd(1:n_TSVD,1:n_TSVD) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_TSVD,nn) ) + call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + !allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) ) + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), & + U_newsvd, size(U_newsvd,1), & + D_newsvd, & + Vt_newsvd, size(Vt_newsvd,1), & + mm, nn) + deallocate(A_newsvd) + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + !do l = 1, n_TSVD + ! Dref(l) = D_newsvd(l) + ! Uref(1:mm,l) = U_newsvd(1:mm,l) + ! Vref(1:nn,l) = V_newsvd(1:nn,l) + !enddo + Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + +end subroutine perform_newpostSVD + + + +subroutine perform_newSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref) + + integer, intent(in) :: n_TSVD, LDP + double precision, intent(in) :: psi_postsvd(LDP,n_TSVD) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, l, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) ) + + U_svd(1:mm,1:mm) = Uref(1:mm,1:mm) + V_svd(1:nn,1:nn) = Vref(1:nn,1:nn) + + norm_tmp = 0.d0 + do i = 1, nn + do j = 1, mm + S_mat(j,i) = psi_postsvd(j,i) + norm_tmp += psi_postsvd(j,i) * psi_postsvd(j,i) + enddo + enddo + norm_tmp = 1.d0 / dsqrt(norm_tmp) + do i = 1, nn + do j = 1, mm + S_mat(j,i) = S_mat(j,i) * norm_tmp + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(mm,nn) ) + call dgemm( 'N', 'T', mm, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, mm, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + deallocate(A_newsvd) + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + !do l = 1, n_TSVD + ! Dref(l) = D_newsvd(l) + ! Uref(1:mm,l) = U_newsvd(1:mm,l) + ! Vref(1:nn,l) = V_newsvd(1:nn,l) + !enddo + + !Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Dref = D_newsvd + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + + return + +end subroutine perform_newSVD + + + + + +subroutine const_H_uv(Uref, Vref, H, H_diag, n_TSVD) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique) + double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: jj0, n, m, np, mp + integer :: nn0, mm0, na, nb, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + double precision, allocatable :: tmp3(:,:,:) + double precision, allocatable :: tmp1(:,:), tmp0(:,:) + double precision :: c_tmp + + + na = n_det_alpha_unique + nb = n_det_beta_unique + + call wall_time(t0) + tmp3 = 0.d0 + + allocate( H0(na,nb,n_TSVD,n_TSVD) ) + allocate (tmp3(nb,nb,nb)) + H0 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0)& + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,& + !$OMP N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD) + + allocate(tmp1(na,na), tmp0(na,na)) + + do i=1,na + do m=1,na + tmp1(m,i) = Uref(i,m) + enddo + enddo + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if (h12 == 0.d0) cycle + + do m=1,nb + tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k) + enddo + + do n=1,n_TSVD + c_tmp = h12 * Vref(j,n) + if (c_tmp == 0.d0) cycle + do m=1,n_TSVD + H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i) + enddo + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do m=1,nb + + do l=1,nb + do j=1,nb + tmp1(j,l) = tmp3(m,j,l) + enddo + enddo + !print *, 'DGEMM1' + call DGEMM('N','N',nb,nb,nb,1.d0, & + tmp1, size(tmp1,1), & + Vref, size(Vref,1), & + 0.d0, tmp0, size(tmp0,1)) + + do n=1,nb + H_diag(m,n) = 0.d0 + do j=1,nb + H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n) + enddo + enddo + enddo + !$OMP END DO + deallocate(tmp1, tmp0) + !$OMP END PARALLEL + + call wall_time(t1) + + allocate( H1(nb,n_TSVD,n_TSVD,na) ) + !print *, 'DGEMM2' + call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + deallocate( H0 ) + + ! (l,p,q,r) -> (p,q,r,s) + !print *, 'DGEMM3' + call DGEMM('T','N', n_TSVD * n_TSVD * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + +! do j=1,n_TSVD +! do i=1,n_TSVD +! print *, H_diag(i,j), H(i,j,i,j) +! enddo +! enddo + deallocate(H1) + + call wall_time(t2) + print *, 't=', t1-t0, t2-t1 + double precision :: t0, t1, t2 +! stop +end subroutine const_H_uv + + + + + + + +subroutine const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique) + double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree, n, m, na, nb + double precision :: h12 + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + double precision, allocatable :: tmp3(:,:,:) + double precision, allocatable :: tmp1(:,:), tmp0(:,:), tmp4(:,:) + double precision :: c_tmp + + + na = n_det_alpha_unique + nb = n_det_beta_unique + + call wall_time(t0) + + allocate( H0(na,nb,n_TSVD,n_TSVD) ) + allocate( tmp3(na,nb,nb) ) + H0 = 0.d0 + tmp3 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0,tmp4)& + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique,& + !$OMP N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD) + + allocate(tmp1(na,na), tmp0(nb,nb), tmp4(nb,nb)) + + do i=1,na + do m=1,na + tmp1(m,i) = Uref(i,m) + enddo + enddo + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if (h12 == 0.d0) cycle + + do m=1,na + tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k) + enddo + + do n=1,n_TSVD + c_tmp = h12 * Vref(j,n) + if (c_tmp == 0.d0) cycle + do m=1,n_TSVD + H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i) + enddo + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do m=1,na + + do l=1,nb + do j=1,nb + tmp4(j,l) = tmp3(m,j,l) + enddo + enddo + + call DGEMM('N','N',nb,nb,nb,1.d0, & + tmp4, size(tmp4,1), & + Vref, size(Vref,1), & + 0.d0, tmp0, size(tmp0,1)) + + do n=1,nb + H_diag(m,n) = 0.d0 + do j=1,nb + H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n) + enddo + enddo + enddo + !$OMP END DO + deallocate(tmp1, tmp0) + deallocate(tmp4) + !$OMP END PARALLEL + + deallocate(tmp3) + + call wall_time(t1) + + allocate( H1(nb,n_TSVD,n_TSVD,na) ) + call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + deallocate( H0 ) + + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', n_TSVD * n_TSVD * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + +! do j=1,n_TSVD +! do i=1,n_TSVD +! print *, H_diag(i,j), H(i,j,i,j) +! enddo +! enddo + deallocate(H1) + + call wall_time(t2) + print *, 't=', t1-t0, t2-t1 + double precision :: t0, t1, t2 +! stop +end subroutine const_H_uv_modif + + diff --git a/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f b/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f new file mode 100644 index 0000000..10f7bf4 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_Anthony_v4.irp.f @@ -0,0 +1,776 @@ +program buildpsi_diagSVDit_Anthony_v4 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l + + double precision :: norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0, tol_energy, ctmp, Ept2, E_prev + + double precision, allocatable :: H_diag(:,:), Hkl(:,:), H0(:,:), H(:,:,:,:) + double precision, allocatable :: psi_postsvd(:,:) + + integer :: n_TSVD, it_svd, it_svd_max + + integer :: ii, jj, n_perturb + double precision :: norm, lambda, E_perturb + double precision, allocatable :: eigval0(:), eigvec0(:,:,:), H_tmp(:,:,:,:) + double precision, allocatable :: norm_row(:), norm_col(:), Utmp(:,:), Vtmp(:,:) + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call i_H_j(det1, det2, N_int, h12) + + + i_state = 1 + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! Truncated rank + !n_TSVD = 100 + !call write_int(6,n_TSVD, 'Rank of psi') + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + E_prev = 0.d0 + + !print *, ci_energy(1) + + allocate(H_diag(n_det_alpha_unique,n_det_beta_unique)) + allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) ) + + it_svd = it_svd + 1 + + ! Truncated rank + !n_TSVD = min(n_det_alpha_unique,n_det_beta_unique) + !do i = min(n_det_alpha_unique,n_det_beta_unique), 10, -1 + ! if( dabs(Dref(i)) .lt. 1d-2 ) then + ! n_TSVD = n_TSVD - 1 + ! else + ! exit + ! endif + !enddo + !do i = 1, min(n_det_alpha_unique,n_det_beta_unique) + ! print *, i, Dref(i) + !enddo + !call write_int(6,n_TSVD, 'Rank of psi') + n_TSVD = 30 !min(n_TSVD,100) + call write_int(6,n_TSVD, 'Rank of psi') + + print *, '-- Compute H --' + allocate(H(n_TSVD,n_TSVD,n_det_alpha_unique,n_det_beta_unique)) + call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD) + + ! E0 = < psi_0 | H | psi_0 > + !norm = 0.d0 + !do j = 1, n_TSVD + ! norm = norm + Dref(j)*Dref(j) + !enddo + !Dref = Dref / dsqrt(norm) + !E0 = 0.d0 + !do j = 1, n_TSVD + ! do i = 1, n_TSVD + ! E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + ! enddo + !enddo + !E0 = E0 + nuclear_repulsion + !print *,' E0 bef diag =', E0 + + allocate( H_tmp(n_TSVD,n_TSVD,n_TSVD,n_TSVD) ) + do l = 1, n_TSVD + do k = 1, n_TSVD + do j = 1, n_TSVD + do i = 1, n_TSVD + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + allocate( eigval0(n_TSVD**2) , eigvec0(n_TSVD,n_TSVD,n_TSVD**2) ) + eigvec0 = 0.d0 + !print *, ' --- Diag post-SVD --- ' + call lapack_diag(eigval0, eigvec0, H_tmp, n_TSVD**2, n_TSVD**2) + !print *, 'eig =', eigval0(1) + nuclear_repulsion + deallocate(H_tmp, eigval0) + + print *, ' --- first SVD --- ' + Dref = 0.d0 + call perform_newpostSVD(n_TSVD, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref) + deallocate(eigvec0) + + print *, ' --- Compute H --- ' + call const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + !call const_H_uv(Uref, Vref, H, H_diag, n_TSVD) + + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + norm = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_TSVD + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + norm = norm + Dref(j)*Dref(j) + enddo + E0 = E0 + nuclear_repulsion + print *,' E0 aft diag =', E0 + + ! ----------------------------------------------------------------- + + !print *, ' --- Perturbation --- ' + psi_postsvd = 0.d0 + do i = 1, n_TSVD + psi_postsvd(i,i) = Dref(i) + enddo + + lambda = 1.d0 + Ept2 = 0.d0 + do j = 1, n_TSVD + do i = n_TSVD+1, n_det_alpha_unique + ctmp = 0.d0 + do k = 1, n_TSVD + ctmp = ctmp + H(k,k,i,j) * Dref(k) + enddo + psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + Ept2 += ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + enddo + enddo + + do j = n_TSVD+1, n_det_beta_unique + do i = 1, n_TSVD + ctmp = 0.d0 + do k = 1, n_TSVD + ctmp = ctmp + H(k,k,i,j) * Dref(k) + enddo + psi_postsvd(i,j) = lambda * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + Ept2 += ctmp * ctmp / (E0 - (H_diag(i,j)+nuclear_repulsion) ) + enddo + enddo + + deallocate( H ) + ! ----------------------------------------------------------------- + + n_perturb = n_TSVD + n_TSVD + + allocate( norm_row(n_det_alpha_unique) , norm_col(n_det_beta_unique) ) + do i = 1, n_det_alpha_unique + norm_row(i) = 0.d0 + do j = 1, n_det_beta_unique + norm_row(i) += psi_postsvd(i,j) * psi_postsvd(i,j) + enddo + enddo + do j = 1, n_det_beta_unique + norm_col(j) = 0.d0 + do i = 1, n_det_alpha_unique + norm_col(j) += psi_postsvd(i,j) * psi_postsvd(i,j) + enddo + enddo + + allocate( Utmp(n_det_alpha_unique,n_perturb) , Vtmp(n_det_beta_unique,n_perturb) ) + do i = 1, n_perturb + ii = MAXLOC( norm_row , DIM=1 ) + jj = MAXLOC( norm_col , DIM=1 ) + if( (norm_row(ii).lt.1.d-12) .or. (norm_col(jj).lt.1.d-12) ) then + print *, ' !!!!!!! ' + print *, ii, norm_row(ii) + print *, jj, norm_col(jj) + stop + endif + Utmp(:,i) = Uref(:,ii) + Vtmp(:,i) = Vref(:,jj) + norm_row(ii) = 0.d0 + norm_col(jj) = 0.d0 + enddo + deallocate( norm_row , norm_col ) + + print *, ' --- Compute H in n_perturb space --- ' + allocate(H(n_perturb,n_perturb,n_det_alpha_unique,n_det_beta_unique)) + call const_H_TSVD(Uref, Vref, H, n_perturb) + allocate( H_tmp(n_perturb,n_perturb,n_perturb,n_perturb) ) + do l = 1, n_perturb + do k = 1, n_perturb + do j = 1, n_perturb + do i = 1, n_perturb + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + deallocate( H ) + allocate( eigval0(n_perturb**2) , eigvec0(n_perturb,n_perturb,n_perturb**2) ) + eigvec0 = 0.d0 + call lapack_diag(eigval0, eigvec0, H_tmp, n_perturb**2, n_perturb**2) + E_perturb = eigval0(1) + nuclear_repulsion + print *, ' diag in n_perturb space: ', E_perturb + deallocate(H_tmp, eigval0) + + print *, ' --- second SVD --- ' + Uref = 0.d0 + Vref = 0.d0 + Dref = 0.d0 + do l = 1, n_perturb + Uref(1:n_det_alpha_unique,l) = Utmp(1:n_det_alpha_unique,l) + Vref(1:n_det_beta_unique ,l) = Vtmp(1:n_det_beta_unique ,l) + enddo + deallocate( Utmp , Vtmp ) + + call perform_perturbSVD(n_TSVD, n_perturb, eigvec0(1,1,1), size(eigvec0,1), Uref, Vref, Dref) + deallocate(eigvec0) + + + tol_energy = dabs(E_prev - E0) + print '(I5, 2(2X,I5), 3(3X, F20.10))', it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy + write(222,'(I5, 2(2X,I5), 3(3X, F20.10))') it_svd, n_TSVD, n_perturb, E0, E_perturb, tol_energy + E_prev = E0 + + !print *, ' --- SVD --- ' + !call perform_newpostSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref) + !call perform_newSVD(n_TSVD, psi_postsvd, size(psi_postsvd,1), Uref, Vref, Dref) + + + end do + + +end + + + +subroutine perform_newpostSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref) + + integer, intent(in) :: n_TSVD, LDP + double precision, intent(in) :: psi_postsvd(LDP,n_TSVD) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + !double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique)) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, l, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) + + U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD) + V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD) + + S_mat(1:n_TSVD,1:n_TSVD) = psi_postsvd(1:n_TSVD,1:n_TSVD) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_TSVD,nn) ) + call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + !allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) ) + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), & + U_newsvd, size(U_newsvd,1), & + D_newsvd, & + Vt_newsvd, size(Vt_newsvd,1), & + mm, nn) + deallocate(A_newsvd) + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + !do l = 1, n_TSVD + ! Dref(l) = D_newsvd(l) + ! Uref(1:mm,l) = U_newsvd(1:mm,l) + ! Vref(1:nn,l) = V_newsvd(1:nn,l) + !enddo + Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + +end subroutine perform_newpostSVD + + + + +subroutine perform_perturbSVD(n_TSVD, n_perturb, psi_postsvd, LDP, Uref, Vref, Dref) + + integer, intent(in) :: n_TSVD, LDP, n_perturb + double precision, intent(in) :: psi_postsvd(LDP,n_TSVD) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, l, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_perturb) , V_svd(nn,n_perturb) , S_mat(n_perturb,n_perturb) ) + U_svd(1:mm,1:n_perturb) = Uref(1:mm,1:n_perturb) + V_svd(1:nn,1:n_perturb) = Vref(1:nn,1:n_perturb) + S_mat(1:n_perturb,1:n_perturb) = psi_postsvd(1:n_perturb,1:n_perturb) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_perturb,nn) ) + call dgemm( 'N', 'T', n_perturb, nn, n_perturb, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_perturb, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), & + U_newsvd, size(U_newsvd,1), & + D_newsvd, & + Vt_newsvd, size(Vt_newsvd,1), & + mm, nn) + deallocate(A_newsvd) + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + +end subroutine perform_perturbSVD + + + +subroutine perform_newSVD(n_TSVD, psi_postsvd, LDP, Uref, Vref, Dref) + + integer, intent(in) :: n_TSVD, LDP + double precision, intent(in) :: psi_postsvd(LDP,n_TSVD) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, l, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) ) + + U_svd(1:mm,1:mm) = Uref(1:mm,1:mm) + V_svd(1:nn,1:nn) = Vref(1:nn,1:nn) + + norm_tmp = 0.d0 + do i = 1, nn + do j = 1, mm + S_mat(j,i) = psi_postsvd(j,i) + norm_tmp += psi_postsvd(j,i) * psi_postsvd(j,i) + enddo + enddo + norm_tmp = 1.d0 / dsqrt(norm_tmp) + do i = 1, nn + do j = 1, mm + S_mat(j,i) = S_mat(j,i) * norm_tmp + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(mm,nn) ) + call dgemm( 'N', 'T', mm, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, mm, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + deallocate(A_newsvd) + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + !Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Dref = D_newsvd + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + + return + +end subroutine perform_newSVD + + + + +subroutine const_H_uv_modif(Uref, Vref, H, H_diag, n_TSVD) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique) + double precision, intent(out) :: H_diag(n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree, n, m, na, nb + double precision :: h12 + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + double precision, allocatable :: tmp3(:,:,:) + double precision, allocatable :: tmp1(:,:), tmp0(:,:), tmp4(:,:) + double precision :: c_tmp + + + na = n_det_alpha_unique + nb = n_det_beta_unique + + call wall_time(t0) + + allocate( H0(na,nb,n_TSVD,n_TSVD) ) + allocate( tmp3(na,nb,nb) ) + H0 = 0.d0 + tmp3 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1,tmp0,tmp4) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,tmp3,Uref,Vref,H_diag,H0,n_TSVD) + + allocate(tmp1(na,na), tmp0(nb,nb), tmp4(nb,nb)) + + do i = 1, na + do m = 1, na + tmp1(m,i) = Uref(i,m) + enddo + enddo + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if (h12 == 0.d0) cycle + + do m = 1, na + tmp3(m,j,l) = tmp3(m,j,l) + h12 * tmp1(m,i) * tmp1(m,k) + enddo + + do n = 1, n_TSVD + c_tmp = h12 * Vref(j,n) + if (c_tmp == 0.d0) cycle + do m = 1, n_TSVD + H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i) + enddo + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do m = 1, na + do l = 1, nb + do j = 1, nb + tmp4(j,l) = tmp3(m,j,l) + enddo + enddo + call DGEMM('N','N',nb,nb,nb,1.d0, & + tmp4, size(tmp4,1), & + Vref, size(Vref,1), & + 0.d0, tmp0, size(tmp0,1)) + do n = 1, nb + H_diag(m,n) = 0.d0 + do j = 1, nb + H_diag(m,n) = H_diag(m,n) + tmp0(j,n) * Vref(j,n) + enddo + enddo + enddo + !$OMP END DO + deallocate(tmp1, tmp0) + deallocate(tmp4) + !$OMP END PARALLEL + + deallocate(tmp3) + + call wall_time(t1) + + ! H0(na,nb,n_TSVD,n_TSVD) + allocate( H1(nb,n_TSVD,n_TSVD,na) ) + call DGEMM('T', 'N', nb * n_TSVD * n_TSVD, na, na, 1.d0, & + H0 , size(H0,1) , & + Uref, size(Uref,1), 0.d0, & + H1 , size(H1,1) * size(H1,2) * size(H1,3) ) + deallocate( H0 ) + + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', n_TSVD * n_TSVD * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + +! do j=1,n_TSVD +! do i=1,n_TSVD +! print *, H_diag(i,j), H(i,j,i,j) +! enddo +! enddo + deallocate(H1) + + call wall_time(t2) + print *, 't=', t1-t0, t2-t1 + double precision :: t0, t1, t2 +! stop +end subroutine const_H_uv_modif + + + + + + +subroutine const_H_TSVD(Uref, Vref, H, n_TSVD) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD + double precision, intent(in) :: Uref(n_det_alpha_unique,n_TSVD) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_TSVD) + double precision, intent(out) :: H(n_TSVD,n_TSVD, n_det_alpha_unique, n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree, n, m, na, nb + double precision :: h12 + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + double precision, allocatable :: tmp1(:,:) + double precision :: c_tmp + double precision :: t0, t1, t2 + + + na = n_det_alpha_unique + nb = n_det_beta_unique + + call wall_time(t0) + + allocate( H0(na,nb,n_TSVD,n_TSVD) ) + H0 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n,det1,det2,degree,h12,c_tmp,tmp1) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique & + !$OMP ,N_int,Uref,Vref,H0,n_TSVD) + + allocate(tmp1(na,na)) + do i = 1, na + do m = 1, na + tmp1(m,i) = Uref(i,m) + enddo + enddo + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if (h12 == 0.d0) cycle + + do n = 1, n_TSVD + c_tmp = h12 * Vref(j,n) + if (c_tmp == 0.d0) cycle + do m = 1, n_TSVD + H0(k,l,m,n) = H0(k,l,m,n) + c_tmp * tmp1(m,i) + enddo + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + deallocate(tmp1) + !$OMP END PARALLEL + + call wall_time(t1) + allocate( H1(nb,n_TSVD,n_TSVD,na) ) + call DGEMM('T','N', nb * n_TSVD * n_TSVD, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + deallocate( H0 ) + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', n_TSVD * n_TSVD * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + deallocate(H1) + call wall_time(t2) + + print *, 't=', t1-t0, t2-t1 + +end subroutine const_H_TSVD + + diff --git a/devel/svdwf/buildpsi_diagSVDit_v0.irp.f b/devel/svdwf/buildpsi_diagSVDit_v0.irp.f new file mode 100644 index 0000000..e01e855 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_v0.irp.f @@ -0,0 +1,822 @@ +program buildpsi_diagSVDit_v0 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, ii, jj, nn, na, nb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0_av, E0_ap, E0pt2 + double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlop_postsvd + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi + + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:) + double precision, allocatable :: psi_postsvd(:), coeff_psi(:), coeff_psi_perturb(:) + + integer :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max + integer :: n_selected2 + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it + real(kind=8) :: CPU_tot_time, CPU_tot_time_it + real(kind=8) :: speedup, speedup_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_beta_unique) ) + allocate( Dref(n_det_beta_unique) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' --- First SVD: ok --- ' + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! --------------------------------------------------------------------------------------- + + nn = n_det_beta_unique + + ! --------------------------------------------------------------------------------------- + ! numerote vectors + + ! Full rank + n_FSVD = nn * nn + print*, ' Full psi space rank = ', n_FSVD + + ! Truncated rank + n_selected = 20 + n_selected2 = n_selected * n_selected + print*, ' initial psi space rank = ', n_selected + + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + do i = 1, n_selected + numalpha_selected(i) = i + numbeta_selected (i) = i + enddo + ! check SVD error + err0 = 0.d0 + do j = 1, nn + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_selected + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + deallocate( Aref ) + + ! perturbative space rank + k = 0 + n_toselect = nn*nn - n_selected*n_selected + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + ! nondiagonal blocs + do i = 1, n_selected + do j = n_selected+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + do j = 1, n_selected + do i = n_selected+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + ! diagonal bloc + do i = n_selected+1, nn + do j = n_selected+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + + if( k.ne.n_toselect ) then + print*, ' error in numeroting ' + stop + endif + print*, ' perturbative psi space rank = ', n_toselect + + ! --------------------------------------------------------------------------------------- + + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + E0_old = 0.d0 + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' iteration', it_svd + + norm_coeff_psi = 0.d0 + do j = 1, n_selected + norm_coeff_psi += Dref(j) * Dref(j) + enddo + inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi) + do j = 1, n_selected + Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi + enddo + + allocate( H0(n_selected2,n_selected2) ) + print *, '' + print *, '' + print *, '' + print *, '-- Compute H --' + call const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0) + + ! avant SVD + E0 = 0.d0 + do i = 1, n_selected + ii = (i-1)*n_selected + i + do j = 1, n_selected + jj = (j-1)*n_selected + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + + allocate( psi_postsvd(n_selected2) ) + print *, ' --- Diag post-SVD --- ' + call diag_postsvd(n_selected, n_selected2, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd) + print*, ' postsvd energy = ', E0_postsvd + deallocate( H0 ) + + ! post-SVD + print *, ' --- SVD --- ' + !Dref(:) = 0.d0 + call perform_newpostSVD(n_selected, n_selected2, psi_postsvd, Uref, Vref, Dref) + deallocate( psi_postsvd ) + + allocate( H0(n_selected2,n_selected2) ) + print *, ' --- Compute H --- ' + call const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0) + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_selected + ii = (i-1)*n_selected + i + do j = 1, n_selected + jj = (j-1)*n_selected + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + !print *,' norm =', norm_coeff_psi + + deallocate(H0) + + print *, ' --- Perturbation --- ' + + allocate( Hdiag(n_toselect), Hkl(n_selected2,n_toselect) ) + call const_Hdiag_Hkl(n_selected, n_selected2, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + !do l = 1, n_toselect + ! na = numalpha_toselect(l) + ! nb = numbeta_toselect (l) + ! print *, na, nb, Hdiag(l) + !enddo + + ! evaluate the coefficients for all the vectors + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + !na = numalpha_toselect(ii) + !nb = numbeta_toselect (ii) + ctmp = 0.d0 + do i = 1, n_selected + l = (i-1)*n_selected + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + deallocate( Hdiag, Hkl) + + print *, ' --- SVD --- ' + call perform_newSVD(n_selected, n_selected2, n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + deallocate( coeff_psi_perturb ) + + write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + + + + + + +subroutine const_psihpsi_postsvd_H0(n_selected, n_selected2, Uref, Vref, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected, n_selected2 + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected2,n_selected2) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + double precision :: h12, x + + double precision, allocatable :: H0_tmp(:,:) + + + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(n,np,nn0,nn,ii0,jj0,x,m,mp,mm0,mm,ii,jj,i,j,k,l,h12,det1,det2,H0_tmp,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_selected2,Uref,Vref,H0 ) + allocate( H0_tmp(n_selected2,n_selected2) ) + H0_tmp(:,:) = 0.d0 + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! !!! + ! ~~~ H0 ~~~ + do n = 1, n_selected + nn0 = (n-1)*n_selected + do np = 1, n_selected + nn = nn0 + np + x = Uref(k,n) * Vref(l,np) * h12 + do m = 1, n_selected + mm0 = (m-1)*n_selected + do mp = 1, n_selected + mm = mm0 + mp + H0_tmp(mm,nn) += Uref(i,m) * Vref(j,mp) * x + enddo + enddo + enddo + enddo + ! ~~~ ~~~~~~ ~~~ + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, n_selected2 + do m = 1, n_selected2 + H0(m,n) += H0_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( H0_tmp ) + !$OMP END PARALLEL + + return +end subroutine const_psihpsi_postsvd_H0 + + + + + +subroutine diag_postsvd(n_selected, n_selected2, Dref, H0, E0, overlop, psi_postsvd ) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected, n_selected2 + double precision, intent(in) :: H0(n_selected2,n_selected2) + double precision, intent(in) :: Dref(n_det_beta_unique) + double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected2) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + double precision :: h12, x + + double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:) + + ! diagonalize H0 + allocate( eigvec0(n_selected2,n_selected2), eigval0(n_selected2) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected2, n_selected2) + + ! get the postsvd ground state + allocate( check_ov(n_selected2) ) + do l = 1, n_selected2 + overlop = 0.d0 + do i = 1, n_selected + ii = n_selected*(i-1) + i + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + !ind_gs = 1 + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + psi_postsvd = eigvec0(:,ind_gs) + + deallocate( check_ov, eigvec0, eigval0 ) + + return +end subroutine diag_postsvd + + + + +subroutine const_Hdiag_Hkl(n_selected, n_selected2, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + + implicit none + + integer, intent(in) :: n_selected, n_selected2, n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected2,n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: i, j, k, l + integer :: ii0, jj0, ii, jj, n, m, np, mp + double precision :: h12, y + + double precision, allocatable :: Hdiag_tmp(:), Hkl_tmp(:,:) + + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(n,ii0,jj0,y,m,mp,ii,jj,i,j,k,l,h12,det1,det2,Hdiag_tmp,Hkl_tmp,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_toselect,Uref,Vref,numalpha_toselect,numbeta_toselect, & + !$OMP Hkl,Hdiag,n_selected2 ) + allocate( Hdiag_tmp(n_toselect), Hkl_tmp(n_selected2,n_toselect) ) + Hdiag_tmp(:) = 0.d0 + Hkl_tmp(:,:) = 0.d0 + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! ~ ~ ~ H ~ ~ ~ + do n = 1, n_toselect + ii0 = numalpha_toselect(n) + jj0 = numbeta_toselect (n) + y = Uref(k,ii0) * Vref(l,jj0) * h12 + ! Hdiag + Hdiag_tmp(n) += y * Uref(i,ii0) * Vref(j,jj0) + do m = 1, n_selected + ii = (m-1)*n_selected + do mp = 1, n_selected + jj = ii + mp + ! Hkl + Hkl_tmp(jj,n) += Uref(i,m) * Vref(j,mp) * y + enddo + enddo + enddo + ! ~ ~ ~ ! ! ! ~ ~ ~ + enddo + enddo + ! !!! + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, n_toselect + Hdiag(n) += Hdiag_tmp(n) + do m = 1, n_selected2 + Hkl(m,n) += Hkl_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( Hdiag_tmp,Hkl_tmp ) + !$OMP END PARALLEL + +end subroutine const_Hdiag_Hkl + + + + + + +subroutine perform_newSVD(n_selected, n_selected2, n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_selected, n_toselect, n_selected2 + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: coeff_psi_perturb(n_toselect) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) ) + + U_svd(:,:) = Uref(:,:) + V_svd(:,:) = Vref(:,:) + S_mat(:,:) = 0.d0 + norm_tmp = 0.d0 + do j = 1, n_det_beta_unique + S_mat(j,j) = Dref(j) + norm_tmp += S_mat(j,j)*S_mat(j,j) + enddo + do l = 1, n_toselect + na = numalpha_toselect(l) + nb = numbeta_toselect (l) + S_mat(na,nb) = coeff_psi_perturb(l) + norm_tmp += S_mat(na,nb)*S_mat(na,nb) + enddo + + print*, ' norm de S_mat =', norm_tmp + !norm_tmp = 1.d0/dsqrt(norm_tmp) + !do i = 1, nn + ! do j = 1, nn + ! S_mat(j,i) = S_mat(j,i) * norm_tmp + ! enddo + !enddo + + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(nn,nn) ) + call dgemm( 'N', 'T', nn, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, nn, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new perturbative SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, nn + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + + do l = 1, nn + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo + !print *, Dref(:) + + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newSVD + + + + + +subroutine perform_newpostSVD(n_selected, n_selected2, psi_postsvd, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_selected, n_selected2 + double precision, intent(in) :: psi_postsvd(n_selected2) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) ) + + U_svd(:,:) = Uref(:,1:n_selected) + V_svd(:,:) = Vref(:,1:n_selected) + S_mat(:,:) = 0.d0 + do i = 1, n_selected + ii = (i-1)*n_selected + do j = 1, n_selected + jj = ii + j + S_mat(i,j) = psi_postsvd(jj) + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_selected,nn) ) + call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, n_selected + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_selected + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo +! print *, Dref(:) + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newpostSVD + + diff --git a/devel/svdwf/buildpsi_diagSVDit_v1.irp.f b/devel/svdwf/buildpsi_diagSVDit_v1.irp.f new file mode 100644 index 0000000..aad8587 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_v1.irp.f @@ -0,0 +1,915 @@ +program buildpsi_diagSVDit_v1 + + implicit none + + BEGIN_DOC + ! study efficiency for different way to build | psi > + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0_av, E0_ap, E0pt2 + double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlop_postsvd + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi + + double precision, allocatable :: H(:,:,:,:) + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:) + double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:) + + integer :: it_svd, it_svd_max + + integer :: n_TSVD, n_FSVD, n_selected, n_toselect + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it, W_tot_time_step + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step + real(kind=8) :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step + real(kind=8) :: speedup, speedup_it, speedup_step + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_beta_unique) ) + allocate( Dref(n_det_beta_unique) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' --- First SVD: ok --- ' + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! check Truncate SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_det_beta_unique + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + ! --------------------------------------------------------------------------------------- + + nn = n_det_beta_unique + + ! --------------------------------------------------------------------------------------- + ! numerote vectors + + ! Full rank + n_FSVD = nn * nn + print*, ' Full psi space rank = ', n_FSVD + + + ! Truncated rank + n_TSVD = 20 + print*, ' initial psi space rank = ', n_TSVD + + ! check Truncate SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + deallocate( Aref ) + print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + n_selected = n_TSVD * n_TSVD + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + k = 0 + ! first diagonal bloc + do i = 1, n_TSVD + do j = 1, n_TSVD + k = k + 1 + numalpha_selected(k) = j + numbeta_selected (k) = i + enddo + enddo + ! check size + if( k.ne.n_selected ) then + print*, ' error in numeroting: selected ' + print*, ' k = ', k + print*, ' n_selected = ', n_selected + stop + endif + + + ! perturbative space rank + k = 0 + n_toselect = n_FSVD - n_selected + print*, ' perturbative psi space rank = ', n_toselect + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + ! nondiagonal blocs + do i = 1, n_TSVD + do j = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + do j = 1, n_TSVD + do i = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + ! diagonal bloc + do i = n_TSVD+1, nn + do j = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + ! check size + if( k.ne.n_toselect ) then + print*, ' error in numeroting: to select ' + print*, ' k = ', k + print*, ' n_toselect = ', n_toselect + stop + endif + + ! --------------------------------------------------------------------------------------- + + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + E0_old = 0.d0 + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 10 + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' ' + print*, ' ' + print*, ' iteration', it_svd + + norm_coeff_psi = 0.d0 + do j = 1, n_TSVD + norm_coeff_psi += Dref(j) * Dref(j) + enddo + inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi) + do j = 1, n_TSVD + Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi + enddo + + allocate( H0(n_selected,n_selected) ) + call const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + + ! avant SVD + E0 = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + print *, '' + + + allocate( psi_postsvd(n_selected) ) + print *, ' --- Diag post-SVD --- ' + call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd) + print*, ' postsvd energy = ', E0_postsvd + deallocate( H0 ) + + ! post-SVD + !Dref(:) = 0.d0 + call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + deallocate( psi_postsvd ) + + print *, '' + print *, '' + print *, ' --- Compute H --- ' + allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + call const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + deallocate(H0) + + print *, ' --- Perturbation --- ' + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do i = 1, n_TSVD + l = (i-1)*n_TSVD + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + deallocate( Hdiag, Hkl) + + + print *, ' --- SVD --- ' + call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + deallocate( coeff_psi_perturb ) + + write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + + + + +subroutine const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: n, na, nb, m , ma, mb + double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:) + + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,Uref,Vref,H0,Htot,H1,numalpha_selected,numbeta_selected ) + + !$OMP SINGLE + allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + Htot(:,:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) + ! !!! + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) + H1(:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO + do n = 1, n_selected + na = numalpha_selected(n) + nb = numbeta_selected (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( Htot ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_selected + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( H1 ) + !$OMP END SINGLE + + !$OMP END PARALLEL + + return +end subroutine const_psihpsi_postsvd_H0_modif + + + + + + +subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd ) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: H0(n_selected,n_selected) + double precision, intent(in) :: Dref(n_det_beta_unique) + double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + double precision :: h12, x + + double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:) + + ! diagonalize H0 + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_TSVD + ii = n_TSVD*(i-1) + i + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + !ind_gs = 1 + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + psi_postsvd = eigvec0(:,ind_gs) + + deallocate( check_ov, eigvec0, eigval0 ) + + return +end subroutine diag_postsvd + + + + + +subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: coeff_psi_perturb(n_toselect) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) ) + + U_svd(:,:) = Uref(:,:) + V_svd(:,:) = Vref(:,:) + S_mat(:,:) = 0.d0 + norm_tmp = 0.d0 + do j = 1, n_det_beta_unique + S_mat(j,j) = Dref(j) + norm_tmp += S_mat(j,j)*S_mat(j,j) + enddo + do l = 1, n_toselect + na = numalpha_toselect(l) + nb = numbeta_toselect (l) + S_mat(na,nb) = coeff_psi_perturb(l) + norm_tmp += S_mat(na,nb)*S_mat(na,nb) + enddo + + print*, ' norm de S_mat =', norm_tmp + !norm_tmp = 1.d0/dsqrt(norm_tmp) + !do i = 1, nn + ! do j = 1, nn + ! S_mat(j,i) = S_mat(j,i) * norm_tmp + ! enddo + !enddo + + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(nn,nn) ) + call dgemm( 'N', 'T', nn, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, nn, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new perturbative SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, nn + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + + do l = 1, nn + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo + !print *, Dref(:) + + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newSVD + + + + + +subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + + ! TODO: general case wherer we we don't consider the first trucated block + USE OMP_LIB + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: psi_postsvd(n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) + + U_svd(:,:) = Uref(:,1:n_TSVD) + V_svd(:,:) = Vref(:,1:n_TSVD) + S_mat(:,:) = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + do j = 1, n_TSVD + jj = ii + j + S_mat(j,i) = psi_postsvd(jj) + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_TSVD,nn) ) + call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_TSVD + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo +! print *, Dref(:) + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newpostSVD + + + + + + +subroutine const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) + + implicit none + + integer, intent(in) :: n_selected, n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: i, j, k, l + integer :: n, na, nb, m, ma, mb + double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:), H2(:,:,:) + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_toselect,Uref,Vref,H0,Htot,H1,H2,Hdiag,Hkl, & + !$OMP numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect ) + + !$OMP SINGLE + allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + Htot(:,:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) + ! !!! + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + print *, ' *** Htot is calculated *** ' + allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) + H1(:,:,:) = 0.d0 + !$OMP END SINGLE + !$OMP DO + do n = 1, n_selected + na = numalpha_selected(n) + nb = numbeta_selected (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + allocate( H2(n_det_alpha_unique,n_det_beta_unique,n_toselect) ) + H2(:,:,:) = 0.d0 + !$OMP END SINGLE + !$OMP DO + do n = 1, n_toselect + na = numalpha_toselect(n) + nb = numbeta_toselect (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H2(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + Hdiag(n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) * Uref(k,na) * Vref(l,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( Htot ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_toselect + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + Hkl(m,n) += H2(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP SINGLE + deallocate( H2 ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_selected + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP SINGLE + deallocate( H1 ) + !$OMP END SINGLE + + !$OMP END PARALLEL + + return + +end subroutine const_Hdiag_Hkl_H0 diff --git a/devel/svdwf/buildpsi_diagSVDit_v2.irp.f b/devel/svdwf/buildpsi_diagSVDit_v2.irp.f new file mode 100644 index 0000000..fc30928 --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_v2.irp.f @@ -0,0 +1,840 @@ +program buildpsi_diagSVDit_v2 + + implicit none + + BEGIN_DOC + ! study efficiency for different way to build | psi > + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state, n_blocs + double precision :: h12 + + integer :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0_av, E0_ap, E0pt2 + double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlop_postsvd + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi + + double precision, allocatable :: H(:,:,:,:) + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:) + double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:) + + integer :: it_svd, it_svd_max + + integer :: n_TSVD, n_FSVD, n_selected, n_toselect + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it, W_tot_time_step + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step + real(kind=8) :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step + real(kind=8) :: speedup, speedup_it, speedup_step + integer :: nb_taches + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call i_H_j(det1, det2, N_int, h12) + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vref (n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s( Aref, size(Aref,1), & + Uref, size(Uref,1), & + Dref, & + Vtref, size(Vtref,1), & + n_det_alpha_unique, n_det_beta_unique ) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' --- First SVD: ok --- ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! check Full SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, min(n_det_alpha_unique,n_det_beta_unique) + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! numerote vectors + + ! Full rank + n_FSVD = n_det_alpha_unique * n_det_beta_unique + print*, ' Full psi space rank = ', n_FSVD + + ! Truncated rank + n_TSVD = 15 + print*, ' initial psi space rank = ', n_TSVD + + ! check Truncate SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + deallocate( Aref ) + print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + n_selected = n_TSVD * n_TSVD + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + k = 0 + ! first diagonal bloc + do i = 1, n_TSVD + do j = 1, n_TSVD + k = k + 1 + numalpha_selected(k) = j + numbeta_selected (k) = i + enddo + enddo + ! check size + if( k.ne.n_selected ) then + print*, ' error in numeroting: selected ' + print*, ' k = ', k + print*, ' n_selected = ', n_selected + stop + endif + + + ! perturbative space rank + n_blocs = 2 + k = 0 + if( n_blocs.eq.3 ) then + + n_toselect = n_FSVD - n_selected + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + ! nondiagonal blocs + do i = 1, n_TSVD + do j = n_TSVD+1, n_det_beta_unique + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + do i = n_TSVD+1, n_det_alpha_unique + do j = 1, n_TSVD + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + ! diagonal bloc + do i = n_TSVD+1, n_det_alpha_unique + do j = n_TSVD+1, n_det_beta_unique + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + + elseif( n_blocs.eq.2 ) then + + n_toselect = n_FSVD - n_selected - (n_det_alpha_unique-n_TSVD)*(n_det_beta_unique-n_TSVD) + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + ! nondiagonal blocs + do i = 1, n_TSVD + do j = n_TSVD+1, n_det_beta_unique + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + do j = 1, n_TSVD + !do i = n_TSVD+1, n_det_beta_unique + do i = n_TSVD+1, n_det_alpha_unique + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + + endif + + ! check size + if( k.ne.n_toselect ) then + print*, ' error in numeroting: to select ' + print*, ' k = ', k + print*, ' n_toselect = ', n_toselect + stop + endif + print*, ' perturbative psi space rank = ', n_toselect + + ! --------------------------------------------------------------------------------------- + + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + E0_old = 0.d0 + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' ' + print*, ' ' + print*, ' iteration', it_svd + + norm_coeff_psi = 0.d0 + do j = 1, n_TSVD + norm_coeff_psi += Dref(j) * Dref(j) + enddo + inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi) + do j = 1, n_TSVD + Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi + enddo + + allocate( H0(n_selected,n_selected) ) + + call const_H0(n_TSVD, n_selected, Uref, Vref, H0) + + E0 = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + + + allocate( psi_postsvd(n_selected) ) + call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd) + print*, ' postsvd energy = ', E0_postsvd + deallocate( H0 ) + + !Dref(:) = 0.d0 + call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + deallocate( psi_postsvd ) + + print *, ' --- Compute H --- ' + allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + call const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, & + numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0) + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + deallocate(H0) + + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do i = 1, n_TSVD + l = (i-1)*n_TSVD + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + tol_energy = dabs(E0_ap-E0_old) + E0_old = E0_ap + + deallocate( Hdiag, Hkl) + + + call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + deallocate( coeff_psi_perturb ) + + write(n_blocs,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + !write(222,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + ! save to EZFIO + allocate(Uezfio(n_det_alpha_unique,n_TSVD,1), Dezfio(n_TSVD,1), Vezfio(n_det_beta_unique,n_TSVD,1)) + Dezfio(1:n_TSVD,1) = Dref(1:n_TSVD) + Uezfio(1:n_det_alpha_unique,1:n_TSVD,1) = Uref(1:n_det_alpha_unique,1:n_TSVD) + Vezfio(1:n_det_beta_unique ,1:n_TSVD,1) = Vref(1:n_det_beta_unique ,1:n_TSVD) + + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + !call ezfio_set_spindeterminants_n_svd_coefs(n_TSVD) + + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + + deallocate(Uezfio, Dezfio, Vezfio) + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + + + + + +subroutine const_H0(n_TSVD, n_selected, Uref, Vref, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer :: i, j, k, l + integer :: n, m + double precision, allocatable :: H(:,:,:,:) + + H0(:,:) = 0.d0 + + allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + call const_H_uv_lapack(Uref, Vref, H) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(n_TSVD,H0,H) + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_TSVD + do j = 1, n_TSVD + m = (i-1)*n_TSVD + j + do k = 1, n_TSVD + do l = 1, n_TSVD + n = (k-1)*n_TSVD + l + H0(n,m) = H(k,l,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( H ) + + return +end subroutine const_H0 + + + + + + +subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd ) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: H0(n_selected,n_selected) + double precision, intent(in) :: Dref(min(n_det_alpha_unique,n_det_beta_unique)) + double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + double precision :: h12, x + + double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:) + + ! diagonalize H0 + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_TSVD + ii = n_TSVD*(i-1) + i + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + !ind_gs = 1 + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + psi_postsvd = eigvec0(:,ind_gs) + + deallocate( check_ov, eigvec0, eigval0 ) + + return +end subroutine diag_postsvd + + + + + +subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: coeff_psi_perturb(n_toselect) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) ) + + U_svd(:,:) = Uref(:,:) + V_svd(:,:) = Vref(:,:) + S_mat(:,:) = 0.d0 + norm_tmp = 0.d0 + do j = 1, min(mm,nn) + S_mat(j,j) = Dref(j) + norm_tmp += S_mat(j,j)*S_mat(j,j) + enddo + do l = 1, n_toselect + na = numalpha_toselect(l) + nb = numbeta_toselect (l) + S_mat(na,nb) = coeff_psi_perturb(l) + norm_tmp += S_mat(na,nb)*S_mat(na,nb) + enddo + + norm_tmp = 1.d0/dsqrt(norm_tmp) + do i = 1, nn + do j = 1, mm + S_mat(j,i) = S_mat(j,i) * norm_tmp + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(mm,nn) ) + call dgemm( 'N', 'T', mm, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, mm, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + Dref(:) = D_newsvd(:) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newSVD + + + + + + + +subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: psi_postsvd(n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) + + U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD) + V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD) + S_mat(:,:) = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + do j = 1, n_TSVD + jj = ii + j + S_mat(i,j) = psi_postsvd(jj) + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_TSVD,nn) ) + call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + + Dref(1:n_TSVD) = D_newsvd(1:n_TSVD) + Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) + Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newpostSVD + + + + + + +subroutine const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, & + numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD, n_selected, n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected) + + integer :: i, j, k, l + integer :: n, na, nb, m, ma, mb + double precision, allocatable :: H(:,:,:,:) + integer(kind=8) :: W_tbeg_step, W_tend_step, W_ir + real(kind=8) :: W_tot_time_step + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + H0(:,:) = 0.d0 + + allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + call const_H_uv_lapack(Uref, Vref, H) + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb) & + !$OMP SHARED(n_TSVD,n_selected,n_toselect,H0,H,Hdiag,Hkl, & + !$OMP numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect ) + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do i = 1, n_TSVD + do j = 1, n_TSVD + m = (i-1)*n_TSVD + j + do k = 1, n_TSVD + do l = 1, n_TSVD + n = (k-1)*n_TSVD + l + H0(n,m) = H(k,l,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do n = 1, n_toselect + na = numalpha_toselect(n) + nb = numbeta_toselect (n) + ! diagonal part + Hdiag(n) = H(na,nb,na,nb) + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numalpha_selected(m) + ! 3 blocs treated perturbatively + Hkl(m,n) = H(ma,mb,na,nb) + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + + deallocate( H ) + + return +end subroutine const_Hdiag_Hkl_H0 + + + + + + + + +subroutine const_H_uv_lapack(Uref, Vref, H) + + USE OMP_LIB + + implicit none + + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, na, nb, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( H0(na,nb,na,nb) ) + allocate( H1(nb,na,nb,na) ) + + H0 = 0.d0 + call wall_time(t0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,Uref,Vref,H0,H1,H) + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + call i_H_j(det1, det2, N_int, h12) + H0(i,j,k,l) = h12 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call wall_time(t1) + ! (i,j,k,l) -> (j,k,l,p) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (j,k,l,p) -> (k,l,p,q) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3)) + + ! (k,l,p,q) -> (l,p,q,r) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + call wall_time(t2) + print *, t1-t0, t2-t1 + double precision :: t0, t1, t2 + + deallocate(H1,H0) + +end const_H_uv_lapack diff --git a/devel/svdwf/buildpsi_eff.irp.f b/devel/svdwf/buildpsi_eff.irp.f new file mode 100644 index 0000000..e73e47b --- /dev/null +++ b/devel/svdwf/buildpsi_eff.irp.f @@ -0,0 +1,1433 @@ +program buildpsi_eff + + implicit none + + BEGIN_DOC + ! study efficiency for different way to build | psi > + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + double precision :: h12 + + integer :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: E0_av, E0_ap, E0pt2 + double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlop_postsvd + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi + + double precision, allocatable :: H(:,:,:,:) + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:) + double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:) + + integer :: it_svd, it_svd_max + + integer :: n_TSVD, n_FSVD, n_selected, n_toselect + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it, W_tot_time_step + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step + real(kind=8) :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step + real(kind=8) :: speedup, speedup_it, speedup_step + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_beta_unique) ) + allocate( Dref(n_det_beta_unique) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print *, ' --- First SVD: ok --- ' + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! check Truncate SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_det_beta_unique + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + ! --------------------------------------------------------------------------------------- + + nn = n_det_beta_unique + + ! --------------------------------------------------------------------------------------- + ! numerote vectors + + ! Full rank + n_FSVD = nn * nn + print*, ' Full psi space rank = ', n_FSVD + + + ! Truncated rank + n_TSVD = 25 + print*, ' initial psi space rank = ', n_TSVD + + ! check Truncate SVD error + err0 = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_alpha_unique + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) + enddo + err_tmp = Aref(i,j) - err_tmp + err0 += err_tmp * err_tmp + enddo + enddo + deallocate( Aref ) + print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) + + n_selected = n_TSVD * n_TSVD + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + k = 0 + ! first diagonal bloc + do i = 1, n_TSVD + do j = 1, n_TSVD + k = k + 1 + numalpha_selected(k) = j + numbeta_selected (k) = i + enddo + enddo + ! check size + if( k.ne.n_selected ) then + print*, ' error in numeroting: selected ' + print*, ' k = ', k + print*, ' n_selected = ', n_selected + stop + endif + + + ! perturbative space rank + k = 0 + n_toselect = n_FSVD - n_selected + print*, ' perturbative psi space rank = ', n_toselect + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + ! nondiagonal blocs + do i = 1, n_TSVD + do j = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + do j = 1, n_TSVD + do i = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + ! diagonal bloc + do i = n_TSVD+1, nn + do j = n_TSVD+1, nn + k = k + 1 + numalpha_toselect(k) = j + numbeta_toselect (k) = i + enddo + enddo + ! check size + if( k.ne.n_toselect ) then + print*, ' error in numeroting: to select ' + print*, ' k = ', k + print*, ' n_toselect = ', n_toselect + stop + endif + + ! --------------------------------------------------------------------------------------- + + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + E0_old = 0.d0 + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 1 + + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' + print*, ' ' + print*, ' ' + print*, ' ' + print*, ' iteration', it_svd + + norm_coeff_psi = 0.d0 + do j = 1, n_TSVD + norm_coeff_psi += Dref(j) * Dref(j) + enddo + inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi) + do j = 1, n_TSVD + Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi + enddo + + allocate( H0(n_selected,n_selected) ) + + + !--------------------------------------------------------------------------------------------------------- + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_psihpsi_postsvd_H0(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, '' + print *, ' +++ CONST_PSIHPSI_POSTSVD_H0 time +++ ', W_tot_time_step + ! avant SVD + E0 = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + + ! modified version + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, ' +++ CONST_PSIHPSI_POSTSVD_H0_MODIF time +++ ', W_tot_time_step + ! avant SVD + E0 = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + enddo + E0_av = E0 + nuclear_repulsion + print *,' E0 (avant SVD) =', E0_av + print *, '' + + !--------------------------------------------------------------------------------------------------------- + + allocate( psi_postsvd(n_selected) ) + print *, ' --- Diag post-SVD --- ' + call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd) + print*, ' postsvd energy = ', E0_postsvd + + deallocate( H0 ) + + ! post-SVD + !Dref(:) = 0.d0 + call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + + deallocate( psi_postsvd ) + + + + + print *, '' + print *, '' + print *, ' --- Compute H first way --- ' + !---------------------------------------------------------------------------------------------------------------------------- + ! first way + ! + + allocate( H(n_det_beta_unique,n_det_beta_unique,n_det_beta_unique,n_det_beta_unique) ) + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_H_uv(Uref, Vref, H) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, '' + print *, ' +++ CONST_H_UV +++ ', W_tot_time_step + + allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + do n = 1, n_selected + na = numalpha_selected(n) + nb = numalpha_selected(n) + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numalpha_selected(m) + H0(m,n) = H(ma,mb,na,nb) + enddo + enddo + do n = 1, n_toselect + na = numalpha_toselect(n) + nb = numbeta_toselect (n) + ! diagonal part + Hdiag(n) = H(na,nb,na,nb) + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numalpha_selected(m) + ! 3 blocs treated perturbatively + Hkl(m,n) = H(ma,mb,na,nb) + enddo + enddo + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, ' +++ CONST_H_UV +++ ', W_tot_time_step + print *, '' + deallocate( H ) + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + deallocate(H0) + + print *, ' --- Perturbation --- ' + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do i = 1, n_TSVD + l = (i-1)*n_TSVD + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + deallocate( Hdiag, Hkl) + + !---------------------------------------------------------------------------------------------------------------------------- + + + + print *, '' + print *, '' + print *, ' --- Compute H second way --- ' + !---------------------------------------------------------------------------------------------------------------------------- + ! second way + ! + + allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, '' + print *, ' +++ CONST_PSIHPSI_POSTSVD_H0_MODIF time +++ ', W_tot_time_step + + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, ' +++ CONST_HDIAG_HKL time +++ ', W_tot_time_step + print *, '' + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + deallocate(H0) + + print *, ' --- Perturbation --- ' + deallocate( coeff_psi_perturb ) + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do i = 1, n_TSVD + l = (i-1)*n_TSVD + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + deallocate( Hdiag, Hkl) + + !---------------------------------------------------------------------------------------------------------------------------- + + + + + + print *, '' + print *, '' + print *, ' --- Compute H third way --- ' + !---------------------------------------------------------------------------------------------------------------------------- + ! second way + ! + + allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir) + call const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) + call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir) + W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8) + print *, '' + print *, ' +++ CONST_HDIAG_HKL_H0 time +++ ', W_tot_time_step + print *, '' + + E0 = 0.d0 + norm_coeff_psi = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + i + do j = 1, n_TSVD + jj = (j-1)*n_TSVD + j + E0 += Dref(j) * H0(jj,ii) * Dref(i) + enddo + norm_coeff_psi += Dref(i) * Dref(i) + enddo + E0_ap = E0 + nuclear_repulsion + print *,' E0 (apres SVD) =', E0_ap + + deallocate(H0) + + print *, ' --- Perturbation --- ' + deallocate( coeff_psi_perturb ) + allocate( coeff_psi_perturb(n_toselect) ) + ept2 = 0.d0 + do ii = 1, n_toselect + ctmp = 0.d0 + do i = 1, n_TSVD + l = (i-1)*n_TSVD + i + ctmp += Dref(i) * Hkl(l,ii) + enddo + coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) + enddo + E0pt2 = E0_ap + ept2 + print *, ' perturb energy = ', E0pt2, ept2 + tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) + E0_old = E0pt2 + + deallocate( Hdiag, Hkl) + + !---------------------------------------------------------------------------------------------------------------------------- + + + + + print *, ' --- SVD --- ' + call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + deallocate( coeff_psi_perturb ) + + write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 + + call CPU_TIME(CPU_tend_it) + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + speedup_it = CPU_tot_time_it / W_tot_time_it + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + deallocate( Uref, Vref, Dref ) + + + call CPU_TIME(CPU_tend) + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + CPU_tot_time = CPU_tend - CPU_tbeg + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + speedup = CPU_tot_time / W_tot_time + print *,' ___________________________________________________________________' + print '(//,3X,"Execution avec ",i2," threads")',nb_taches + print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & + & 3X, "CPU time = ", 1PE10.3, " min.", /, & + & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup + print *,' ___________________________________________________________________' + + +end + + + + + + + + +subroutine const_psihpsi_postsvd_H0(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: n, na, nb, m , ma, mb + double precision :: h12, x + double precision, allocatable :: H0_tmp(:,:) + + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,x,h12,det1,det2,H0_tmp,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,Uref,Vref,H0,numalpha_selected,numbeta_selected ) + allocate( H0_tmp(n_selected,n_selected) ) + H0_tmp(:,:) = 0.d0 + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! !!! + ! ~~~ H0 ~~~ + do n = 1, n_selected + na = numalpha_selected(n) + nb = numbeta_selected (n) + x = Uref(k,na) * Vref(l,nb) * h12 + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + H0_tmp(m,n) += Uref(i,ma) * Vref(j,mb) * x + enddo + enddo + ! ~~~ ~~~~~~ ~~~ + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do n = 1, n_selected + do m = 1, n_selected + H0(m,n) += H0_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( H0_tmp ) + !$OMP END PARALLEL + + return +end subroutine const_psihpsi_postsvd_H0 + + + + + + + + +subroutine const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: n, na, nb, m , ma, mb + double precision :: h12, x + double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:) + + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,x,h12,det1,det2,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,Uref,Vref,H0,Htot,H1,numalpha_selected,numbeta_selected ) + + !$OMP SINGLE + allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + Htot(:,:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) + ! !!! + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) + H1(:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO + do n = 1, n_selected + na = numalpha_selected(n) + nb = numbeta_selected (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( Htot ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_selected + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( H1 ) + !$OMP END SINGLE + + !$OMP END PARALLEL + + return +end subroutine const_psihpsi_postsvd_H0_modif + + + + + + +subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd ) + + USE OMP_LIB + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: H0(n_selected,n_selected) + double precision, intent(in) :: Dref(n_det_beta_unique) + double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, nn, mm, ind_gs + double precision :: h12, x + + double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:) + + ! diagonalize H0 + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_TSVD + ii = n_TSVD*(i-1) + i + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + !ind_gs = 1 + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + psi_postsvd = eigvec0(:,ind_gs) + + deallocate( check_ov, eigvec0, eigval0 ) + + return +end subroutine diag_postsvd + + + + +subroutine const_H_uv(Uref, Vref, H) + + USE OMP_LIB + + implicit none + + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_det_beta_unique,n_det_beta_unique,n_det_beta_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: na, nb + integer :: i, j, k, l + integer :: p, q, r, s + + double precision, allocatable :: H0(:,:,:,:), H1(:,:,:,:), H2(:,:,:,:), H3(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( H0(na,nb,na,nb) , H1(na,nb,na,nb) ) + allocate( H2(na,nb,nb,nb) , H3(na,nb,nb,nb) ) + + H(:,:,:,:) = 0.d0 + H0(:,:,:,:) = 0.d0 + H1(:,:,:,:) = 0.d0 + H2(:,:,:,:) = 0.d0 + H3(:,:,:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,Uref,Vref,H0,H1,H2,H3,H) + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, na + do k = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + call i_H_j(det1, det2, N_int, H0(k,l,i,j)) + enddo + enddo + enddo + enddo + !$OMP END DO + +! !$OMP SINGLE +! allocate( H1(na,nb,na,nb) ) +! H1(:,:,:,:) = 0.d0 +! !$OMP END SINGLE + !$OMP DO + do s = 1, nb + do l = 1, nb + do k = 1, na + do j = 1, nb + do i = 1, na + H1(i,j,k,s) += H0(i,j,k,l) * Vref(l,s) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, nb + do r = 1, nb + do k = 1, na + do j = 1, nb + do i = 1, na + H2(i,j,r,s) += H1(i,j,k,s) * Uref(k,r) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, nb + do j = 1, nb + do r = 1, na + do q = 1, nb + do i = 1, na + H3(i,q,r,s) += H2(i,j,r,s) * Vref(j,q) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, nb + do r = 1, nb + do q = 1, nb + do p = 1, nb + do i = 1, na + H(p,q,r,s) += H3(i,q,r,s) * Uref(i,p) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + deallocate( H0, H1, H2, H3 ) + + return + +end subroutine const_H_uv + + + + + + +subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: coeff_psi_perturb(n_toselect) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) ) + + U_svd(:,:) = Uref(:,:) + V_svd(:,:) = Vref(:,:) + S_mat(:,:) = 0.d0 + norm_tmp = 0.d0 + do j = 1, n_det_beta_unique + S_mat(j,j) = Dref(j) + norm_tmp += S_mat(j,j)*S_mat(j,j) + enddo + do l = 1, n_toselect + na = numalpha_toselect(l) + nb = numbeta_toselect (l) + S_mat(na,nb) = coeff_psi_perturb(l) + norm_tmp += S_mat(na,nb)*S_mat(na,nb) + enddo + + print*, ' norm de S_mat =', norm_tmp + !norm_tmp = 1.d0/dsqrt(norm_tmp) + !do i = 1, nn + ! do j = 1, nn + ! S_mat(j,i) = S_mat(j,i) * norm_tmp + ! enddo + !enddo + + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(nn,nn) ) + call dgemm( 'N', 'T', nn, nn, nn, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, nn, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new perturbative SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, nn + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + + do l = 1, nn + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo + !print *, Dref(:) + + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newSVD + + + + + +subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) + + ! TODO: general case wherer we we don't consider the first trucated block + USE OMP_LIB + + integer, intent(in) :: n_TSVD, n_selected + double precision, intent(in) :: psi_postsvd(n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(n_det_beta_unique) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) + + U_svd(:,:) = Uref(:,1:n_TSVD) + V_svd(:,:) = Vref(:,1:n_TSVD) + S_mat(:,:) = 0.d0 + do i = 1, n_TSVD + ii = (i-1)*n_TSVD + do j = 1, n_TSVD + jj = ii + j + S_mat(j,i) = psi_postsvd(jj) + enddo + enddo + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_TSVD,nn) ) + call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) + call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) + print *, ' +++ new SVD is performed +++ ' + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + + ! check SVD error + err0 = 0.d0 + err_norm = 0.d0 + do j = 1, nn + do i = 1, mm + err_tmp = 0.d0 + do l = 1, n_TSVD + err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) + enddo + err_tmp = A_newsvd(i,j) - err_tmp + err0 += err_tmp * err_tmp + err_norm += A_newsvd(i,j) * A_newsvd(i,j) + enddo + enddo + print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_TSVD + Dref(l) = D_newsvd(l) + Uref(:,l) = U_newsvd(:,l) + Vref(:,l) = V_newsvd(:,l) + enddo +! print *, Dref(:) + + overlopU_mat = 0.d0 + overlopV_mat = 0.d0 + do i = 1, nn + do j = 1, nn + overlopU = 0.d0 + do ii = 1, mm + overlopU += Uref(ii,j) * Uref(ii,i) + enddo + overlopU_mat += overlopU + overlopV = 0.d0 + do ii = 1, nn + overlopV += Vref(ii,j) * Vref(ii,i) + enddo + overlopV_mat += overlopV + enddo + enddo + print *, 'overlop U =', overlopU_mat + print *, 'overlop V =', overlopV_mat + + + deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) + + return + +end subroutine perform_newpostSVD + + + + + +subroutine const_Hdiag_Hkl(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl) + + implicit none + + integer, intent(in) :: n_selected, n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: i, j, k, l + integer :: n, na, nb, m, ma, mb + double precision :: h12, y + + double precision, allocatable :: Hdiag_tmp(:), Hkl_tmp(:,:) + + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,y,h12,det1,det2,Hdiag_tmp,Hkl_tmp,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_toselect,Uref,Vref,numalpha_toselect,numbeta_toselect, & + !$OMP Hkl,Hdiag,numalpha_selected, numbeta_selected ) + allocate( Hdiag_tmp(n_toselect), Hkl_tmp(n_selected,n_toselect) ) + Hdiag_tmp(:) = 0.d0 + Hkl_tmp(:,:) = 0.d0 + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + ! !!! + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + ! !!! + call i_H_j(det1, det2, N_int, h12) + ! ~ ~ ~ H ~ ~ ~ + do n = 1, n_toselect + na = numalpha_toselect(n) + nb = numbeta_toselect (n) + y = Uref(k,na) * Vref(l,nb) * h12 + ! Hdiag + Hdiag_tmp(n) += y * Uref(i,na) * Vref(j,nb) + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + ! Hkl + Hkl_tmp(m,n) += Uref(i,ma) * Vref(j,mb) * y + enddo + enddo + ! ~ ~ ~ ! ! ! ~ ~ ~ + enddo + enddo + ! !!! + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, n_toselect + Hdiag(n) += Hdiag_tmp(n) + do m = 1, n_selected + Hkl(m,n) += Hkl_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( Hdiag_tmp,Hkl_tmp ) + !$OMP END PARALLEL + + return + +end subroutine const_Hdiag_Hkl + + + + + + + + +subroutine const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & + , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) + + implicit none + + integer, intent(in) :: n_selected, n_toselect + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree + + integer :: i, j, k, l + integer :: n, na, nb, m, ma, mb + double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:), H2(:,:,:) + + Hdiag(:) = 0.d0 + Hkl(:,:) = 0.d0 + H0(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree) & + !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_selected,n_toselect,Uref,Vref,H0,Htot,H1,H2,Hdiag,Hkl, & + !$OMP numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect ) + + !$OMP SINGLE + allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) + Htot(:,:,:,:) = 0.d0 + !$OMP END SINGLE + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) + do i = 1, n_det_alpha_unique + do k = 1, n_det_alpha_unique + det1(:,1) = psi_det_alpha_unique(:,i) + det2(:,1) = psi_det_alpha_unique(:,k) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + if (degree .gt. 2) then + cycle + endif + do j = 1, n_det_beta_unique + det1(:,2) = psi_det_beta_unique(:,j) + do l = 1, n_det_beta_unique + det2(:,2) = psi_det_beta_unique(:,l) + call get_excitation_degree(det1,det2,degree,N_int) + if (degree .gt. 2) then + cycle + endif + ! !!! + call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) + ! !!! + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) + H1(:,:,:) = 0.d0 + !$OMP END SINGLE + !$OMP DO + do n = 1, n_selected + na = numalpha_selected(n) + nb = numbeta_selected (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + allocate( H2(n_det_alpha_unique,n_det_beta_unique,n_toselect) ) + H2(:,:,:) = 0.d0 + !$OMP END SINGLE + !$OMP DO + do n = 1, n_toselect + na = numalpha_toselect(n) + nb = numbeta_toselect (n) + do i = 1, n_det_alpha_unique + do j = 1, n_det_beta_unique + do l = 1, n_det_beta_unique + do k = 1, n_det_alpha_unique + H2(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) + Hdiag(n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) * Uref(k,na) * Vref(l,nb) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + deallocate( Htot ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_toselect + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + Hkl(m,n) += H2(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP SINGLE + deallocate( H2 ) + !$OMP END SINGLE + + !$OMP DO + do m = 1, n_selected + ma = numalpha_selected(m) + mb = numbeta_selected (m) + do n = 1, n_selected + do k = 1, n_det_alpha_unique + do l = 1, n_det_beta_unique + H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP SINGLE + deallocate( H1 ) + !$OMP END SINGLE + + !$OMP END PARALLEL + + return + +end subroutine const_Hdiag_Hkl_H0 diff --git a/devel/svdwf/kl_H_kl_v0.irp.f b/devel/svdwf/kl_H_kl_v0.irp.f new file mode 100644 index 0000000..4d2c5e2 --- /dev/null +++ b/devel/svdwf/kl_H_kl_v0.irp.f @@ -0,0 +1,326 @@ +program kl_H_kl_v0 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2 + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, ia, ib + double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta + integer :: n_toselect, na_max, nb_max + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer :: cantor_pairing_ij, cantor_pairing_new + integer, allocatable :: cantor_pairing(:), cantor_pairing_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + deallocate( Aref , Dref ) + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ibeg_alpha = 1 + iend_alpha = n_det_alpha_unique + na_max = iend_alpha - ibeg_alpha + 1 + + ibeg_beta = 1 + iend_beta = n_det_beta_unique + nb_max = iend_beta - ibeg_beta + 1 + + n_toselect = na_max * nb_max + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + print *, ' n_toselect = ', n_toselect + + + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + k = 0 + do i = ibeg_alpha, iend_alpha + do j = ibeg_beta, iend_beta + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + if( k.ne.n_toselect ) then + print *, " error in numbering" + stop + endif + + + allocate( Hdiag(n_toselect) ) + + ! get < u_k v_l | H | u_k v_l > for all vectors + call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + open(UNIT=11, FILE="klHkl_v0.dat", ACTION="WRITE") + do i = 1, n_toselect + write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i) + enddo + close(11) + + + deallocate( Uref, Vref ) + deallocate( numalpha_toselect, numbeta_toselect, Hdiag ) + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + +end + + + + + + + + + +subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + implicit none + + integer, intent(in) :: n_toselect, na_max, nb_max + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m, n + double precision :: h12, xtmp + + double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) + double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:) + + double precision :: t1, t2, t3, t4 + + print *, "" + print *, " start const_Hdiag" + call wall_time(t1) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate(Hmat_diag(na_max,nb_max)) + Hmat_diag = 0.d0 + + allocate( bl1_tmp(na,na,nb_max) ) + bl1_tmp = 0.d0 + + allocate( Vt(nb_max,nb) ) + do i = 1, nb + do n = 1, nb_max + Vt(n,i) = Vref(i,n) + enddo + enddo + + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + do n = 1, nb_max + bl1_tmp(i,k,n) += h12 * Vt(n,j) * Vt(n,l) + enddo + + enddo + enddo + enddo + enddo + + deallocate(Vt) + + call wall_time(t2) + print *, " end bl1_tmp after (min) ", (t2-t1)/60. + + allocate( Ut(na,na_max) ) + Ut(1:na,1:na_max) = Uref(1:na,1:na_max) + allocate( tmp0(na,nb_max,na_max) ) + call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0, & + bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), & + 0.d0, tmp0, size(tmp0,1)*size(tmp0,2) ) + deallocate( bl1_tmp ) + + call wall_time(t3) + print *, " end DGEMM after (min) ", (t3-t2)/60. + + do n = 1, nb_max + do m = 1, na_max + do k = 1, na + Hmat_diag(m,n) += tmp0(k,n,m) * Ut(k,m) + enddo + enddo + enddo + + deallocate( tmp0 , Ut ) + + Hdiag(:) = 0.d0 + do m = 1, n_toselect + ii = numalpha_toselect(m) + jj = numbeta_toselect (m) + Hdiag(m) = Hmat_diag(ii,jj) + enddo + + deallocate( Hmat_diag ) + + call wall_time(t4) + print *, " end const_Hdiag after (min) ", (t4-t3)/60. + print *, "" + + + print *, " total time (min) ", (t4-t1)/60. + print *, "" + + return +end subroutine const_Hdiag + + + + + diff --git a/devel/svdwf/kl_H_kl_v1.irp.f b/devel/svdwf/kl_H_kl_v1.irp.f new file mode 100644 index 0000000..6c4ea3d --- /dev/null +++ b/devel/svdwf/kl_H_kl_v1.irp.f @@ -0,0 +1,352 @@ +program kl_H_kl_v1 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2 + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, ia, ib + double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta + integer :: n_toselect, na_max, nb_max + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer :: cantor_pairing_ij, cantor_pairing_new + integer, allocatable :: cantor_pairing(:), cantor_pairing_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + deallocate( Aref , Dref ) + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ibeg_alpha = 1 + iend_alpha = n_det_alpha_unique + na_max = iend_alpha - ibeg_alpha + 1 + + ibeg_beta = 1 + iend_beta = n_det_beta_unique + nb_max = iend_beta - ibeg_beta + 1 + + n_toselect = na_max * nb_max + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + print *, ' n_toselect = ', n_toselect + + + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + k = 0 + do i = ibeg_alpha, iend_alpha + do j = ibeg_beta, iend_beta + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + if( k.ne.n_toselect ) then + print *, " error in numbering" + stop + endif + + + allocate( Hdiag(n_toselect) ) + + ! get < u_k v_l | H | u_k v_l > for all vectors + call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + open(UNIT=11, FILE="klHkl_v1.dat", ACTION="WRITE") + do i = 1, n_toselect + write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i) + enddo + close(11) + + + deallocate( Uref, Vref ) + deallocate( numalpha_toselect, numbeta_toselect, Hdiag ) + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + +end + + + + + + + + + +subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + implicit none + + integer, intent(in) :: n_toselect, na_max, nb_max + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m, n + double precision :: h12, xtmp + + double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) + double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:) + + double precision :: t1, t2, t3, t4 + + print *, "" + print *, " start const_Hdiag" + call wall_time(t1) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate(Hmat_diag(na_max,nb_max)) + Hmat_diag = 0.d0 + + allocate( bl1_tmp(na,na,nb_max) ) + bl1_tmp = 0.d0 + + allocate( Vt(nb_max,nb) ) + do i = 1, nb + do n = 1, nb_max + Vt(n,i) = Vref(i,n) + enddo + enddo + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,h12,det1,det2,degree) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,nb_max,Vt,bl1_tmp) + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + !$OMP CRITICAL + do n = 1, nb_max + bl1_tmp(i,k,n) += h12 * Vt(n,j) * Vt(n,l) + enddo + !$OMP END CRITICAL + + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(Vt) + + call wall_time(t2) + print *, " end bl1_tmp after (min) ", (t2-t1)/60. + + allocate( Ut(na,na_max) ) + Ut(1:na,1:na_max) = Uref(1:na,1:na_max) + allocate( tmp0(na,nb_max,na_max) ) + call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0, & + bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), & + 0.d0, tmp0, size(tmp0,1)*size(tmp0,2) ) + deallocate( bl1_tmp ) + + call wall_time(t3) + print *, " end DGEMM after (min) ", (t3-t2)/60. + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(k,m,n,Hmat_diag_tmp) & + !$OMP SHARED(na,na_max,nb_max,Ut,tmp0,Hmat_diag) + allocate( Hmat_diag_tmp(na_max,nb_max) ) + Hmat_diag_tmp = 0.d0 + !$OMP DO + do n = 1, nb_max + do m = 1, na_max + do k = 1, na + Hmat_diag_tmp(m,n) += tmp0(k,n,m) * Ut(k,m) + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, nb_max + do m = 1, na_max + Hmat_diag(m,n) += Hmat_diag_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( Hmat_diag_tmp ) + !$OMP END PARALLEL + + deallocate( tmp0 , Ut ) + + Hdiag(:) = 0.d0 + do m = 1, n_toselect + ii = numalpha_toselect(m) + jj = numbeta_toselect (m) + Hdiag(m) = Hmat_diag(ii,jj) + enddo + + deallocate( Hmat_diag ) + + call wall_time(t4) + print *, " end const_Hdiag after (min) ", (t4-t3)/60. + print *, "" + + + print *, " total time (min) ", (t4-t1)/60. + print *, "" + + return +end subroutine const_Hdiag + + + + + diff --git a/devel/svdwf/kl_H_kl_v2.irp.f b/devel/svdwf/kl_H_kl_v2.irp.f new file mode 100644 index 0000000..a44f66c --- /dev/null +++ b/devel/svdwf/kl_H_kl_v2.irp.f @@ -0,0 +1,365 @@ +program kl_H_kl_v2 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2 + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, ia, ib + double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: ibeg_alpha, ibeg_beta, iend_alpha, iend_beta + integer :: n_toselect, na_max, nb_max + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + + integer :: cantor_pairing_ij, cantor_pairing_new + integer, allocatable :: cantor_pairing(:), cantor_pairing_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + deallocate( Aref , Dref ) + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ibeg_alpha = 1 + iend_alpha = n_det_alpha_unique + na_max = iend_alpha - ibeg_alpha + 1 + + ibeg_beta = 1 + iend_beta = n_det_beta_unique + nb_max = iend_beta - ibeg_beta + 1 + + n_toselect = na_max * nb_max + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + print *, ' n_toselect = ', n_toselect + + + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + k = 0 + do i = ibeg_alpha, iend_alpha + do j = ibeg_beta, iend_beta + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + enddo + enddo + if( k.ne.n_toselect ) then + print *, " error in numbering" + stop + endif + + + allocate( Hdiag(n_toselect) ) + + ! get < u_k v_l | H | u_k v_l > for all vectors + call const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + open( UNIT=11, FILE="klHkl_v2.dat", ACTION="WRITE") + do i = 1, n_toselect + write(11, '(2(I5,2X), 5X, E15.7)') numalpha_toselect(i), numbeta_toselect(i), Hdiag(i) + enddo + close(11) + + + deallocate( Uref, Vref ) + deallocate( numalpha_toselect, numbeta_toselect, Hdiag ) + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + +end + + + + + + + + + +subroutine const_Hdiag(na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hdiag) + + implicit none + + integer, intent(in) :: n_toselect, na_max, nb_max + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hdiag(n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m, n + double precision :: h12, xtmp + + double precision, allocatable :: Hmat_diag(:,:), Vt(:,:), bl1_tmp(:,:,:) + double precision, allocatable :: Ut(:,:), tmp0(:,:,:) , Hmat_diag_tmp(:,:) + + double precision :: t1, t2, t3, t4 + + print *, "" + print *, " start const_Hdiag" + call wall_time(t1) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate(Hmat_diag(na_max,nb_max)) + Hmat_diag = 0.d0 + + allocate( bl1_tmp(na,na,nb_max) ) + bl1_tmp = 0.d0 + + allocate( Vt(nb_max,nb) ) + do i = 1, nb + do n = 1, nb_max + Vt(n,i) = Vref(i,n) + enddo + enddo + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,n,h12,det1,det2,degree,tmp0) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,nb_max,Vt,bl1_tmp) + + allocate( tmp0(na,na,nb_max) ) + tmp0 = 0.d0 + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + do n = 1, nb_max + tmp0(i,k,n) += h12 * Vt(n,j) * Vt(n,l) + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do n = 1, nb_max + do k = 1, na + do i = 1, na + bl1_tmp(i,k,n) += tmp0(i,k,n) + enddo + enddo + enddo + !$OMP END CRITICAL + + deallocate( tmp0 ) + !$OMP END PARALLEL + + deallocate(Vt) + + call wall_time(t2) + print *, " end bl1_tmp after (min) ", (t2-t1)/60. + + allocate( Ut(na,na_max) ) + Ut(1:na,1:na_max) = Uref(1:na,1:na_max) + allocate( tmp0(na,nb_max,na_max) ) + call DGEMM('T', 'N', na*nb_max, na_max, na, 1.d0, & + bl1_tmp, size(bl1_tmp,1), Ut, size(Ut,1), & + 0.d0, tmp0, size(tmp0,1)*size(tmp0,2) ) + deallocate( bl1_tmp ) + + call wall_time(t3) + print *, " end DGEMM after (min) ", (t3-t2)/60. + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(k,m,n,Hmat_diag_tmp) & + !$OMP SHARED(na,na_max,nb_max,Ut,tmp0,Hmat_diag) + allocate( Hmat_diag_tmp(na_max,nb_max) ) + Hmat_diag_tmp = 0.d0 + !$OMP DO + do n = 1, nb_max + do m = 1, na_max + do k = 1, na + Hmat_diag_tmp(m,n) += tmp0(k,n,m) * Ut(k,m) + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do n = 1, nb_max + do m = 1, na_max + Hmat_diag(m,n) += Hmat_diag_tmp(m,n) + enddo + enddo + !$OMP END CRITICAL + deallocate( Hmat_diag_tmp ) + !$OMP END PARALLEL + + deallocate( tmp0 , Ut ) + + Hdiag(:) = 0.d0 + do m = 1, n_toselect + ii = numalpha_toselect(m) + jj = numbeta_toselect (m) + Hdiag(m) = Hmat_diag(ii,jj) + enddo + + deallocate( Hmat_diag ) + + call wall_time(t4) + print *, " end const_Hdiag after (min) ", (t4-t3)/60. + print *, "" + + + print *, " total time (min) ", (t4-t1)/60. + print *, "" + + return +end subroutine const_Hdiag + + + + + diff --git a/devel/svdwf/linear_algebra.irp.f b/devel/svdwf/linear_algebra.irp.f new file mode 100644 index 0000000..8e6ca3a --- /dev/null +++ b/devel/svdwf/linear_algebra.irp.f @@ -0,0 +1,197 @@ +subroutine svd_s(A, LDA, U, LDU, D, Vt, LDVt, m, n) + implicit none + BEGIN_DOC + ! !!! + ! DGESVD computes the singular value decomposition (SVD) of a real + ! M-by-N matrix A, optionally computing the left and/or right singular + ! vectors. The SVD is written: + ! A = U * SIGMA * transpose(V) + ! where SIGMA is an M-by-N matrix which is zero except for its + ! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + ! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + ! are the singular values of A; they are real and non-negative, and + ! are returned in descending order. The first min(m,n) columns of + ! U and V are the left and right singular vectors of A. + ! + ! Note that the routine returns V**T, not V. + ! !!! + END_DOC + + integer, intent(in) :: LDA, LDU, LDVt, m, n + double precision, intent(in) :: A(LDA,n) + double precision, intent(out) :: U(LDU,m), Vt(LDVt,n), D(min(m,n)) + double precision,allocatable :: work(:), A_tmp(:,:) + integer :: info, lwork, i, j, k + + + allocate (A_tmp(LDA,n)) + do k=1,n + do i=1,m + !A_tmp(i,k) = A(i,k) + 1d-16 + A_tmp(i,k) = A(i,k) + enddo + enddo + + ! Find optimal size for temp arrays + allocate(work(1)) + lwork = -1 + ! 'A': all M columns of U are returned in array U + ! 'A': all N rows of V**T are returned in the array VT + call dgesvd('A', 'A', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info) + ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + if( info.ne.0 ) then + print *, ' problem in first call DGESVD !!!!' + print *, ' info = ', info + print *, ' < 0 : if INFO = -i, the i-th argument had an illegal value.' + print *, ' > 0 : if DBDSQR did not converge, INFO specifies how many ' + print *, ' superdiagonals of an intermediate bidiagonal form B ' + print *, ' did not converge to zero. See the description of WORK' + print *, ' above for details. ' + stop + endif + lwork = max(int(work(1)), 5*MIN(M,N)) + deallocate(work) + + allocate(work(lwork)) + + call dgesvd('A', 'A', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info) + if( info.ne.0 ) then + print *, ' problem in second call DGESVD !!!!' + print *, ' info = ', info + print *, ' < 0 : if INFO = -i, the i-th argument had an illegal value.' + print *, ' > 0 : if DBDSQR did not converge, INFO specifies how many ' + print *, ' superdiagonals of an intermediate bidiagonal form B ' + print *, ' did not converge to zero. See the description of WORK' + print *, ' above for details. ' + stop + endif + + deallocate(A_tmp,work) + + !do j=1, m + ! do i=1, LDU + ! if (dabs(U(i,j)) < 1.d-14) U(i,j) = 0.d0 + ! enddo + !enddo + !do j = 1, n + ! do i = 1, LDVt + ! if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0 + ! enddo + !enddo + +end + + + +subroutine svd_s2(A, LDA, U, LDU, D, Vt, LDVt, m, n) + implicit none + + integer, intent(in) :: LDA, LDU, LDVt, m, n + double precision, intent(in) :: A(LDA,n) + double precision, intent(out) :: U(LDU,min(m,n)), Vt(LDVt,n), D(min(m,n)) + double precision,allocatable :: work(:), A_tmp(:,:) + integer :: info, lwork, i, j, k + + + allocate (A_tmp(LDA,n)) + do k=1,n + do i=1,m + A_tmp(i,k) = A(i,k) + enddo + enddo + + ! Find optimal size for temp arrays + allocate(work(1)) + lwork = -1 + ! 'A': all M columns of U are returned in array U + ! 'A': all N rows of V**T are returned in the array VT + call dgesvd('A', 'S', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info) + ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + if( info.ne.0 ) then + print *, ' problem in first call DGESVD !!!!' + stop + endif + lwork = max(int(work(1)), 5*MIN(M,N)) + deallocate(work) + + allocate(work(lwork)) + + call dgesvd('A', 'S', m, n, A_tmp, LDA, D, U, LDU, Vt, LDVt, work, lwork, info) + if( info.ne.0 ) then + print *, ' problem in second call DGESVD !!!!' + stop + endif + + deallocate(A_tmp,work) + + do j=1, min(m,n) + do i=1, m + if (dabs(U(i,j)) < 1.d-14) U(i,j) = 0.d0 + enddo + enddo + do j = 1, n + do i = 1, LDVt + if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0 + enddo + enddo + +end + + + +subroutine my_ortho_qr(A,LDA,m,n) + implicit none + BEGIN_DOC + ! Orthogonalization using Q.R factorization + ! + ! A : matrix to orthogonalize + ! + ! LDA : leftmost dimension of A + ! + ! m : Number of rows of A + ! + ! n : Number of columns of A + ! + END_DOC + integer, intent(in) :: m, n, LDA + double precision, intent(inout) :: A(LDA,n) + integer :: LWORK, INFO, nTAU, ii, jj + double precision, allocatable :: TAU(:), WORK(:) + double precision :: Adorgqr(LDA,n) + + allocate (TAU(min(m,n)), WORK(1)) + + LWORK=-1 + call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + LWORK=max(n,int(WORK(1))) + + deallocate(WORK) + allocate(WORK(LWORK)) + call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) + if(INFO.ne.0 ) then + print*, 'problem in DGEQRF' + endif + + nTAU = size(TAU) + do jj = 1, n + do ii = 1, LDA + Adorgqr(ii,jj) = A(ii,jj) + enddo + enddo + + LWORK=-1 + call dorgqr(m, n, nTAU, Adorgqr, LDA, TAU, WORK, LWORK, INFO) + ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + LWORK=max(n,int(WORK(1))) + + deallocate(WORK) + allocate(WORK(LWORK)) + call dorgqr(m, n, nTAU, A, LDA, TAU, WORK, LWORK, INFO) + if(INFO.ne.0 ) then + print*, 'problem in DORGQR' + endif + + + deallocate(WORK,TAU) +end diff --git a/devel/svdwf/perform_RSVD.py b/devel/svdwf/perform_RSVD.py new file mode 100644 index 0000000..90e2894 --- /dev/null +++ b/devel/svdwf/perform_RSVD.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +import os, sys + +#QP_PATH=os.environ["QMCCHEM_PATH"] +#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/") + +import scipy +from scipy import linalg + +from ezfio import ezfio +from datetime import datetime +import numpy as np +import time + + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +def get_Aref(): + + Aref = np.zeros( (n_alpha, n_beta) ) + for k in range(n_det): + i = A_rows[k] - 1 + j = A_cols[k] - 1 + Aref[i,j] = A_vals[0][k] + + return( Aref ) +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +def powit_RSVD(X, n_TSVD, nb_powit, nb_oversamp): + + print(" --- begin powit_RSVD --- ") + print(" n_TSVD = {}".format(n_TSVD)) + print(" pow it = {} & nb oversampling = {}". + format(nb_powit,nb_oversamp)) + + G = np.random.randn(X.shape[1], n_TSVD+nb_oversamp) + Q = QR_fact( np.dot(X,G) ) + + for i in range(nb_powit): + ti = time.time() + print(" start pow it = {}".format(i)) + + Q = QR_fact( np.dot(X.T,Q) ) + Q = QR_fact( np.dot(X,Q) ) + + tf = time.time() + dt = (tf-ti)/60. + print(" end pow it = {} after {} min".format(i,dt)) + + Y = np.dot(Q.T,X) + + U, S, VT = np.linalg.svd(Y, full_matrices=1) + U = np.dot(Q,U) + + print( " --- end powit_RSVD --- \n") + return U, S, VT +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +def QR_fact(X): + + Q, _ = linalg.qr(X, mode="full") + #Q,R = np.linalg.qr(X, mode="complete") + #D = np.diag( np.sign( np.diag(R) ) ) + Qunique = Q #np.dot(Q,D) + #Runique = np.dot(D,R) + + return(Qunique) +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +def TSVD_save_EZFIO(): + + U_toEZFIO = np.zeros( ( 1, U.shape[1], U.shape[0] ) ) + V_toEZFIO = np.zeros( ( 1, V.shape[1], V.shape[0] ) ) + U_toEZFIO[0,:,:] = U_TSVD.T + V_toEZFIO[0,:,:] = V_TSVD.T + + ezfio.set_spindeterminants_n_svd_coefs( n_TSVD ) + ezfio.set_spindeterminants_psi_svd_alpha( U_toEZFIO ) + ezfio.set_spindeterminants_psi_svd_beta ( V_toEZFIO ) + ezfio.set_spindeterminants_psi_svd_coefs( S_RSVD ) + + print(' SVD vectors & coeff are saved to EZFIO ') +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + +if __name__ == "__main__": + + + print("") + print(" Today's date:", datetime.now() ) + + # EZFIO file + #EZFIO_file = "/home/aammar/qp2/src/svdwf/h2o_work/FN_test/cc_pCVDZ/h2o_dz" + EZFIO_file = "/home/aammar/qp2/src/svdwf/h2o_work/FN_test/cipsi_calcul/h2o_dz_fci" + ezfio.set_file(EZFIO_file) + print(" EZFIO = {}\n".format(EZFIO_file)) + + #read_wf = True + #ezfio.read_wf = True + #TOUCH read_wf + + + n_det = ezfio.get_spindeterminants_n_det() + print(' n_det = {}'.format(n_det)) + + n_alpha = ezfio.get_spindeterminants_n_det_alpha() + n_beta = ezfio.get_spindeterminants_n_det_beta() + print(' matrix dimensions = {} x {} = {} \n'.format(n_alpha, n_beta, n_alpha*n_beta)) + + A_rows = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows()) + A_cols = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns()) + A_vals = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values()) + Aref = get_Aref() + A_norm = np.linalg.norm(Aref, ord='fro') + + + npow = 15 + nb_oversamp = 10 + n_TSVD = 100 #min(n_alpha,n_beta) + + t_beg = time.time() + U, S_RSVD, Vt = powit_RSVD(Aref, n_TSVD, npow, nb_oversamp) + print(' powit_RSVD time = {}\n'.format((time.time()-t_beg)/60.)) + + S_mat = np.zeros((n_alpha,n_beta)) + for i in range(n_TSVD): + S_mat[i,i] = S_RSVD[i] + err_SVD = 100. * np.linalg.norm( Aref - np.dot(U,np.dot(S_mat,Vt)), ord="fro") / A_norm + print(' powit_RSVD error (%) = {} \n'.format(err_SVD)) +#______________________________________________________________________________________________________________________ diff --git a/devel/svdwf/psiSVD_naiv1by1_v1.irp.f b/devel/svdwf/psiSVD_naiv1by1_v1.irp.f new file mode 100644 index 0000000..33f1fb9 --- /dev/null +++ b/devel/svdwf/psiSVD_naiv1by1_v1.irp.f @@ -0,0 +1,527 @@ +program psiSVD_naiv1by1_v1 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2, Em, norm + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, jj, ia, ib, ja, jb + double precision, allocatable :: Hdiag(:), H0_1d(:), H0_tmp(:,:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: n_selected, n_toselect, n_tmp, na_max, nb_max + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + ! *** PARAMETERS *** ! + na_max = n_det_alpha_unique + nb_max = n_det_beta_unique + ! *** ***** *** ! + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + + ! --------------------------------------------------------------------------------------- + ! initial wavefunction: psi_0 + + n_selected = 1 + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + + numalpha_selected(1) = 1 + numbeta_selected (1) = 1 + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! construnc the initial basis to select phi_1 from the FSVD + + n_toselect = min(na_max,nb_max) - n_selected + print *, ' toselect = ', n_toselect + print *, ' to trun = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! read < u_k v_l | H | u_k v_l > for all vectors + + allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) ) + + n_tmp = n_det_alpha_unique * n_det_beta_unique - 1 + + open( unit=11, FILE="klHkl_v1.dat", ACTION="READ") + !open( unit=11, FILE="klHkl_v2.dat", ACTION="READ") + + read(11,*) i, i, E0 + H0(1,1) = E0 + + do i = 1, n_tmp + read(11,*) ia, ib, ctmp + if( ia .eq. ib ) then + ii = ia - 1 + Hdiag(ii) = ctmp + !print *, ia, ib , Hdiag(ia-1) + endif + enddo + + close(11) + + ! --------------------------------------------------------------------------------------- + + E0 = E0 + nuclear_repulsion + Em = E0 + print*, ' space dimen = ', n_selected + print*, ' ground state Em = ', Em + print*, ' ground state E0 = ', E0 + + na_new = 1 + nb_new = 1 + ind_new = 0 + + !________________________________________________________________________________________________________ + ! + ! increase the size of psi0 iteratively + !________________________________________________________________________________________________________ + + ! *** PARAMETERS *** ! + rank_max = min( na_max , nb_max ) - 1 + ! *** ***** *** ! + + if( rank_max .gt. n_toselect ) then + print *, " rank_max should be less then n_toselect" + stop + endif + + + do while( n_selected .lt. rank_max ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + print*, ' ' + print*, ' new iteration ' + + if( n_toselect .lt. 1 ) then + + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print*, ' no more vectors to construct a new basis ' + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + stop + + else + + ! --------------------------------------------------------------------------------------- + ! select a new vector + + na_new += 1 + nb_new += 1 + ind_new += 1 + print *, ' best vector', na_new, nb_new + + ! < psi_old | H | delta_psi > + allocate( H0_1d(n_selected) ) + call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! new psi + + allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected) ) + allocate( H0_tmp(n_selected,n_selected) ) + + numalpha_tmp(:) = numalpha_selected(:) + numbeta_tmp (:) = numbeta_selected (:) + H0_tmp (:,:) = H0 (:,:) + + deallocate( numalpha_selected, numbeta_selected, H0 ) + + n_tmp = n_selected + n_selected = n_selected + 1 + + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + allocate( H0(n_selected,n_selected) ) + H0(:,:) = 0.d0 + + do l = 1, n_tmp + numalpha_selected(l) = numalpha_tmp(l) + numbeta_selected (l) = numbeta_tmp (l) + enddo + H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp) + + deallocate( numalpha_tmp, numbeta_tmp, H0_tmp ) + + numalpha_selected(n_selected) = na_new + numbeta_selected (n_selected) = nb_new + + H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp) + H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp) + deallocate( H0_1d ) + H0(n_selected,n_selected) = Hdiag(ind_new) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! energy without diag + + ! < psi | psi > + norm = 0.d0 + do j = 1, n_selected + ja = numalpha_selected(j) + jb = numbeta_selected (j) + if(ja.eq.jb) norm = norm + Dref(ja)*Dref(jb) + enddo + + ! < psi | H | psi > + Em = 0.d0 + do j = 1, n_selected + ja = numalpha_selected(j) + jb = numbeta_selected (j) + if(ja.eq.jb) then + do i = 1, n_selected + ia = numalpha_selected(i) + ib = numbeta_selected (i) + if(ia.eq.ib) Em = Em + Dref(ja) * H0(j,i) * Dref(ia) + enddo + endif + enddo + + ! Em = < psi | H | psi > / < psi | psi > + Em = Em / norm + nuclear_repulsion + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! energy with diag + + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_selected + ia = numalpha_selected(i) + ib = numbeta_selected (i) + if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + coeff_psi(:) = eigvec0(:,ind_gs) + + deallocate( check_ov, eigval0, eigvec0 ) + + ! --------------------------------------------------------------------------------------- + + + print*, ' space dimen = ', n_selected + print*, ' E bef diag = ', Em + print*, ' E aft diag = ', E0 + print*, ' overlop = ', overlop + print*, ' index = ', ind_gs + + write(211, '( 3(I5,3X), 4(F15.8,3X) )') n_selected, na_new, nb_new, Em, E0, overlop + + + + ! --------------------------------------------------------------------------------------- + ! remove selected pair | na_new nb_new > + + n_toselect = n_toselect - 1 + print*, ' rank to select = ', n_toselect + + ! --------------------------------------------------------------------------------------- + + + endif + + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + print*, " " + print*, " elapsed time (min) = ", W_tot_time_it/60.d0 + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + + + deallocate( Dref ) + deallocate( Uref, Vref ) + +deallocate( coeff_psi ) + deallocate( numalpha_selected, numbeta_selected ) + deallocate( H0, Hdiag ) + +end + + + + +subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + implicit none + + integer, intent(in) :: na_new, nb_new, na_max, nb_max, n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0_1d(n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m + double precision :: h12 + + double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:) + double precision, allocatable :: U1d(:), V1d(:) + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + + double precision :: ti, tf + + print *, "" + print *, " start const_H0_1d" + call wall_time(ti) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( U1d(na) , V1d(nb) ) + U1d(1:na) = Uref(1:na,na_new) + V1d(1:nb) = Vref(1:nb,nb_new) + + allocate( tmp1(na,nb) ) + tmp1 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,U1d,V1d,tmp1) + + allocate( tmp2(na,nb) ) + tmp2 = 0.d0 + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + tmp2(i,j) += h12 * U1d(k) * V1d(l) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do j = 1, nb + do i = 1, na + tmp1(i,j) += tmp2(i,j) + enddo + enddo + !$OMP END CRITICAL + + deallocate( tmp2 ) + !$OMP END PARALLEL + + deallocate( U1d , V1d ) + + ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m) + allocate( Utmp(na,na_max) ) + Utmp(1:na,1:na_max) = Uref(1:na,1:na_max) + + allocate( tmp2(nb,na_max) ) + call DGEMM('T', 'N', nb, na_max, na, 1.d0, & + tmp1, size(tmp1,1), Utmp, size(Utmp,1), & + 0.d0, tmp2, size(tmp2,1) ) + deallocate( tmp1 ) + deallocate( Utmp ) + + ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n) + allocate( Vtmp(nb,nb_max) ) + Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max) + + allocate( Hmat_kl(na_max,nb_max) ) + call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, & + tmp2, size(tmp2,1), Vtmp, size(Vtmp,1), & + 0.d0, Hmat_kl, size(Hmat_kl,1) ) + deallocate( tmp2 ) + deallocate( Vtmp ) + + do m = 1, n_selected + ii = numalpha_selected(m) + jj = numbeta_selected (m) + H0_1d(m) = Hmat_kl(ii,jj) + enddo + deallocate( Hmat_kl ) + + call wall_time(tf) + print *, " end const_H0_1d after (min) ", (tf-ti)/60. + print *, "" + + return +end subroutine const_H0_1d + + + diff --git a/devel/svdwf/psiSVD_naivBbyB_v0.irp.f b/devel/svdwf/psiSVD_naivBbyB_v0.irp.f new file mode 100644 index 0000000..3d07152 --- /dev/null +++ b/devel/svdwf/psiSVD_naivBbyB_v0.irp.f @@ -0,0 +1,358 @@ +program psiSVD_naivBbyB_v0 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max, n_TSVD, n_selected + double precision :: E0, overlop + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:) + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + + integer :: ii + + integer :: na_new, nb_new, ind_new, ind_gs + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + + + + ! --------------------------------------------------------------------------------------- + ! initial wavefunction: psi_0 + + n_TSVD = 1 + n_selected = 1 + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + numalpha_selected(1) = 1 + numbeta_selected (1) = 1 + + ! get E0 = < psi_0 | H | psi_0 > + allocate( H0(n_selected,n_selected) ) + call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + E0 = H0(1,1) + nuclear_repulsion + print*, ' ground state E0 = ', E0 + + deallocate( H0 ) + + ! --------------------------------------------------------------------------------------- + + + + + !________________________________________________________________________________________________________ + ! + ! increase the size of psi0 iteratively + !________________________________________________________________________________________________________ + + + rank_max = n_det_alpha_unique*n_det_beta_unique ! 15*15 + + do while( n_selected .lt. rank_max ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + print*, ' ' + print*, ' new iteration ' + + + deallocate( numalpha_selected, numbeta_selected ) + + n_TSVD = n_TSVD + 1 + n_selected = n_TSVD * n_TSVD + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + l = 0 + do i = 1, n_TSVD + do j = 1, n_TSVD + l = l + 1 + numalpha_selected(l) = i + numbeta_selected (l) = j + enddo + enddo + if( l.ne.n_selected) then + print *, "error in numbering" + stop + endif + + ! construct and diagonalise the hamiltonian < psi_selected | H | psi_selected > + allocate( H0(n_selected,n_selected) ) + call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_TSVD + ii = i + (i-1)*n_TSVD + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + print*, ' space dimen = ', n_selected + print*, ' diag energy = ', E0 + print*, ' overlop = ', overlop + + deallocate( H0 ) + deallocate( check_ov, eigval0, eigvec0 ) + + ! --------------------------------------------------------------------------------------- + + write(220, *) n_selected, E0 + + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + print*, " " + print*, " elapsed time (min) = ", W_tot_time_it/60.d0 + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + deallocate( Dref ) + deallocate( Uref, Vref ) + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + +end + + + + + + + +subroutine const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l + integer :: iin, jjn, iim, jjm, n, m + double precision :: h12, x + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + H0(:,:) = 0.d0 + + allocate( tmp1(nb,nb,n_TSVD,n_TSVD) ) + tmp1(:,:,:,:) = 0.d0 + + allocate( Utmp(n_TSVD,na) ) + do i = 1, na + do iin = 1, n_TSVD + Utmp(iin,i) = Uref(i,iin) + enddo + enddo + + do l = 1, nb + do j = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + do iin = 1, n_TSVD + x = Utmp(iin,i) * h12 + if( x == 0.d0 ) cycle + do iim = 1, n_TSVD + tmp1(j,l,iim,iin) += Utmp(iim,k) * x + enddo + enddo + + enddo + enddo + enddo + enddo + + deallocate( Utmp ) + + allocate( Vtmp(nb,n_TSVD) ) + do iin = 1, n_TSVD + do i = 1, nb + Vtmp(i,iin) = Vref(i,iin) + enddo + enddo + + allocate( tmp2(nb,n_TSVD,n_TSVD,n_TSVD) ) + call DGEMM('T','N', nb*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0 & + , tmp1, size(tmp1,1) & + , Vtmp, size(Vtmp,1) & + , 0.d0, tmp2, size(tmp2,1)*size(tmp2,2)*size(tmp2,3) ) + + deallocate(tmp1) + allocate( tmp1(n_TSVD,n_TSVD,n_TSVD,n_TSVD) ) + call DGEMM('T','N', n_TSVD*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0 & + , tmp2, size(tmp2,1) & + , Vtmp, size(Vtmp,1) & + , 0.d0, tmp1, size(tmp1,1)*size(tmp1,2)*size(tmp1,3) ) + deallocate( tmp2, Vtmp ) + + do n = 1, n_selected + iin = numalpha_selected(n) + jjn = numbeta_selected (n) + + do m = 1, n_selected + iim = numalpha_selected(m) + jjm = numbeta_selected (m) + + H0(m,n) = tmp1(iin,iim,jjn,jjm) + + enddo + enddo + + deallocate( tmp1 ) + + return +end subroutine const_psiHpsi + + diff --git a/devel/svdwf/psiSVD_naivBbyB_v1.irp.f b/devel/svdwf/psiSVD_naivBbyB_v1.irp.f new file mode 100644 index 0000000..3597db5 --- /dev/null +++ b/devel/svdwf/psiSVD_naivBbyB_v1.irp.f @@ -0,0 +1,383 @@ +program psiSVD_naivBbyB_v1 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max, n_TSVD, n_selected + double precision :: E0, overlop + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:) + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + + integer :: ii + + integer :: na_new, nb_new, ind_new, ind_gs + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + + + + ! --------------------------------------------------------------------------------------- + ! initial wavefunction: psi_0 + + n_TSVD = 1 + n_selected = 1 + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + numalpha_selected(1) = 1 + numbeta_selected (1) = 1 + + ! get E0 = < psi_0 | H | psi_0 > + allocate( H0(n_selected,n_selected) ) + call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + E0 = H0(1,1) + nuclear_repulsion + print*, ' ground state E0 = ', E0 + + deallocate( H0 ) + + ! --------------------------------------------------------------------------------------- + + + + + !________________________________________________________________________________________________________ + ! + ! increase the size of psi0 iteratively + !________________________________________________________________________________________________________ + + + rank_max = n_det_alpha_unique*n_det_beta_unique ! 15*15 + + do while( n_selected .lt. rank_max ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + print*, ' ' + print*, ' new iteration ' + + + deallocate( numalpha_selected, numbeta_selected ) + + n_TSVD = n_TSVD + 1 + n_selected = n_TSVD * n_TSVD + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) + l = 0 + do i = 1, n_TSVD + do j = 1, n_TSVD + l = l + 1 + numalpha_selected(l) = i + numbeta_selected (l) = j + enddo + enddo + if( l.ne.n_selected) then + print *, "error in numbering" + stop + endif + + ! construct and diagonalise the hamiltonian < psi_selected | H | psi_selected > + allocate( H0(n_selected,n_selected) ) + call const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_TSVD + ii = i + (i-1)*n_TSVD + overlop = overlop + eigvec0(ii,l) * Dref(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + print*, ' space dimen = ', n_selected + print*, ' diag energy = ', E0 + print*, ' overlop = ', overlop + + deallocate( H0 ) + deallocate( check_ov, eigval0, eigvec0 ) + + ! --------------------------------------------------------------------------------------- + + write(221, *) n_selected, E0 + + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + print*, " " + print*, " elapsed time (min) = ", W_tot_time_it/60.d0 + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + deallocate( Dref ) + deallocate( Uref, Vref ) + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + +end + + + + + + + +subroutine const_psiHpsi(n_TSVD, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0) + + implicit none + + integer, intent(in) :: n_TSVD, n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0(n_selected,n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l + integer :: iin, jjn, iim, jjm, n, m + double precision :: h12, x + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + H0(:,:) = 0.d0 + + allocate( tmp1(nb,nb,n_TSVD,n_TSVD) ) + tmp1(:,:,:,:) = 0.d0 + + allocate( Utmp(n_TSVD,na) ) + do i = 1, na + do iin = 1, n_TSVD + Utmp(iin,i) = Uref(i,iin) + enddo + enddo + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(iin,iim,i,j,k,l,h12,x,det1,det2,degree,tmp2) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,n_TSVD,Utmp,tmp1) + + allocate( tmp2(nb,nb,n_TSVD,n_TSVD) ) + tmp2(:,:,:,:) = 0.d0 + + !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8) + do l = 1, nb + do j = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + do iin = 1, n_TSVD + x = Utmp(iin,i) * h12 + if( x == 0.d0 ) cycle + do iim = 1, n_TSVD + tmp2(j,l,iim,iin) += Utmp(iim,k) * x + enddo + enddo + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do iin = 1, n_TSVD + do iim = 1, n_TSVD + do l = 1, nb + do j = 1, nb + tmp1(j,l,iim,iin) += tmp2(j,l,iim,iin) + enddo + enddo + enddo + enddo + !$OMP END CRITICAL + deallocate( tmp2 ) + + !$OMP END PARALLEL + + deallocate( Utmp ) + + allocate( Vtmp(nb,n_TSVD) ) + do iin = 1, n_TSVD + do i = 1, nb + Vtmp(i,iin) = Vref(i,iin) + enddo + enddo + + allocate( tmp2(nb,n_TSVD,n_TSVD,n_TSVD) ) + call DGEMM('T','N', nb*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0 & + , tmp1, size(tmp1,1) & + , Vtmp, size(Vtmp,1) & + , 0.d0, tmp2, size(tmp2,1)*size(tmp2,2)*size(tmp2,3) ) + + deallocate(tmp1) + allocate( tmp1(n_TSVD,n_TSVD,n_TSVD,n_TSVD) ) + call DGEMM('T','N', n_TSVD*n_TSVD*n_TSVD, n_TSVD, nb, 1.d0 & + , tmp2, size(tmp2,1) & + , Vtmp, size(Vtmp,1) & + , 0.d0, tmp1, size(tmp1,1)*size(tmp1,2)*size(tmp1,3) ) + deallocate( tmp2, Vtmp ) + + do n = 1, n_selected + iin = numalpha_selected(n) + jjn = numbeta_selected (n) + + do m = 1, n_selected + iim = numalpha_selected(m) + jjm = numbeta_selected (m) + + H0(m,n) = tmp1(iin,iim,jjn,jjm) + + enddo + enddo + + deallocate( tmp1 ) + + return +end subroutine const_psiHpsi + + diff --git a/devel/svdwf/psiSVD_pt2_v0.irp.f b/devel/svdwf/psiSVD_pt2_v0.irp.f new file mode 100644 index 0000000..7d820c1 --- /dev/null +++ b/devel/svdwf/psiSVD_pt2_v0.irp.f @@ -0,0 +1,691 @@ +program psiSVD_pt2_v0 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2 + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, jj, ia, ib + double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:) + double precision, allocatable :: H0_1d(:), H0_tmp(:,:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: n_selected, n_toselect, n_tmp, na_max, nb_max + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer :: cantor_pairing_ij, cantor_pairing_new + integer, allocatable :: cantor_pairing(:), cantor_pairing_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + ! *** PARAMETERS *** ! + na_max = n_det_alpha_unique + nb_max = n_det_beta_unique + ! *** ***** *** ! + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + + ! --------------------------------------------------------------------------------------- + ! initial wavefunction: psi_0 + + n_selected = 1 + allocate(numalpha_selected(n_selected), numbeta_selected(n_selected), cantor_pairing(n_selected)) + + numalpha_selected(1) = 1 + numbeta_selected (1) = 1 + cantor_pairing (1) = 4 !int( 0.5*(1+1)*(1+1+1) ) + 1 + + allocate( coeff_psi(n_selected) ) + coeff_psi(1) = 1.d0 + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! construnc the initial basis to select phi_1 from the FSVD + + n_toselect = na_max * nb_max - n_selected + print *, ' toselect = ', n_toselect + print *, ' to trun = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max + + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + k = 0 + do i = 1, na_max + do j = 1, nb_max + + cantor_pairing_ij = int( 0.5*(i+j)*(i+j+1) ) + j + if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle + + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + + enddo + enddo + if( k.ne.n_toselect ) then + print *, " error in chosing vectors toselect" + print *, " n_toselect =", n_toselect + print *, " k =", k + stop + endif + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! read < u_k v_l | H | u_k v_l > for all vectors + + allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) ) + + open( unit=11, FILE="klHkl_v0.dat", ACTION="READ") + + read(11,*) i, i, E0 + H0(1,1) = E0 + + do i = 1, n_toselect + read(11,*) ia, ib, ctmp + if( (numalpha_toselect(i).ne.ia) .or. (numbeta_toselect(i).ne.ib) ) then + print *, ' error in reading klHkl_v0 ' + print *, ia, ib + print *, numalpha_toselect(i), numbeta_toselect(i) + stop + endif + Hdiag(i) = ctmp + enddo + + close(11) + + ! --------------------------------------------------------------------------------------- + + E0 = E0 + nuclear_repulsion + print*, ' space dimen = ', n_selected + print*, ' ground state E0 = ', E0 + + na_new = 1 + nb_new = 1 + + !________________________________________________________________________________________________________ + ! + ! increase the size of psi0 iteratively + !________________________________________________________________________________________________________ + + ! *** PARAMETERS *** ! + rank_max = na_max * nb_max + !rank_max = 50 * 50 + ! *** ***** *** ! + + if( rank_max .gt. (na_max*nb_max) ) then + print *, " rank_max should be less then na_max x nb_max" + stop + endif + + + allocate( Hkl_save(n_toselect,n_selected) ) + + do while( n_selected .lt. rank_max ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + print*, ' ' + print*, ' new iteration ' + + if( n_toselect .lt. 1 ) then + + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print*, ' no more vectors to construct a new basis ' + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + stop + + else + + ! --------------------------------------------------------------------------------------- + ! select a new vector + + allocate( Hkl_1d(n_toselect) ) + call const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d) + Hkl_save(1:n_toselect,n_selected) = Hkl_1d(1:n_toselect) + deallocate( Hkl_1d ) + + ! choose the best vector + allocate( epsil(n_toselect) , epsil_energ(n_toselect) ) + do ii = 1, n_toselect + + ctmp = 0.d0 + do l = 1, n_selected + ctmp = ctmp + coeff_psi(l) * Hkl_save(ii,l) + enddo + epsil(ii) = ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) ) + + epsil_energ(ii) = epsil(ii) + epsil(ii) = dabs( epsil(ii) ) + enddo + + ind_new = MAXLOC( epsil, DIM=1 ) + + ept2 = epsil_energ(ind_new) + if( ept2 .gt. 0.d0 ) then + print *, ' ept2 > 0 !!!!!!!!!! ' + print *, na_new, nb_new, ept2 + stop + endif + + na_new = numalpha_toselect(ind_new) + nb_new = numbeta_toselect (ind_new) + cantor_pairing_new = int( 0.5 * (na_new+nb_new) * (na_new+nb_new+1) ) + nb_new + + print *, ' best vector', na_new, nb_new, ept2 + deallocate(epsil,epsil_energ) + + ! new coefficient + coeff_new = 0.d0 + do l = 1, n_selected + coeff_new += coeff_psi(l) * Hkl_save(ind_new,l) + enddo + coeff_new = coeff_new / ( E0 - (Hdiag(ind_new)+nuclear_repulsion) ) + print *, ' new coeff = ', coeff_new + + ! < psi_old | H | delta_psi > + allocate( H0_1d(n_selected) ) + call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! new psi + + allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected), coeff_tmp(n_selected) ) + allocate( cantor_pairing_tmp(n_selected) ) + allocate( H0_tmp(n_selected,n_selected) ) + + coeff_tmp (:) = coeff_psi (:) + numalpha_tmp (:) = numalpha_selected(:) + numbeta_tmp (:) = numbeta_selected (:) + cantor_pairing_tmp(:) = cantor_pairing (:) + H0_tmp (:,:) = H0 (:,:) + + deallocate( numalpha_selected, numbeta_selected, coeff_psi, cantor_pairing, H0 ) + + n_tmp = n_selected + n_selected = n_selected + 1 + + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) , coeff_psi(n_selected) ) + allocate( cantor_pairing(n_selected) ) + allocate( H0(n_selected,n_selected) ) + H0(:,:) = 0.d0 + + do l = 1, n_tmp + coeff_psi (l) = coeff_tmp (l) + numalpha_selected(l) = numalpha_tmp (l) + numbeta_selected (l) = numbeta_tmp (l) + cantor_pairing (l) = cantor_pairing_tmp(l) + enddo + H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp) + + deallocate( numalpha_tmp, numbeta_tmp, coeff_tmp, cantor_pairing_tmp, H0_tmp ) + + coeff_psi (n_selected) = coeff_new + numalpha_selected(n_selected) = na_new + numbeta_selected (n_selected) = nb_new + cantor_pairing (n_selected) = cantor_pairing_new + + H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp) + H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp) + deallocate( H0_1d ) + H0(n_selected,n_selected) = Hdiag(ind_new) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! new energy + + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_selected + ia = numalpha_selected(i) + ib = numbeta_selected (i) + if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia) + !overlop = overlop + eigvec0(i,l) * coeff_psi(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + coeff_psi(:) = eigvec0(:,ind_gs) + + deallocate( check_ov, eigval0, eigvec0 ) + + print*, ' space dimen = ', n_selected + print*, ' diag energy = ', E0 + print*, ' overlop = ', overlop + print*, ' index = ', ind_gs + + ! --------------------------------------------------------------------------------------- + + + write(2110, '( 3(I5,3X), 3(F15.8,3X) )') n_selected, na_new, nb_new, ept2, E0, overlop + + + ! --------------------------------------------------------------------------------------- + ! remove selected pair | na_new nb_new > + + allocate( numalpha_tmp(n_toselect), numbeta_tmp(n_toselect), Hdiag_tmp(n_toselect) ) + numalpha_tmp(:) = numalpha_toselect(:) + numbeta_tmp (:) = numbeta_toselect (:) + Hdiag_tmp (:) = Hdiag (:) + + ii = n_selected - 1 + allocate( Hkl_tmp(n_toselect,ii) ) + Hkl_tmp(1:n_toselect,1:ii) = Hkl_save(1:n_toselect,1:ii) + + deallocate( numalpha_toselect , numbeta_toselect, Hkl_save, Hdiag ) + + n_tmp = n_toselect + n_toselect = n_toselect - 1 + print*, ' rank to select = ', n_toselect + + allocate(numalpha_toselect(n_toselect), numbeta_toselect(n_toselect), Hkl_save(n_toselect,n_selected)) + allocate(Hdiag(n_toselect)) + + Hkl_save = 0.d0 + l = 0 + do k = 1, n_tmp + + ia = numalpha_tmp(k) + ib = numbeta_tmp (k) + cantor_pairing_ij = int( 0.5*(ia+ib)*(ia+ib+1) ) + ib + if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle + + l = l + 1 + numalpha_toselect(l) = numalpha_tmp(k) + numbeta_toselect (l) = numbeta_tmp (k) + Hdiag (l) = Hdiag_tmp (k) + + Hkl_save(l,1:ii) = Hkl_tmp(k,1:ii) + + enddo + if( l .ne. n_toselect) then + print *, " error in updating to select vectors" + print *, " l = ", l + print *, " n_toselect = ", n_toselect + stop + endif + + deallocate( numalpha_tmp , numbeta_tmp , Hkl_tmp, Hdiag_tmp ) + + ! --------------------------------------------------------------------------------------- + + + endif + + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + print*, " " + print*, " elapsed time (min) = ", W_tot_time_it/60.d0 + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + + + deallocate( Dref ) + deallocate( Uref, Vref ) + + deallocate( psi_coef ) + deallocate( numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect ) + deallocate( H0, Hdiag, Hkl_save ) + +end + + + + +subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + implicit none + + integer, intent(in) :: na_new, nb_new, na_max, nb_max, n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0_1d(n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m + double precision :: h12 + + double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:) + double precision, allocatable :: U1d(:), V1d(:) + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + + double precision :: ti, tf + + print *, "" + print *, " start const_H0_1d" + call wall_time(ti) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( U1d(na) , V1d(nb) ) + U1d(1:na) = Uref(1:na,na_new) + V1d(1:nb) = Vref(1:nb,nb_new) + + allocate( tmp1(na,nb) ) + tmp1 = 0.d0 + + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + tmp1(i,j) += h12 * U1d(k) * V1d(l) + + enddo + enddo + enddo + enddo + + deallocate( U1d , V1d ) + + ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m) + allocate( Utmp(na,na_max) ) + Utmp(1:na,1:na_max) = Uref(1:na,1:na_max) + + allocate( tmp2(nb,na_max) ) + call DGEMM('T', 'N', nb, na_max, na, 1.d0, & + tmp1, size(tmp1,1), Utmp, size(Utmp,1), & + 0.d0, tmp2, size(tmp2,1) ) + deallocate( tmp1 ) + deallocate( Utmp ) + + ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n) + allocate( Vtmp(nb,nb_max) ) + Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max) + + allocate( Hmat_kl(na_max,nb_max) ) + call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, & + tmp2, size(tmp2,1), Vtmp, size(Vtmp,1), & + 0.d0, Hmat_kl, size(Hmat_kl,1) ) + deallocate( tmp2 ) + deallocate( Vtmp ) + + do m = 1, n_selected + ii = numalpha_selected(m) + jj = numbeta_selected (m) + H0_1d(m) = Hmat_kl(ii,jj) + enddo + deallocate( Hmat_kl ) + + call wall_time(tf) + print *, " end const_H0_1d after (min) ", (tf-ti)/60. + print *, "" + + return +end subroutine const_H0_1d + + + + + +subroutine const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d) + + implicit none + + integer, intent(in) :: na_new, nb_new, na_max, nb_max, n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hkl_1d(n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m + double precision :: h12 + + double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:) + double precision, allocatable :: U1d(:), V1d(:) + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + + double precision :: ti, tf + + print *, "" + print *, " start const_Hkl_1d" + call wall_time(ti) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( U1d(na) , V1d(nb) ) + U1d(1:na) = Uref(1:na,na_new) + V1d(1:nb) = Vref(1:nb,nb_new) + + allocate( tmp1(na,nb) ) + tmp1 = 0.d0 + + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + tmp1(i,j) += h12 * U1d(k) * V1d(l) + + enddo + enddo + enddo + enddo + + deallocate( U1d , V1d ) + + ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m) + allocate( Utmp(na,na_max) ) + Utmp(1:na,1:na_max) = Uref(1:na,1:na_max) + + allocate( tmp2(nb,na_max) ) + call DGEMM('T', 'N', nb, na_max, na, 1.d0, & + tmp1, size(tmp1,1), Utmp, size(Utmp,1), & + 0.d0, tmp2, size(tmp2,1) ) + deallocate( tmp1 , Utmp ) + + ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n) + allocate( Vtmp(nb,nb_max) ) + Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max) + + allocate( Hmat_kl(na_max,nb_max) ) + call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, & + tmp2, size(tmp2,1), Vtmp, size(Vtmp,1), & + 0.d0, Hmat_kl, size(Hmat_kl,1) ) + deallocate( tmp2 ) + deallocate( Vtmp ) + + do m = 1, n_toselect + ii = numalpha_toselect(m) + jj = numbeta_toselect (m) + Hkl_1d(m) = Hmat_kl(ii,jj) + enddo + deallocate( Hmat_kl ) + + call wall_time(tf) + print *, " end const_Hkl_1d after (min) ", (tf-ti)/60. + print *, "" + + return +end subroutine const_Hkl_1d + diff --git a/devel/svdwf/psiSVD_pt2_v1.irp.f b/devel/svdwf/psiSVD_pt2_v1.irp.f new file mode 100644 index 0000000..fa11ed3 --- /dev/null +++ b/devel/svdwf/psiSVD_pt2_v1.irp.f @@ -0,0 +1,734 @@ +program psiSVD_pt2_v1 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, m, n + double precision :: x, y, h12 + + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + integer :: rank_max + double precision :: E0, overlop, Ept2 + double precision, allocatable :: H0(:,:) + double precision, allocatable :: eigvec0(:,:), eigval0(:), coeff_psi(:), coeff_tmp(:) + + integer :: ii, jj, ia, ib + double precision, allocatable :: Hdiag(:), Hkl_save(:,:), Hkl_1d(:), Hkl_tmp(:,:), Hdiag_tmp(:) + double precision, allocatable :: H0_1d(:), H0_tmp(:,:) + + integer :: na_new, nb_new, ind_new, ind_gs + double precision :: ctmp, coeff_new + double precision, allocatable :: epsil(:), epsil_energ(:), check_ov(:) + + double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:) + + integer :: n_selected, n_toselect, n_tmp, na_max, nb_max + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer :: cantor_pairing_ij, cantor_pairing_new + integer, allocatable :: cantor_pairing(:), cantor_pairing_tmp(:) + + double precision :: t_beg, t_end + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) + call get_excitation_degree(det1,det2,degree,N_int) + call i_H_j(det1, det2, N_int, h12) + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call cpu_time(t_beg) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref & + , size(Vtref,1), n_det_alpha_unique, n_det_beta_unique) + call cpu_time(t_end) + print *, " SVD is performed after (min)", (t_end-t_beg)/60. + + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + deallocate( Aref ) + + ! --------------------------------------------------------------------------------------- + + ! *** PARAMETERS *** ! + na_max = n_det_alpha_unique + nb_max = n_det_beta_unique + ! *** ***** *** ! + + print *, ' na_max = ', na_max + print *, ' nb_max = ', nb_max + + ! --------------------------------------------------------------------------------------- + ! initial wavefunction: psi_0 + + n_selected = 1 + allocate(numalpha_selected(n_selected), numbeta_selected(n_selected), cantor_pairing(n_selected)) + + numalpha_selected(1) = 1 + numbeta_selected (1) = 1 + cantor_pairing (1) = 4 !int( 0.5*(1+1)*(1+1+1) ) + 1 + + allocate( coeff_psi(n_selected) ) + coeff_psi(1) = 1.d0 + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! construnc the initial basis to select phi_1 from the FSVD + + n_toselect = na_max * nb_max - n_selected + print *, ' toselect = ', n_toselect + print *, ' to trun = ', n_det_alpha_unique*n_det_beta_unique - na_max*nb_max + + allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) + k = 0 + do i = 1, na_max + do j = 1, nb_max + + cantor_pairing_ij = int( 0.5*(i+j)*(i+j+1) ) + j + if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle + + k = k + 1 + numalpha_toselect(k) = i + numbeta_toselect (k) = j + + enddo + enddo + if( k.ne.n_toselect ) then + print *, " error in chosing vectors toselect" + print *, " n_toselect =", n_toselect + print *, " k =", k + stop + endif + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! read < u_k v_l | H | u_k v_l > for all vectors + + allocate( Hdiag(n_toselect) , H0(n_selected,n_selected) ) + + open( unit=11, FILE="klHkl_v1.dat", ACTION="READ") + + read(11,*) i, i, E0 + H0(1,1) = E0 + + do i = 1, n_toselect + read(11,*) ia, ib, ctmp + !print *, ' ia , ib :', ia, ib + if( (numalpha_toselect(i).ne.ia) .or. (numbeta_toselect(i).ne.ib) ) then + print *, ' error in reading klHkl_v1 ' + print *, ' ia , ib :', ia, ib + print *, numalpha_toselect(i) , numbeta_toselect(i) + stop + endif + Hdiag(i) = ctmp + enddo + + close(11) + + ! --------------------------------------------------------------------------------------- + + E0 = E0 + nuclear_repulsion + print*, ' space dimen = ', n_selected + print*, ' ground state E0 = ', E0 + + na_new = 1 + nb_new = 1 + + !________________________________________________________________________________________________________ + ! + ! increase the size of psi0 iteratively + !________________________________________________________________________________________________________ + + ! *** PARAMETERS *** ! + rank_max = na_max * nb_max + ! *** ***** *** ! + + if( rank_max .gt. (na_max*nb_max) ) then + print *, " rank_max should be less then na_max x nb_max" + stop + endif + + + allocate( Hkl_save(n_toselect,n_selected) ) + + do while( n_selected .lt. rank_max ) + + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + print*, ' ' + print*, ' new iteration ' + + if( n_toselect .lt. 1 ) then + + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + print*, ' no more vectors to construct a new basis ' + print*, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + stop + + else + + ! --------------------------------------------------------------------------------------- + ! select a new vector + + allocate( Hkl_1d(n_toselect) ) + call const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d) + Hkl_save(1:n_toselect,n_selected) = Hkl_1d(1:n_toselect) + deallocate( Hkl_1d ) + + ! choose the best vector + allocate( epsil(n_toselect) , epsil_energ(n_toselect) ) + do ii = 1, n_toselect + + ctmp = 0.d0 + do l = 1, n_selected + ctmp = ctmp + coeff_psi(l) * Hkl_save(ii,l) + enddo + epsil(ii) = ctmp * ctmp / ( E0 - (Hdiag(ii)+nuclear_repulsion) ) + + epsil_energ(ii) = epsil(ii) + epsil(ii) = dabs( epsil(ii) ) + enddo + + ind_new = MAXLOC( epsil, DIM=1 ) + + ept2 = epsil_energ(ind_new) + if( ept2 .gt. 0.d0 ) then + print *, ' ept2 > 0 !!!!!!!!!! ' + print *, na_new, nb_new, ept2 + stop + endif + + na_new = numalpha_toselect(ind_new) + nb_new = numbeta_toselect (ind_new) + cantor_pairing_new = int( 0.5 * (na_new+nb_new) * (na_new+nb_new+1) ) + nb_new + + print *, ' ind_new ', ind_new + print *, ' best vector', na_new, nb_new, ept2 + deallocate(epsil,epsil_energ) + + ! new coefficient + coeff_new = 0.d0 + do l = 1, n_selected + coeff_new += coeff_psi(l) * Hkl_save(ind_new,l) + enddo + coeff_new = coeff_new / ( E0 - (Hdiag(ind_new)+nuclear_repulsion) ) + print *, ' new coeff = ', coeff_new + print *, ' Hdiag = ', Hdiag(ind_new) + + ! < psi_old | H | delta_psi > + allocate( H0_1d(n_selected) ) + call const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! new psi + + allocate( numalpha_tmp(n_selected), numbeta_tmp(n_selected), coeff_tmp(n_selected) ) + allocate( cantor_pairing_tmp(n_selected) ) + allocate( H0_tmp(n_selected,n_selected) ) + + coeff_tmp (:) = coeff_psi (:) + numalpha_tmp (:) = numalpha_selected(:) + numbeta_tmp (:) = numbeta_selected (:) + cantor_pairing_tmp(:) = cantor_pairing (:) + H0_tmp (:,:) = H0 (:,:) + + deallocate( numalpha_selected, numbeta_selected, coeff_psi, cantor_pairing, H0 ) + + n_tmp = n_selected + n_selected = n_selected + 1 + + allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) , coeff_psi(n_selected) ) + allocate( cantor_pairing(n_selected) ) + allocate( H0(n_selected,n_selected) ) + H0(:,:) = 0.d0 + + do l = 1, n_tmp + coeff_psi (l) = coeff_tmp (l) + numalpha_selected(l) = numalpha_tmp (l) + numbeta_selected (l) = numbeta_tmp (l) + cantor_pairing (l) = cantor_pairing_tmp(l) + enddo + H0(1:n_tmp,1:n_tmp) = H0_tmp(1:n_tmp,1:n_tmp) + + deallocate( numalpha_tmp, numbeta_tmp, coeff_tmp, cantor_pairing_tmp, H0_tmp ) + + coeff_psi (n_selected) = coeff_new + numalpha_selected(n_selected) = na_new + numbeta_selected (n_selected) = nb_new + cantor_pairing (n_selected) = cantor_pairing_new + + H0(1:n_tmp,n_selected) = H0_1d(1:n_tmp) + H0(n_selected,1:n_tmp) = H0_1d(1:n_tmp) + deallocate( H0_1d ) + H0(n_selected,n_selected) = Hdiag(ind_new) + + ! --------------------------------------------------------------------------------------- + + + + ! --------------------------------------------------------------------------------------- + ! new energy + + allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) + call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) + + ! get the postsvd ground state + allocate( check_ov(n_selected) ) + do l = 1, n_selected + overlop = 0.d0 + do i = 1, n_selected + ia = numalpha_selected(i) + ib = numbeta_selected (i) + if( ia .eq. ib ) overlop = overlop + eigvec0(i,l) * Dref(ia) + !overlop = overlop + eigvec0(i,l) * coeff_psi(i) + enddo + check_ov(l) = dabs(overlop) + enddo + ind_gs = MAXLOC( check_ov, DIM=1 ) + overlop = check_ov(ind_gs) + E0 = eigval0(ind_gs)+nuclear_repulsion + coeff_psi(:) = eigvec0(:,ind_gs) + + deallocate( check_ov, eigval0, eigvec0 ) + + print*, ' space dimen = ', n_selected + print*, ' diag energy = ', E0 + print*, ' overlop = ', overlop + print*, ' index = ', ind_gs + + ! --------------------------------------------------------------------------------------- + + + write(2111, '( 3(I5,3X), 3(F15.8,3X) )') n_selected, na_new, nb_new, ept2, E0, overlop + + + ! --------------------------------------------------------------------------------------- + ! remove selected pair | na_new nb_new > + + allocate( numalpha_tmp(n_toselect), numbeta_tmp(n_toselect), Hdiag_tmp(n_toselect) ) + numalpha_tmp(:) = numalpha_toselect(:) + numbeta_tmp (:) = numbeta_toselect (:) + Hdiag_tmp (:) = Hdiag (:) + + ii = n_selected - 1 + allocate( Hkl_tmp(n_toselect,ii) ) + Hkl_tmp(1:n_toselect,1:ii) = Hkl_save(1:n_toselect,1:ii) + + deallocate( numalpha_toselect , numbeta_toselect, Hkl_save, Hdiag ) + + n_tmp = n_toselect + n_toselect = n_toselect - 1 + print*, ' rank to select = ', n_toselect + + allocate(numalpha_toselect(n_toselect), numbeta_toselect(n_toselect), Hkl_save(n_toselect,n_selected)) + allocate(Hdiag(n_toselect)) + + Hkl_save = 0.d0 + l = 0 + do k = 1, n_tmp + + ia = numalpha_tmp(k) + ib = numbeta_tmp (k) + cantor_pairing_ij = int( 0.5*(ia+ib)*(ia+ib+1) ) + ib + if( ANY(cantor_pairing .eq. cantor_pairing_ij) ) cycle + + l = l + 1 + numalpha_toselect(l) = numalpha_tmp(k) + numbeta_toselect (l) = numbeta_tmp (k) + Hdiag (l) = Hdiag_tmp (k) + + Hkl_save(l,1:ii) = Hkl_tmp(k,1:ii) + + enddo + if( l .ne. n_toselect) then + print *, " error in updating to select vectors" + print *, " l = ", l + print *, " n_toselect = ", n_toselect + stop + endif + + deallocate( numalpha_tmp , numbeta_tmp , Hkl_tmp, Hdiag_tmp ) + + ! --------------------------------------------------------------------------------------- + + + endif + + call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) + W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) + print*, " " + print*, " elapsed time (min) = ", W_tot_time_it/60.d0 + + end do + !________________________________________________________________________________________________________ + !________________________________________________________________________________________________________ + + + + ! *************************************************************************************************** + ! save to ezfion + !allocate( Uezfio(n_det_alpha_unique,rank0,1), Dezfio(rank0,1), Vezfio(n_det_beta_unique,rank0,1) ) + !do l = 1, rank0 + ! Dezfio(l,1) = coeff_psi(l) + ! Uezfio(:,l,1) = U0(:,l) + ! Vezfio(:,l,1) = V0(:,l) + !enddo + !call ezfio_set_spindeterminants_n_det(N_det) + !call ezfio_set_spindeterminants_n_states(N_states) + !call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique) + !call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique) + !call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + !call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + !call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + + !call ezfio_set_spindeterminants_n_svd_coefs(rank0) + !call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio) + !call ezfio_set_spindeterminants_psi_svd_beta(Vezfio ) + !call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio) + !deallocate( Uezfio, Dezfio, Vezfio ) + ! *************************************************************************************************** + + call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) + W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) + print *, ' ___________________________________________________________________' + print *, ' ' + print *, " Execution avec ", nb_taches, " threads" + print *, " total elapsed time (min) = ", W_tot_time/60.d0 + print *, ' ___________________________________________________________________' + + + + deallocate( Dref ) + deallocate( Uref, Vref ) + + deallocate( psi_coef ) + deallocate( numalpha_selected, numbeta_selected, numalpha_toselect, numbeta_toselect ) + deallocate( H0, Hdiag, Hkl_save ) + +end + + + + +subroutine const_H0_1d(na_new, nb_new, na_max, nb_max, n_selected, Uref, Vref, numalpha_selected, numbeta_selected, H0_1d) + + implicit none + + integer, intent(in) :: na_new, nb_new, na_max, nb_max, n_selected + integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H0_1d(n_selected) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m + double precision :: h12 + + double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:) + double precision, allocatable :: U1d(:), V1d(:) + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + + double precision :: ti, tf + + print *, "" + print *, " start const_H0_1d" + call wall_time(ti) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( U1d(na) , V1d(nb) ) + U1d(1:na) = Uref(1:na,na_new) + V1d(1:nb) = Vref(1:nb,nb_new) + + allocate( tmp1(na,nb) ) + tmp1 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,U1d,V1d,tmp1) + + allocate( tmp2(na,nb) ) + tmp2 = 0.d0 + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + tmp2(i,j) += h12 * U1d(k) * V1d(l) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do j = 1, nb + do i = 1, na + tmp1(i,j) += tmp2(i,j) + enddo + enddo + !$OMP END CRITICAL + + deallocate( tmp2 ) + !$OMP END PARALLEL + + deallocate( U1d , V1d ) + + ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m) + allocate( Utmp(na,na_max) ) + Utmp(1:na,1:na_max) = Uref(1:na,1:na_max) + + allocate( tmp2(nb,na_max) ) + call DGEMM('T', 'N', nb, na_max, na, 1.d0, & + tmp1, size(tmp1,1), Utmp, size(Utmp,1), & + 0.d0, tmp2, size(tmp2,1) ) + deallocate( tmp1 ) + deallocate( Utmp ) + + ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n) + allocate( Vtmp(nb,nb_max) ) + Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max) + + allocate( Hmat_kl(na_max,nb_max) ) + call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, & + tmp2, size(tmp2,1), Vtmp, size(Vtmp,1), & + 0.d0, Hmat_kl, size(Hmat_kl,1) ) + deallocate( tmp2 ) + deallocate( Vtmp ) + + do m = 1, n_selected + ii = numalpha_selected(m) + jj = numbeta_selected (m) + H0_1d(m) = Hmat_kl(ii,jj) + enddo + deallocate( Hmat_kl ) + + call wall_time(tf) + print *, " end const_H0_1d after (min) ", (tf-ti)/60. + print *, "" + + return +end subroutine const_H0_1d + + + + + +subroutine const_Hkl_1d(na_new, nb_new, na_max, nb_max, n_toselect, Uref, Vref, numalpha_toselect, numbeta_toselect, Hkl_1d) + + implicit none + + integer, intent(in) :: na_new, nb_new, na_max, nb_max, n_toselect + integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: Hkl_1d(n_toselect) + + integer(bit_kind) :: det1(N_int,2) + integer(bit_kind) :: det2(N_int,2) + integer :: degree, na, nb + + integer :: i, j, k, l, ii, jj, m + double precision :: h12 + + double precision, allocatable :: Hmat_kl(:,:), tmp1(:,:), tmp2(:,:) + double precision, allocatable :: U1d(:), V1d(:) + double precision, allocatable :: Utmp(:,:), Vtmp(:,:) + + double precision :: ti, tf + + print *, "" + print *, " start const_Hkl_1d" + call wall_time(ti) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( U1d(na) , V1d(nb) ) + U1d(1:na) = Uref(1:na,na_new) + V1d(1:nb) = Vref(1:nb,nb_new) + + allocate( tmp1(na,nb) ) + tmp1 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,l,h12,det1,det2,degree,tmp2) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,U1d,V1d,tmp1) + + allocate( tmp2(na,nb) ) + tmp2 = 0.d0 + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if(degree .gt. 2) cycle + + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + + call get_excitation_degree(det1,det2,degree,N_int) + if(degree .gt. 2) cycle + + call i_H_j(det1, det2, N_int, h12) + if( h12 .eq. 0.d0) cycle + + tmp2(i,j) += h12 * U1d(k) * V1d(l) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL + do j = 1, nb + do i = 1, na + tmp1(i,j) += tmp2(i,j) + enddo + enddo + !$OMP END CRITICAL + + deallocate( tmp2 ) + !$OMP END PARALLEL + + deallocate( U1d , V1d ) + + ! tmp2(j,m) = sum_i tmp1(i,j) x Uref(i,m) + allocate( Utmp(na,na_max) ) + Utmp(1:na,1:na_max) = Uref(1:na,1:na_max) + + allocate( tmp2(nb,na_max) ) + call DGEMM('T', 'N', nb, na_max, na, 1.d0, & + tmp1, size(tmp1,1), Utmp, size(Utmp,1), & + 0.d0, tmp2, size(tmp2,1) ) + deallocate( tmp1 , Utmp ) + + ! Hmat_kl(m,n) = sum_j tmp2(j,m) x Vref(j,n) + allocate( Vtmp(nb,nb_max) ) + Vtmp(1:nb,1:nb_max) = Vref(1:nb,1:nb_max) + + allocate( Hmat_kl(na_max,nb_max) ) + call DGEMM('T', 'N', na_max, nb_max, nb, 1.d0, & + tmp2, size(tmp2,1), Vtmp, size(Vtmp,1), & + 0.d0, Hmat_kl, size(Hmat_kl,1) ) + deallocate( tmp2 ) + deallocate( Vtmp ) + + do m = 1, n_toselect + ii = numalpha_toselect(m) + jj = numbeta_toselect (m) + Hkl_1d(m) = Hmat_kl(ii,jj) + enddo + deallocate( Hmat_kl ) + + call wall_time(tf) + print *, " end const_Hkl_1d after (min) ", (tf-ti)/60. + print *, "" + + return +end subroutine const_Hkl_1d + diff --git a/devel/svdwf/pyth_RSVD.py b/devel/svdwf/pyth_RSVD.py new file mode 100644 index 0000000..3729a33 --- /dev/null +++ b/devel/svdwf/pyth_RSVD.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# !!! +import os, sys +# !!! +#QP_PATH=os.environ["QMCCHEM_PATH"] +#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/") +# !!! +from ezfio import ezfio +from datetime import datetime +import numpy as np +from scipy.sparse.linalg import svds +from R3SVD_LiYu import R3SVD_LiYu +from RSVD import powit_RSVD +from R3SVD_AMMAR import R3SVD_AMMAR +import time +# !!! +fmt = '%5d' + 2 * ' %e' +# !!! +if __name__ == "__main__": + # !!! + if len(sys.argv) != 2: + print("Usage: %s "%sys.argv[0]) + sys.exit(1) + filename = sys.argv[1] + ezfio.set_file(filename) + # !!! + N_det = ezfio.get_spindeterminants_n_det() + A_rows = np.array(ezfio.get_spindeterminants_psi_coef_matrix_rows()) + A_cols = np.array(ezfio.get_spindeterminants_psi_coef_matrix_columns()) + A_vals = np.array(ezfio.get_spindeterminants_psi_coef_matrix_values()) + nrows, ncols = ezfio.get_spindeterminants_n_det_alpha(), ezfio.get_spindeterminants_n_det_beta() + Y = np.zeros( (nrows, ncols) ) + for k in range(N_det): + i = A_rows[k] - 1 + j = A_cols[k] - 1 + Y[i,j] = A_vals[0][k] + print("# # # # # # # # # # # # # # # # # # # # # #") + print('matrix dimensions = {} x {}'.format(nrows, ncols)) + print("# # # # # # # # # # # # # # # # # # # # # # \n") + normY = np.linalg.norm(Y, ord='fro') + print( normY ) + # !!! + print('Full SVD:') + t_beg = time.time() + U, S_FSVD, VT = np.linalg.svd(Y, full_matrices=0) + t_end = time.time() + rank = S_FSVD.shape[0] + energy = np.sum(np.square(S_FSVD)) / normY**2 + err_SVD = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_FSVD),VT)), ord='fro') / normY + print('rank = {}, energy = {}, error = {}%, CPU time = {} \n'.format(rank, energy, err_SVD, t_end-t_beg)) + # !!! + np.savetxt('results_python/h2o_pseudo/SingValues_FullSVD.txt', np.transpose([ np.array(range(rank))+1, S_FSVD ]), fmt='%5d' + ' %e', delimiter=' ') + # !!! + t = 50 + delta_t = 10 + npow = 15 + err_thr = 1e-3 + maxit = 10 + # !!! + print('RRR SVD Li & Yu:') + t_beg = time.time() + U, S_R3SVD, VT = R3SVD_LiYu(Y, t, delta_t, npow, err_thr, maxit) + t_end = time.time() + rank = S_R3SVD.shape[0] + energy = np.sum( np.square(S_R3SVD) ) / normY**2 + err_SVD = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_R3SVD),VT)), ord='fro') / normY + print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg)) + # !!! + err_R3SVD = np.zeros( (rank) ) + for i in range(rank): + err_R3SVD[i] = 100.0 * abs( S_FSVD[i] - S_R3SVD[i] ) / S_FSVD[i] + np.savetxt('results_python/h2o_pseudo/SingValues_R3SVD.txt', np.transpose([ np.array(range(rank))+1, S_R3SVD, err_R3SVD ]), fmt=fmt, delimiter=' ') + # !!! + nb_oversamp = 10 + tol_SVD = 1e-10 + print('RRR SVD my version:') + t_beg = time.time() + U, S_MRSVD, VT = R3SVD_AMMAR(Y, t, delta_t, npow, nb_oversamp, err_thr, maxit, tol_SVD) + t_end = time.time() + rank = S_MRSVD.shape[0] + energy = np.sum( np.square(S_MRSVD) ) / normY**2 + err_SVD = 100. * np.linalg.norm(Y - np.dot(U,np.dot(np.diag(S_MRSVD),VT)), ord='fro') / normY + print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg)) + # !!! + err_MRSVD = np.zeros( (rank) ) + for i in range(rank): + err_MRSVD[i] = 100.0 * abs( S_FSVD[i] - S_MRSVD[i] ) / S_FSVD[i] + np.savetxt('results_python/h2o_pseudo/SingValues_MRSVD.txt', np.transpose([ np.array(range(rank))+1, S_MRSVD, err_MRSVD ]), fmt=fmt, delimiter=' ') + # !!! + trank = rank + print("Truncated RSVD (pre-fixed rank = {} & oversampling parameter = {}):".format(trank,nb_oversamp)) + t_beg = time.time() + U, S_RSVD, VT = powit_RSVD(Y, trank, npow, nb_oversamp) + t_end = time.time() + rank = S_RSVD.shape[0] + energy = np.sum( np.square(S_RSVD) ) / normY**2 + err_SVD = 100. * np.linalg.norm( Y - np.dot(U,np.dot(np.diag(S_RSVD),VT)), ord="fro") / normY + print('nb Pow It = {}, rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(npow, rank, energy, err_SVD, t_end-t_beg)) + # !!! + err_RSVD = np.zeros( (rank) ) + for i in range(rank): + err_RSVD[i] = 100.0 * abs( S_FSVD[i] - S_RSVD[i] ) / S_FSVD[i] + np.savetxt('results_python/h2o_pseudo/SingValues_RSVD.txt', np.transpose([ np.array(range(rank))+1, S_RSVD, err_RSVD ]), fmt=fmt, delimiter=' ') + # !!! + print("Truncated SVD (scipy):") + t_beg = time.time() + U, S_TSVD, VT = svds(Y, min(trank, min(Y.shape[0],Y.shape[1])-1 ), which='LM') + t_end = time.time() + rank = S_TSVD.shape[0] + energy = np.sum( np.square(S_TSVD) ) / normY**2 + err_SVD = 100. * np.linalg.norm( Y - np.dot(U, np.dot(np.diag(S_TSVD),VT) ), ord="fro") / normY + print('rank = {}, energy = {}, error = {} %, CPU time = {}\n'.format(rank, energy, err_SVD, t_end-t_beg)) + # !!! + err_TSVD = np.zeros( (rank) ) + for i in range(rank-1): + for j in range(i+1,rank): + if( S_TSVD[j] > S_TSVD[i]): + S_TSVD[i], S_TSVD[j] = S_TSVD[j], S_TSVD[i] + for i in range(rank): + err_TSVD[i] = 100.0 * abs( S_FSVD[i] - S_TSVD[i] ) / S_FSVD[i] + np.savetxt('results_python/h2o_pseudo/SingValues_TSVD.txt', np.transpose([ np.array(range(rank))+1, S_TSVD, err_TSVD ]), fmt=fmt, delimiter=' ') + # !!! +# !!! diff --git a/devel/svdwf/set_QP_svd.py b/devel/svdwf/set_QP_svd.py new file mode 100644 index 0000000..febf9a1 --- /dev/null +++ b/devel/svdwf/set_QP_svd.py @@ -0,0 +1,223 @@ +# !!! +import sys, os +#QMCCHEM_PATH=os.environ["QMCCHEM_PATH"] +#sys.path.insert(0,QMCCHEM_PATH+"/EZFIO/Python/") +# !!! +from ezfio import ezfio +from math import sqrt +from datetime import datetime +import time +import numpy as np +import subprocess +from scipy.linalg import eig, eigh +from RSVD import powit_RSVD + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ +def get_Hsvd_QP(Hsvd_qp_txt): + Hsvd_qp = np.zeros( (n_svd,n_svd) ) + Hsvd_qp_file = open(Hsvd_qp_txt, 'r') + for line in Hsvd_qp_file: + line = line.split() + i = int(line[0]) - 1 + j = int(line[1]) - 1 + Hsvd_qp[i,j] = float(line[2]) + return(Hsvd_qp) +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ + + + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ +def get_Hpostsvd_QP(Hpostsvd_qp_txt): + Hpostsvd_qp = np.zeros( (n_svd*n_svd,n_svd*n_svd) ) + Hpostsvd_qp_file = open(Hpostsvd_qp_txt, 'r') + for line in Hpostsvd_qp_file: + line = line.split() + i = int(line[0]) - 1 + j = int(line[1]) - 1 + Hpostsvd_qp[i,j] = float(line[2]) + return(Hpostsvd_qp) +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ + + + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ +def get_Esvd_QP(Hsvd_qp): + # symmetrise and diagonalise + aa = Hsvd_qp + aa = 0.5*( aa + aa.T ) + bb = np.identity(n_svd) + #eigvals_svd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True, + eigvals_svd, vr = eig(aa, left=False, right=True, overwrite_a=True, overwrite_b=True, + check_finite=True, homogeneous_eigvals=False) + recouvre_svd = np.abs(psi_svd_coeff @ vr) + ind_gssvd = np.argmax(recouvre_svd) + E_svd = eigvals_svd[ind_gssvd] + E_toadd + return( E_svd, vr[:,ind_gssvd] ) +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ +def get_Epostsvd_QP(Hpostsvd_qp): + # symmetrise and diagonalise + aa = Hpostsvd_qp + aa = 0.5*( aa + aa.T ) + bb = np.identity(n_svd*n_svd) + eigvals_postsvd, vr = eig(aa, bb, left=False, right=True, overwrite_a=True, overwrite_b=True, + check_finite=True, homogeneous_eigvals=False) + d_postsvd = np.diagflat(psi_svd_coeff) + d_postsvd = d_postsvd.reshape( (1,n_svd*n_svd) ) + recouvre_postsvd = np.abs(d_postsvd @ vr) + ind_gspostsvd = np.argmax(recouvre_postsvd) + # !!! + E_postsvd = eigvals_postsvd[ind_gspostsvd] + E_toadd + # !!! + return( E_postsvd, vr[:,ind_gspostsvd] ) +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ + + + +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ +def SVD_postsvd(sigma_postsvd): + # !!! + print(' performing new SVD for the post SVD eigenvector:' ) + # !!! + sigma_postsvd_mat = np.zeros( (n_svd,n_svd) ) + for indc in range(1, n_svd**2+1): + if( ( indc % n_svd ) !=0 ): + kp = indc % n_svd + else: + kp = n_svd + indc1 = int( ( indc - kp ) / n_svd ) + k = indc1 % n_svd + 1 + irow = kp + (k-1)*n_svd - 1 + sigma_postsvd_mat[kp-1][k-1] = sigma_postsvd[irow] + sigma_postsvd = sigma_postsvd_mat + print(sigma_postsvd[0:n_svd,0:n_svd]) + # !!! + # construct the new matrix Y + Y = np.dot( U_svd , np.dot(sigma_postsvd , V_svd.T) ) + normY = np.linalg.norm(Y, ord='fro') + # !!! + # parameters of RSVD + rank = n_svd + npow = 10 + nb_oversamp = 10 + # !!! + # call RSV + U_postSVD, sigma_postsvd_diag, VT_postsvd = powit_RSVD(Y, rank, npow, nb_oversamp) + # !!! + # check precision + Y_SVD = np.dot( U_postSVD , np.dot( np.diag(sigma_postsvd_diag) , VT_postsvd ) ) + energy = np.sum( np.square(sigma_postsvd_diag) ) / normY**2 + err_SVD = 100. * np.linalg.norm( Y - Y_SVD, ord="fro") / normY + print(' energy = {}, error = {}\n'.format(energy, err_SVD)) + # !!! + return(U_postSVD, sigma_postsvd_diag, VT_postsvd) + # !!! +# ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ ! ~ + + + + + +if __name__ == '__main__': + t0 = time.time() + # !!! + # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # + EZFIO_file = "/home/aammar/qp2/src/svdwf/2h2_cisd_nsvd20" + Hsvd_qp_txt = 'H_QP_svd_2h2_nsvd20.txt' + Hpostsvd_qp_txt = 'H_QP_postsvd_2h2_nsvd20.txt' + # h2o + #E_toadd = 9.194966082434476 #6.983610961797779 + # 2h2 + E_toadd = 1.711353545183182 + # f2 + #E_toadd = 30.35863309325590 + # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # + # !!! + ezfio.set_file(EZFIO_file) + n_svd = ezfio.get_spindeterminants_n_svd_coefs() + psi_svd_coeff = np.array(ezfio.get_spindeterminants_psi_svd_coefs()) + U_svd = np.array(ezfio.get_spindeterminants_psi_svd_alpha()) + V_svd = np.array(ezfio.get_spindeterminants_psi_svd_beta()) + # !!! + U_svd = U_svd[0,:,:].T + V_svd = V_svd[0,:,:].T + # !!! + print(" Today's date:", datetime.now() ) + print(" EZFIO file = {}".format(EZFIO_file)) + print(" n_svd = {}\n".format(n_svd) ) + # !!! + print(' initial svd coeff = {}\n'.format(psi_svd_coeff)) + C_old = np.dot( U_svd , np.dot( np.diagflat(psi_svd_coeff) , V_svd.T ) ) + norm_C = np.linalg.norm(C_old, ord="fro") + # !!! + read_QPsvd = 'y' + if( read_QPsvd == 'y' ): + Hsvd_qp = get_Hsvd_QP(Hsvd_qp_txt) + E_svd_QP, sigma_svd_QP = get_Esvd_QP(Hsvd_qp) + print(' QP svd enegry = {}'.format(E_svd_QP) ) + sigma_svd_QP = sigma_svd_QP * np.sign(sigma_svd_QP[0]) + print(' QP svd coeff = {}\n'.format(sigma_svd_QP)) + # compare C_new and C_old + C_new = np.dot( U_svd , np.dot( np.diagflat(sigma_svd_QP) , V_svd.T ) ) + delta_C = 100. * np.linalg.norm(C_old-C_new, ord="fro") / norm_C + print(' Difference between C_old and C_new svd = {} %\n'.format(delta_C)) + # !!! + read_QPpostsvd = 'y' + if( read_QPpostsvd == 'y' ): + Hpostsvd_qp = get_Hpostsvd_QP(Hpostsvd_qp_txt) + E_postsvd_QP, sigma_postsvd_QP = get_Epostsvd_QP(Hpostsvd_qp) + print(' QP postsvd enegry = {}'.format(E_postsvd_QP) ) + U_postSVD, sigma_postsvd_diag, Vt_postSVD = SVD_postsvd(sigma_postsvd_QP) + V_postSVD = Vt_postSVD.T + print(' QP postsvd coeff = {}\n'.format(sigma_postsvd_diag)) + # compare C_new and C_old + C_new = np.dot( U_postSVD , np.dot( np.diag(sigma_postsvd_diag) , Vt_postSVD ) ) + delta_C_m = 100. * np.linalg.norm(C_old-C_new, ord="fro") / norm_C + delta_C_p = 100. * np.linalg.norm(C_old+C_new, ord="fro") / norm_C + delta_C = min(delta_C_m,delta_C_p) + print(' Difference between C_old and C_new postsvd = {} %'.format(delta_C)) + # !!! + # !!! + # ___________________________________________________________________________ + # ___________________________________________________________________________ + # + save_to_EZFIO = '' + # + if( save_to_EZFIO == 'svd'): + ezfio.set_spindeterminants_psi_svd_coefs( sigma_svd_QP ) + direc_svdcoeff = EZFIO_file + '/spindeterminants/psi_svd_coefs.gz' + print(' {} is modified'.format(direc_svdcoeff) ) + # # # + elif( save_to_EZFIO == 'postsvd' ): + U_postSVD_toEZFIO = np.zeros( ( 1, U_postSVD.shape[1], U_postSVD.shape[0] ) ) + V_postSVD_toEZFIO = np.zeros( ( 1, V_postSVD.shape[1], V_postSVD.shape[0] ) ) + U_postSVD_toEZFIO[0,:,:] = U_postSVD.T + V_postSVD_toEZFIO[0,:,:] = V_postSVD.T + # + ezfio.set_spindeterminants_psi_svd_alpha( U_postSVD_toEZFIO ) + ezfio.set_spindeterminants_psi_svd_coefs( sigma_postsvd_diag ) + ezfio.set_spindeterminants_psi_svd_beta( V_postSVD_toEZFIO ) + # + direc_svdcoeff = EZFIO_file + '/spindeterminants/psi_svd_coefs.gz' + direc_svdalpha = EZFIO_file + '/spindeterminants/psi_svd_alpha.gz' + direc_svdbeta = EZFIO_file + '/spindeterminants/psi_svd_beta.gz' + print(' {} is modified'.format(direc_svdcoeff) ) + print(' {} is modified'.format(direc_svdalpha) ) + print(' {} is modified'.format(direc_svdbeta ) ) + else: + pass + # ___________________________________________________________________________ + # ___________________________________________________________________________ + # + print("end after {:.3f} minutes".format((time.time()-t0)/60.) ) + # !!! +# !!! diff --git a/devel/svdwf/svdwf.irp.f b/devel/svdwf/svdwf.irp.f index c694b3e..7348a16 100644 --- a/devel/svdwf/svdwf.irp.f +++ b/devel/svdwf/svdwf.irp.f @@ -42,6 +42,7 @@ subroutine run endif entropy = 0.d0 + k=n_det_beta_unique do i=1,n_det_beta_unique print *, i, real(D(i)), real(D(i)**2), real(sum(D(1:i)**2)) entropy -= D(i) * dlog(D(i)) @@ -53,6 +54,7 @@ subroutine run print *, 'threshold: ', 2.858 * D(k/2) print *, 'Entropy : ', entropy + call ezfio_set_spindeterminants_n_svd_coefs(min(n_det_beta_unique,n_det_alpha_unique)) call ezfio_set_spindeterminants_psi_svd_alpha(U) call ezfio_set_spindeterminants_psi_svd_beta (Vt) call ezfio_set_spindeterminants_psi_svd_coefs(D)