From b0eb4cb55eb05b729fd74f3cf3b716f31a1b704d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 4 Nov 2024 11:29:41 +0100 Subject: [PATCH] saving work on Dav on Olympe --- src/GW/RGW_ppBSE.f90 | 130 ++++------ src/LR/ppLR_GW_davidson.f90 | 2 +- src/LR/ppLR_davidson.f90 | 471 +++++++++++++++++++++++++++++++++++- 3 files changed, 505 insertions(+), 98 deletions(-) diff --git a/src/GW/RGW_ppBSE.f90 b/src/GW/RGW_ppBSE.f90 index 48bc225..5ea9318 100644 --- a/src/GW/RGW_ppBSE.f90 +++ b/src/GW/RGW_ppBSE.f90 @@ -126,110 +126,74 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS allocate(Bpp(nVV,nOO),Cpp(nVV,nVV),Dpp(nOO,nOO)) allocate(KB_sta(nVV,nOO),KC_sta(nVV,nVV),KD_sta(nOO,nOO)) - !print*, 'RGW_ppBSE_static_kernel_C:' - !call wall_time(tt1) call RGW_ppBSE_static_kernel_C(ispin,eta,nOrb,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_C (sec)', tt2-tt1 - - !print*, 'RGW_ppBSE_static_kernel_D:' - !call wall_time(tt1) call RGW_ppBSE_static_kernel_D(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_D (sec)', tt2-tt1 - if(.not.TDA) then - !print*, 'RGW_ppBSE_static_kernel_B:' - !call wall_time(tt1) call RGW_ppBSE_static_kernel_B(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_B (sec)', tt2-tt1 endif - !print*, 'ppLR_C:' - !call wall_time(tt1) call ppLR_C(ispin,nOrb,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_C (sec)', tt2-tt1 - - !print*, 'ppLR_D:' - !call wall_time(tt1) call ppLR_D(ispin,nOrb,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_D (sec)', tt2-tt1 - if(.not.TDA) then - !print*, 'ppLR_B:' - !call wall_time(tt1) call ppLR_B(ispin,nOrb,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_B (sec)', tt2-tt1 endif Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) - !print*, 'ppLR:' - !call wall_time(tt1) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) - !call wall_time(tt2) - !write(*,'(A,1X,F10.3)'), 'wall time for ppLR (sec)', tt2-tt1 deallocate(Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) -! -! print*, 'LAPACK:' -! print*, Om2 -! print*, Om1 ! --- - ! --- - ! Davidson - ! --- + !! --- + !! Davidson + !! --- -! n_states = nOO + 5 -! n_states_diag = n_states + 4 -! allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) -! -! supp_data_int = 1 -! allocate(supp_data_int(supp_data_int_size)) -! supp_data_int(1) = nS -! -! supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 -! allocate(supp_data_dbl(supp_data_dbl_size)) -! ! scalars -! supp_data_dbl(1) = eta -! i_data = 1 -! ! rho_RPA -! do q = 1, nOrb -! do p = 1, nOrb -! do m = 1, nS -! i_data = i_data + 1 -! supp_data_dbl(i_data) = rho_RPA(p,q,m) -! enddo -! enddo -! enddo -! ! OmRPA -! do m = 1, nS -! i_data = i_data + 1 -! supp_data_dbl(i_data) = OmRPA(m) -! enddo -! -! call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & -! 1.d0, & ! lambda -! eGW(1), & -! 0.d0, & ! eF -! ERI(1,1,1,1), & -! supp_data_int(1), supp_data_int_size, & -! supp_data_dbl(1), supp_data_dbl_size, & -! Om(1), R(1,1), n_states, n_states_diag, "GW") -! -! deallocate(Om, R) -! deallocate(supp_data_dbl, supp_data_int) -! stop + !n_states = nOO + 5 + !n_states_diag = n_states + 4 + !allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) + + !supp_data_int_size = 1 + !allocate(supp_data_int(supp_data_int_size)) + !supp_data_int(1) = nS + + !supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 + !allocate(supp_data_dbl(supp_data_dbl_size)) + !! scalars + !supp_data_dbl(1) = eta + !i_data = 1 + !! rho_RPA + !do q = 1, nOrb + ! do p = 1, nOrb + ! do m = 1, nS + ! i_data = i_data + 1 + ! supp_data_dbl(i_data) = rho_RPA(p,q,m) + ! enddo + ! enddo + !enddo + !! OmRPA + !do m = 1, nS + ! i_data = i_data + 1 + ! supp_data_dbl(i_data) = OmRPA(m) + !enddo + + !call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & + ! 1.d0, & ! lambda + ! eGW(1), & + ! 0.d0, & ! eF + ! ERI(1,1,1,1), & + ! supp_data_int(1), supp_data_int_size, & + ! supp_data_dbl(1), supp_data_dbl_size, & + ! Om(1), R(1,1), n_states, n_states_diag, "GW", 1) + + !deallocate(Om, R) + !deallocate(supp_data_dbl, supp_data_int) + !stop ! --- @@ -291,10 +255,6 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) deallocate(Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) - !print*, 'LAPACK:' - !print*, Om2 - !print*, Om1 - ! --- ! Davidson ! --- @@ -303,7 +263,7 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS !n_states_diag = n_states + 4 !allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) - !supp_data_int = 1 + !supp_data_int_size = 1 !allocate(supp_data_int(supp_data_int_size)) !supp_data_int(1) = nS @@ -334,7 +294,7 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS ! ERI(1,1,1,1), & ! supp_data_int(1), supp_data_int_size, & ! supp_data_dbl(1), supp_data_dbl_size, & - ! Om(1), R(1,1), n_states, n_states_diag, "GW") + ! Om(1), R(1,1), n_states, n_states_diag, "GW", 1) !deallocate(Om, R) !deallocate(supp_data_dbl, supp_data_int) diff --git a/src/LR/ppLR_GW_davidson.f90 b/src/LR/ppLR_GW_davidson.f90 index a8c1af2..2c8ac12 100644 --- a/src/LR/ppLR_GW_davidson.f90 +++ b/src/LR/ppLR_GW_davidson.f90 @@ -33,7 +33,7 @@ subroutine ppLR_GW_HR_calc(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e, eF, call wall_time(t1) - if((nOO+nVV) .le. 20000) then + if((nOO+nVV) .le. 30000) then call ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e(1), eF, n_states_diag, & ERI(1,1,1,1), eta, rho(1,1,1), Om(1), U(1,1), W(1,1)) diff --git a/src/LR/ppLR_davidson.f90 b/src/LR/ppLR_davidson.f90 index 6a4d7ed..5bab7db 100644 --- a/src/LR/ppLR_davidson.f90 +++ b/src/LR/ppLR_davidson.f90 @@ -4,7 +4,7 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, ERI, & supp_data_int, supp_data_int_size, & supp_data_dbl, supp_data_dbl_size, & - Om, R, n_states, n_states_diag, kernel) + Om, R, n_states, n_states_diag, kernel, mode_dav) ! ! Extract the low n_states @@ -18,6 +18,54 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, ! (-B.T -D) ! + implicit none + + logical, intent(in) :: TDA + integer, intent(in) :: ispin + integer, intent(in) :: nC, nO, nR, nOrb, nOO, nVV + integer, intent(in) :: n_states ! nb of physical states + integer, intent(in) :: n_states_diag ! nb of states used to get n_states + integer, intent(in) :: supp_data_int_size + integer, intent(in) :: supp_data_dbl_size + integer, intent(in) :: mode_dav + character(len=*), intent(in) :: kernel + double precision, intent(in) :: lambda, eF + double precision, intent(in) :: e(nOrb) + double precision, intent(in) :: ERI(nOrb,nOrb,nOrb,nOrb) + integer, intent(in) :: supp_data_int(supp_data_int_size) + double precision, intent(in) :: supp_data_dbl(supp_data_dbl_size) + double precision, intent(out) :: Om(n_states) + double precision, intent(out) :: R(nOO+nVV,n_states_diag) + + if(mode_dav .eq. 1) then + + call ppLR_davidson_1(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e(1), eF, ERI(1,1,1,1), & + supp_data_int(1), supp_data_int_size, supp_data_dbl(1), supp_data_dbl_size, & + Om(1), R(1,1), n_states, n_states_diag, kernel) + + elseif(mode_dav .eq. 2) then + + call ppLR_davidson_2(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e(1), eF, ERI(1,1,1,1), & + supp_data_int(1), supp_data_int_size, supp_data_dbl(1), supp_data_dbl_size, & + Om(1), R(1,1), n_states, n_states_diag, kernel) + + else + + print*, " unknown Davidson's variant" + stop + + endif + + return +end + +! --- + +subroutine ppLR_davidson_1(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, ERI, & + supp_data_int, supp_data_int_size, & + supp_data_dbl, supp_data_dbl_size, & + Om, R, n_states, n_states_diag, kernel) + use omp_lib implicit none @@ -43,16 +91,15 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, integer :: shift1, shift2 integer :: i, j, k, l, ab integer :: p, q, mm, i_data, nS - integer :: i_omax(n_states) logical :: converged - character*(16384) :: write_buffer + character(len=6+41*n_states) :: write_buffer double precision :: r1, r2, dtwo_pi double precision :: lambda_tmp - double precision :: to_print(2,n_states) double precision :: mem double precision :: eta double precision :: t1, t2, tt1, tt2 character(len=len(kernel)) :: kernel_name + integer, allocatable :: i_omax(:) double precision, allocatable :: H_diag(:) double precision, allocatable :: W(:,:) double precision, allocatable :: U(:,:) @@ -61,6 +108,7 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, double precision, allocatable :: overlap(:) double precision, allocatable :: S_check(:,:) double precision, allocatable :: rho_tmp(:,:,:), Om_tmp(:) + double precision, allocatable :: to_print(:,:) double precision, external :: u_dot_u @@ -92,7 +140,8 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, write(*,'(A40, A12)') 'Kernel: ', kernel_name - + allocate(i_omax(n_states)) + allocate(to_print(2,n_states)) allocate(H_diag(N)) allocate(U(N,M)) allocate(W(N,M)) @@ -101,7 +150,7 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, allocate(residual_norm(n_states_diag)) mem = 8.d0 * dble(nOrb + nOrb**4 + N*n_states) & - + 8.d0 * dble(supp_data_dbl_size) + 4.d0 * dble(supp_data_int_size) + + 8.d0 * dble(2*supp_data_dbl_size) + 4.d0 * dble(2*supp_data_int_size) write(*,'(A40, F12.4)') 'I/O mem (GB) = ', mem / (1024.d0*1024.d0*1024.d0) @@ -325,7 +374,6 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, W(1,1), size(W, 1), h_vec(1,1), size(h_vec, 1), & 0.d0, W(1,shift2+1), size(W, 1)) - ! check if W1 = U1 h_val !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i, k) & !$OMP SHARED(n_states, n_states_diag, N, shift2, U, h_val, W, H_diag, residual_norm, to_print) @@ -342,13 +390,11 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, enddo !$OMP END DO !$OMP END PARALLEL - !print*, " to_print", to_print if((itertot > 1) .and. (iter == 1)) then continue else - write(*,'(1X, I3, 1X, 100(1X, F16.10, 1X, F12.6))') iter-1, to_print(1:2,1:n_states) - !write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:n_states) + write(*,'(1X, I3, 1X, 10000(1X, F16.10, 1X, F12.6))') iter-1, to_print(1:2,1:n_states) endif !call wall_time(tt2) @@ -361,7 +407,7 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, endif do k = 1, n_states - if(residual_norm(k) > 1.d8) then + if(residual_norm(k) > 1.d10) then print *, 'Davidson failed' stop -1 endif @@ -377,7 +423,8 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, call dgemm('N', 'N', N, n_states_diag, shift2, 1.d0, & W(1,1), size(W, 1), h_vec(1,1), size(h_vec, 1), & - 0.d0, R, size(R, 1)) + 0.d0, R(1,1), size(R, 1)) + do k = 1, n_states_diag do i = 1, N W(i,k) = R(i,k) @@ -427,6 +474,8 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, print*, k, Om(k) enddo + deallocate(i_omax) + deallocate(to_print) deallocate(H_diag) deallocate(U) deallocate(W) @@ -449,3 +498,401 @@ end ! --- +subroutine ppLR_davidson_2(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF, ERI, & + supp_data_int, supp_data_int_size, & + supp_data_dbl, supp_data_dbl_size, & + Om, R, n_states, n_states_diag, kernel) + + use omp_lib + + implicit none + + logical, intent(in) :: TDA + integer, intent(in) :: ispin + integer, intent(in) :: nC, nO, nR, nOrb, nOO, nVV + integer, intent(in) :: n_states ! nb of physical states + integer, intent(in) :: n_states_diag ! nb of states used to get n_states + integer, intent(in) :: supp_data_int_size + integer, intent(in) :: supp_data_dbl_size + character(len=*), intent(in) :: kernel + double precision, intent(in) :: lambda, eF + double precision, intent(in) :: e(nOrb) + double precision, intent(in) :: ERI(nOrb,nOrb,nOrb,nOrb) + integer, intent(in) :: supp_data_int(supp_data_int_size) + double precision, intent(in) :: supp_data_dbl(supp_data_dbl_size) + double precision, intent(out) :: Om(n_states) + double precision, intent(out) :: R(nOO+nVV,n_states_diag) + + integer :: N, M, num_threads, n_states_delta + integer :: it_start, it_delta, it_size + integer :: iter, itermax, itertot + integer :: i, j, k, l, ab + integer :: p, q, mm, i_data, nS + logical :: converged + double precision :: r1, r2, dtwo_pi + double precision :: mem + double precision :: eta + double precision :: t1, t2, tt1, tt2 + character(len=len(kernel)) :: kernel_name + integer, allocatable :: i_omax(:) + character(len=:), allocatable :: write_buffer + double precision, allocatable :: to_print(:,:) + double precision, allocatable :: H_diag(:) + double precision, allocatable :: W0(:,:), W1(:,:) + double precision, allocatable :: U0(:,:), U1(:,:) + double precision, allocatable :: h(:,:), h_vec(:,:), h_val(:) + double precision, allocatable :: residual_norm(:) + double precision, allocatable :: rho_tmp(:,:,:), Om_tmp(:) + + double precision, external :: u_dot_u + + call wall_time(t1) + + dtwo_pi = 6.283185307179586d0 + + N = nOO + nVV + + n_states_delta = min(max(25, n_states_diag/2), n_states_diag) + itermax = 8 + M = n_states_diag + itermax * n_states_delta + + call lower_case(trim(kernel), kernel_name) + + if(M .ge. N) then + print*, 'N = ', N + print*, 'M = ', M + print*, ' use Lapack or decrease n_states and/or itermax ' + stop + endif + + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + write(*,'(A40, I12)') 'Number of states = ', n_states + write(*,'(A40, I12)') 'Number of states in diag = ', n_states_diag + write(*,'(A40, I12)') 'Number of states to add = ', n_states_delta + write(*,'(A40, I12)') 'Number of basis functions = ', N + write(*,'(A40, A12)') 'Kernel: ', kernel_name + + + + allocate(character(len=50*n_states) :: write_buffer) + allocate(i_omax(n_states)) + allocate(to_print(2,n_states)) + allocate(H_diag(N)) + allocate(U0(N,M), U1(N,n_states_diag)) + allocate(W0(N,M), W1(N,n_states_diag)) + allocate(h(M,M), h_vec(M,M), h_val(M)) + allocate(residual_norm(n_states_diag)) + + mem = 8.d0 * dble(nOrb) + 8.d0 * dble(nOrb)**4 + 8.d0 * dble(N*n_states) & + + 8.d0 * dble(2*supp_data_dbl_size) + 4.d0 * dble(2*supp_data_int_size) + + write(*,'(A40, F12.4)') 'I/O mem (GB) = ', mem / (1024.d0*1024.d0*1024.d0) + + mem = 8.d0 * dble(N) & + + 8.d0 * dble(N*M) & + + 8.d0 * dble(N*M) & + + 8.d0 * dble(N*n_states_diag) & + + 8.d0 * dble(N*n_states_diag) & + + 8.d0 * dble(M*M) & + + 8.d0 * dble(M*M) & + + 8.d0 * dble(M) & + + 8.d0 * dble(n_states_diag) & + + 1.d0 * dble(50*n_states) + + write(*,'(A40, F12.4)') 'tmp mem (GB) = ', mem / (1024.d0*1024.d0*1024.d0) + + num_threads = omp_get_max_threads() + write(*,'(A40, I12)') 'Number of threads = ', num_threads + + + if(kernel_name .eq. "rpa") then + + call ppLR_RPA_H_diag(ispin, nOrb, nC, nO, nR, nOO, nVV, lambda, e(1), eF, & + ERI(1,1,1,1), H_diag(1)) + + elseif(kernel_name .eq. "gw") then + + nS = supp_data_int(1) + + allocate(rho_tmp(nS,nOrb,nOrb)) + allocate(Om_tmp(nS)) + + eta = supp_data_dbl(1) + i_data = 1 + do q = 1, nOrb + do p = 1, nOrb + do mm = 1, nS + i_data = i_data + 1 + rho_tmp(mm,p,q) = supp_data_dbl(i_data) + enddo + enddo + enddo + do mm = 1, nS + i_data = i_data + 1 + Om_tmp(mm) = supp_data_dbl(i_data) + enddo + + call ppLR_GW_H_diag(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e(1), eF, & + ERI(1,1,1,1), eta, rho_tmp(1,1,1), Om_tmp(1), H_diag(1)) + + !! TODO + !elseif(kernel_name .eq. "gf2") then + + else + + print*, ' kernel not supported', kernel + stop + + endif + + U0 = 0.d0 + W0 = 0.d0 + U1 = 0.d0 + W1 = 0.d0 + + ! TODO: improve guess + ! initialize guess + R = 0.d0 + do k = 1, n_states + R(k,k) = 1.d0 + enddo + do k = n_states+1, n_states_diag + do i = 1, N + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + R(i,k) = r1*dcos(r2) + enddo + R(k,k) = R(k,k) + 10.d0 + call normalize(R(1,k), N) + enddo + + do k = 1, n_states_diag + U0(:,k) = R(:,k) + enddo + + + write(6,'(A)') '' + write_buffer = '=====' + do i = 1, n_states + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*n_states) + write_buffer = 'Iter' + do i = 1, n_states + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*n_states) + write_buffer = '=====' + do i = 1, n_states + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*n_states) + + + converged = .False. + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == itermax) then + print*, 'exit before convergence !' + print*, 'itertot == itermax', itertot + exit + endif + + do iter = 1, itermax-1 + + if(iter .eq. 1) then + it_start = 0 + it_delta = n_states_diag + else + it_start = n_states_diag + n_states_delta * (iter - 2) + it_delta = n_states_delta + endif + + it_size = it_start + it_delta + + if((iter > 1) .or. (itertot == 1)) then + + !call wall_time(tt1) + + call ortho_qr(U0(1,1), size(U0, 1), N, it_size) + + if(kernel_name .eq. "rpa") then + + call ppLR_RPA_HR_calc(ispin, nOrb, nC, nO, nR, nOO, nVV, lambda, e(1), eF, it_delta, & + ERI(1,1,1,1), & + U0(1,it_start+1), W0(1,it_start+1)) + + elseif(kernel_name .eq. "gw") then + + call ppLR_GW_HR_calc(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e(1), eF, it_delta, & + ERI(1,1,1,1), eta, rho_tmp(1,1,1), Om_tmp(1), & + U0(1,it_start+1), W0(1,it_start+1)) + + !! TODO + !elseif(kernel_name .eq. "gf2") then + + endif + + else + + ! computed below + continue + endif + + ! h = U0.T H U0 + call dgemm('T', 'N', it_size, it_size, N, 1.d0, & + U0(1,1), size(U0, 1), W0(1,1), size(W0, 1), & + 0.d0, h(1,1), size(h, 1)) + + ! h h_vec = h_val h_vec + call diag_nonsym_right(it_size, h(1,1), size(h, 1), h_vec(1,1), size(h_vec, 1), h_val(1), size(h_val, 1)) + + ! U1(:,1:it_delta) = U0 h_vec(:,1:it_delta) + call dgemm('N', 'N', N, it_delta, it_size, 1.d0, & + U0(1,1), size(U0, 1), h_vec(1,1), size(h_vec, 1), & + 0.d0, U1(1,1), size(U1, 1)) + + do k = 1, it_delta + call normalize(U1(1,k), N) + enddo + + ! W1(:,1:it_delta) = W0 h_vec(:,1:it_delta) + call dgemm('N', 'N', N, it_delta, it_size, 1.d0, & + W0(1,1), size(W0, 1), h_vec(1,1), size(h_vec, 1), & + 0.d0, W1(1,1), size(W1, 1)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, k) & + !$OMP SHARED(n_states, it_size, it_delta, N, U0, U1, & + !$OMP h_val, W1, H_diag, residual_norm, to_print) + !$OMP DO + do k = 1, it_delta + do i = 1, N + U1(i,k) = (h_val(k) * U1(i,k) - W1(i,k)) / max(dabs(H_diag(i) - h_val(k)), 1.d-2) + U0(i,it_size+k) = U1(i,k) + enddo + if(k <= n_states) then + residual_norm(k) = u_dot_u(U1(1,k), N) + to_print(1,k) = h_val(k) + to_print(2,k) = residual_norm(k) + endif + enddo + !$OMP END DO + !$OMP END PARALLEL + + + if((itertot > 1) .and. (iter == 1)) then + continue + else + write(*,'(1X, I3, 1X, 10000(1X, F16.10, 1X, F12.6))') iter-1, to_print(1:2,1:n_states) + endif + + !call wall_time(tt2) + !write(*,'(A50, F12.4)') 'wall time for one Davidson iteration (sec): ', tt2-tt1 + !stop + + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:n_states))) < 1d-15 + endif + + do k = 1, n_states + if(residual_norm(k) > 1.d10) then + print *, 'Davidson failed' + stop -1 + endif + enddo + + if(converged) exit + + enddo ! loop over iter + + + ! Re-contract U0 and update W0 + ! -------------------------------- + + call dgemm('N', 'N', N, n_states_diag, it_size, 1.d0, & + W0(1,1), size(W0, 1), h_vec(1,1), size(h_vec, 1), & + 0.d0, R(1,1), size(R, 1)) + + do k = 1, n_states_diag + do i = 1, N + W0(i,k) = R(i,k) + enddo + enddo + + call dgemm('N', 'N', N, n_states_diag, it_size, 1.d0, & + U0(1,1), size(U0, 1), h_vec(1,1), size(h_vec, 1), & + 0.d0, R(1,1), size(R, 1)) + + do k = 1, n_states_diag + do i = 1, N + U0(i,k) = R(i,k) + enddo + enddo + + call ortho_qr(U0(1,1), size(U0, 1), N, n_states_diag) + + do j = 1, n_states_diag + k = 1 + do while((k < N) .and. (U0(k,j) == 0.d0)) + k = k+1 + enddo + if(U0(k,j) * R(k,j) < 0.d0) then + do i = 1, N + W0(i,j) = -W0(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + write_buffer = '=====' + do i = 1, n_states + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + + + print*, " Davidson eigenvalues" + do k = 1, n_states + Om(k) = h_val(k) + print*, k, Om(k) + enddo + + deallocate(write_buffer) + deallocate(i_omax) + deallocate(to_print) + deallocate(H_diag) + deallocate(U0, U1) + deallocate(W0, W1) + deallocate(h) + deallocate(h_vec) + deallocate(h_val) + deallocate(residual_norm) + + if(kernel_name .eq. "gw") then + deallocate(rho_tmp) + deallocate(Om_tmp) + endif + + call wall_time(t2) + write(*,'(A50, F12.4)') 'total wall time for Davidson (sec): ', t2-t1 + + return +end + +! --- +