1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-06-02 11:25:23 +02:00
qp_plugins_scemama/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f
2023-08-04 16:19:41 +02:00

411 lines
11 KiB
Fortran

subroutine run_ccsd_space_orb
use gpu_module
implicit none
integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d
integer :: u,v,gam,beta,tmp_gam,tmp_beta
integer :: nb_iter
double precision :: get_two_e_integral
double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
logical :: not_converged
double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
double precision, allocatable :: t1(:,:), r1(:,:)
double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
double precision, allocatable :: all_err(:,:), all_t(:,:)
integer, allocatable :: list_occ(:), list_vir(:)
integer(bit_kind) :: det(N_int,2)
integer :: nO, nV, nOa, nVa
if (do_ao_cholesky) then
PROVIDE cholesky_mo_transp
FREE cholesky_ao
else
PROVIDE mo_two_e_integrals_in_map
endif
det = psi_det(:,:,cc_ref)
print*,'Reference determinant:'
call print_det(det,N_int)
nOa = cc_nOa
nVa = cc_nVa
! Check that the reference is a closed shell determinant
if (cc_ref_is_open_shell) then
call abort
endif
! Number of occ/vir spatial orb
nO = nOa
nV = nVa
allocate(list_occ(nO),list_vir(nV))
list_occ = cc_list_occ
list_vir = cc_list_vir
! Debug
!call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
!print*,'occ',list_occ
!print*,'vir',list_vir
allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
allocate(tau(nO,nO,nV,nV))
allocate(tau_x(nO,nO,nV,nV))
allocate(t1(nO,nV), r1(nO,nV))
allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
if (cc_update_method == 'diis') then
double precision :: rss, diis_mem, extra_mem
double precision, external :: memory_of_double
diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV)
call resident_memory(rss)
do while (cc_diis_depth > 1)
if (rss + diis_mem * cc_diis_depth > qp_max_mem) then
cc_diis_depth = cc_diis_depth - 1
else
exit
endif
end do
if (cc_diis_depth <= 1) then
print *, 'Not enough memory for DIIS'
stop -1
endif
print *, 'DIIS size ', cc_diis_depth
allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth))
!$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED)
do j=1,cc_diis_depth
!$OMP DO
do i=1, size(all_err,1)
all_err(i,j) = 0d0
all_t(i,j) = 0d0
enddo
!$OMP END DO NOWAIT
enddo
!$OMP END PARALLEL
endif
if (elec_alpha_num /= elec_beta_num) then
print*, 'Only for closed shell systems'
print*, 'elec_alpha_num=',elec_alpha_num
print*, 'elec_beta_num =',elec_beta_num
print*, 'abort'
call abort
endif
! Init
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
call update_tau_space(nO,nV,t1,t2,tau)
call update_tau_x_space(nO,nV,tau,tau_x)
!print*,'hf_energy', hf_energy
call det_energy(det,uncorr_energy)
print*,'Det energy', uncorr_energy
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
print*,'Guess energy', uncorr_energy+energy, energy
nb_iter = 0
not_converged = .True.
max_r1 = 0d0
max_r2 = 0d0
write(*,'(A77)') ' -----------------------------------------------------------------------------'
write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |'
write(*,'(A77)') ' -----------------------------------------------------------------------------'
call wall_time(ta)
type(c_ptr) :: gpu_data
gpu_data = gpu_init(nO, nV, cholesky_mo_num, &
cc_space_v_oo_chol, cc_space_v_ov_chol, cc_space_v_vo_chol, cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_voov, cc_space_v_oovv, cc_space_v_vvoo, &
cc_space_v_oovo, cc_space_v_ovvo, cc_space_v_ovov, cc_space_v_ovoo, &
cc_space_f_oo, cc_space_f_ov, cc_space_f_vo, cc_space_f_vv)
if (.not.do_ao_cholesky) then
print *, 'ao_choleky is required'
stop -1
endif
do while (not_converged)
! Residue
call gpu_upload(gpu_data, nO, nV, t1, t2, tau, tau_x);
!$OMP PARALLEL SECTIONS
!$OMP SECTION
call compute_H_oo_chol_gpu(gpu_data,0)
!$OMP SECTION
call compute_H_vo_chol_gpu(gpu_data,1)
!$OMP SECTION
call compute_H_vv_chol_gpu(gpu_data,2)
!$OMP END PARALLEL SECTIONS
call compute_r1_space_chol_gpu(gpu_data, nO, nV, t1, r1, max_r1)
call compute_r2_space_chol_gpu(gpu_data, nO, nV, t1, r2, max_r2)
max_r = max(max_r1,max_r2)
! Update
if (cc_update_method == 'diis') then
!call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
!call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)
! Standard update as T = T - Delta
elseif (cc_update_method == 'none') then
call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
else
print*,'Unkown cc_method_method: '//cc_update_method
endif
call update_tau_space(nO,nV,t1,t2,tau)
call update_tau_x_space(nO,nV,tau,tau_x)
! Energy
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
nb_iter = nb_iter + 1
if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
not_converged = .False.
endif
enddo
write(*,'(A77)') ' -----------------------------------------------------------------------------'
call wall_time(tb)
print*,'Time: ',tb-ta, ' s'
print*,''
if (max_r < cc_thresh_conv) then
write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
else
write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
endif
print*,''
write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha'
write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
write(*,'(A15,ES10.2,A3)')' Conv = ', max_r
print*,''
if (write_amplitudes) then
call write_t1(nO,nV,t1)
call write_t2(nO,nV,t2)
call ezfio_set_utils_cc_io_amplitudes('Read')
endif
! Deallocation
if (cc_update_method == 'diis') then
deallocate(all_err,all_t)
endif
deallocate(r1,r2,tau)
! CCSD(T)
double precision :: e_t
e_t = 0.d0
if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then
! Dumb way
!call wall_time(ta)
!call ccsd_par_t_space(nO,nV,t1,t2,e_t)
!call wall_time(tb)
!print*,'Time: ',tb-ta, ' s'
!print*,''
!write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha'
!write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha'
!write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
!print*,''
! New
print*,'Computing (T) correction...'
call wall_time(ta)
! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
e_t = uncorr_energy + energy ! For print in next call
call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
call wall_time(tb)
print*,'Time: ',tb-ta, ' s'
print*,''
write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha'
write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha'
write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
print*,''
endif
call save_energy(uncorr_energy + energy, e_t)
deallocate(t1,t2)
end
! Energy
subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
implicit none
integer, intent(in) :: nO, nV
double precision, intent(in) :: tau(nO,nO,nV,nV)
double precision, intent(in) :: t1(nO,nV)
double precision, intent(out) :: energy
! internal
integer :: i,j,a,b
double precision :: e
energy = 0d0
!$omp parallel &
!$omp shared(nO,nV,energy,tau,t1,&
!$omp cc_space_f_vo,cc_space_w_oovv) &
!$omp private(i,j,a,b,e) &
!$omp default(none)
e = 0d0
!$omp do
do a = 1, nV
do i = 1, nO
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
enddo
enddo
!$omp end do nowait
!$omp do
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp critical
energy = energy + e
!$omp end critical
!$omp end parallel
end
subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
implicit none
integer, intent(in) :: nO, nV
double precision, intent(in) :: tau_x(nO,nO,nV,nV)
double precision, intent(in) :: t1(nO,nV)
double precision, intent(out) :: energy
! internal
integer :: i,j,a,b
double precision :: e
energy = 0d0
!$omp parallel &
!$omp shared(nO,nV,energy,tau_x,t1,&
!$omp cc_space_f_vo,cc_space_v_oovv) &
!$omp private(i,j,a,b,e) &
!$omp default(none)
e = 0d0
!$omp do
do a = 1, nV
do i = 1, nO
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
enddo
enddo
!$omp end do nowait
!$omp do
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp critical
energy = energy + e
!$omp end critical
!$omp end parallel
end
! Tau
subroutine update_tau_space(nO,nV,t1,t2,tau)
implicit none
! in
integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV)
! out
double precision, intent(out) :: tau(nO,nO,nV,nV)
! internal
integer :: i,j,a,b
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,tau,t2,t1) &
!$OMP PRIVATE(i,j,a,b) &
!$OMP DEFAULT(NONE)
!$OMP DO
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
end
subroutine update_tau_x_space(nO,nV,tau,tau_x)
implicit none
! in
integer, intent(in) :: nO, nV
double precision, intent(in) :: tau(nO,nO,nV,nV)
! out
double precision, intent(out) :: tau_x(nO,nO,nV,nV)
! internal
integer :: i,j,a,b
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,tau,tau_x) &
!$OMP PRIVATE(i,j,a,b) &
!$OMP DEFAULT(NONE)
!$OMP DO
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
end