mirror of
https://github.com/QuantumPackage/qp2.git
synced 2025-04-25 17:54:44 +02:00
Introduced GPU arrays
This commit is contained in:
parent
3d46cde2e4
commit
243ee0ed14
@ -177,9 +177,11 @@ subroutine run_ccsd_space_orb
|
||||
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2)
|
||||
call gpu_upload(h_t2, t2)
|
||||
|
||||
deallocate(h_t1, h_t2)
|
||||
|
||||
call update_tau_space(nO,nV,h_t1,t1,t2,tau)
|
||||
call update_tau_space(nO,nV,t1%f,t1,t2,tau)
|
||||
call update_tau_x_space(nO,nV,tau,tau_x)
|
||||
|
||||
call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
|
||||
print*,'Det energy', uncorr_energy
|
||||
|
||||
@ -310,7 +312,6 @@ subroutine run_ccsd_space_orb
|
||||
|
||||
call save_energy(uncorr_energy + energy, e_t)
|
||||
|
||||
deallocate(h_t1, h_t2)
|
||||
if (do_mo_cholesky) then
|
||||
call gpu_deallocate(d_cc_space_v_oo_chol)
|
||||
call gpu_deallocate(d_cc_space_v_ov_chol)
|
||||
|
@ -1,6 +1,5 @@
|
||||
! Code
|
||||
|
||||
subroutine run_ccsd_spin_orb
|
||||
use gpu
|
||||
|
||||
implicit none
|
||||
|
||||
@ -8,8 +7,6 @@ subroutine run_ccsd_spin_orb
|
||||
! CCSD in spin orbitals
|
||||
END_DOC
|
||||
|
||||
double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:)
|
||||
double precision, allocatable :: r1(:,:), r2(:,:,:,:)
|
||||
double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:)
|
||||
double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:)
|
||||
|
||||
@ -23,6 +20,9 @@ subroutine run_ccsd_spin_orb
|
||||
integer :: nb_iter, i,j,a,b
|
||||
double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
|
||||
|
||||
type(gpu_double4) :: t2, r2, tau, tau_t
|
||||
type(gpu_double2) :: t1, r1
|
||||
|
||||
if (do_mo_cholesky) then
|
||||
PROVIDE cholesky_mo_transp
|
||||
FREE cholesky_ao
|
||||
@ -34,13 +34,18 @@ subroutine run_ccsd_spin_orb
|
||||
call print_det(psi_det(1,1,cc_ref),N_int)
|
||||
|
||||
! Allocation
|
||||
allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
|
||||
allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
|
||||
allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab))
|
||||
allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
|
||||
|
||||
allocate(f_o(cc_nOab), f_v(cc_nVab))
|
||||
|
||||
call gpu_allocate(t1, cc_nOab,cc_nVab)
|
||||
call gpu_allocate(r1, cc_nOab,cc_nVab)
|
||||
call gpu_allocate(t2, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
|
||||
call gpu_allocate(r2, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
|
||||
call gpu_allocate(tau, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
|
||||
call gpu_allocate(tau_t, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
|
||||
|
||||
! Allocation for the diis
|
||||
if (cc_update_method == 'diis') then
|
||||
allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth))
|
||||
@ -58,23 +63,29 @@ subroutine run_ccsd_spin_orb
|
||||
|
||||
|
||||
! Init of T
|
||||
t1 = 0d0
|
||||
call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1)
|
||||
call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2)
|
||||
call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
|
||||
call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
|
||||
double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:)
|
||||
allocate(h_t1(cc_nOab,cc_nVab), h_t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
|
||||
h_t1 = 0d0
|
||||
call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,h_t1)
|
||||
call gpu_upload(h_t1, t1)
|
||||
|
||||
call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,h_t2)
|
||||
call gpu_upload(h_t2, t2)
|
||||
|
||||
deallocate(h_t1,h_t2)
|
||||
|
||||
call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f)
|
||||
call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f)
|
||||
|
||||
call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
|
||||
print*,'Det energy', uncorr_energy
|
||||
|
||||
call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
|
||||
call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy)
|
||||
print*,'guess energy', uncorr_energy+energy, energy
|
||||
|
||||
! Loop init
|
||||
nb_iter = 0
|
||||
not_converged = .True.
|
||||
r1 = 0d0
|
||||
r2 = 0d0
|
||||
max_r1 = 0d0
|
||||
max_r2 = 0d0
|
||||
|
||||
@ -88,46 +99,46 @@ subroutine run_ccsd_spin_orb
|
||||
do while (not_converged)
|
||||
|
||||
! Intermediates
|
||||
call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo)
|
||||
call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov)
|
||||
call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv)
|
||||
call compute_cF_oo(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo)
|
||||
call compute_cF_ov(cc_nOab,cc_nVab,t1%f,cc_spin_F_ov,cc_spin_v_oovv,cF_ov)
|
||||
call compute_cF_vv(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv)
|
||||
|
||||
call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo)
|
||||
call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo)
|
||||
call compute_cW_oooo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo)
|
||||
call compute_cW_ovvo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo)
|
||||
|
||||
! Residuals
|
||||
call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1)
|
||||
call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2)
|
||||
call compute_r1_spin(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1%f)
|
||||
call compute_r2_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2%f)
|
||||
|
||||
! Max elements in the residuals
|
||||
max_r1 = maxval(abs(r1(:,:)))
|
||||
max_r2 = maxval(abs(r2(:,:,:,:)))
|
||||
max_r1 = maxval(abs(r1%f(:,:)))
|
||||
max_r2 = maxval(abs(r2%f(:,:,:,:)))
|
||||
max_r = max(max_r1,max_r2)
|
||||
|
||||
call wall_time(ti)
|
||||
! Update
|
||||
if (cc_update_method == 'diis') then
|
||||
!call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
|
||||
!call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
|
||||
call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
|
||||
!call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2)
|
||||
!call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2)
|
||||
call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t)
|
||||
|
||||
! Standard update as T = T - Delta
|
||||
elseif (cc_update_method == 'none') then
|
||||
call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1)
|
||||
call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2)
|
||||
call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1%f,t1%f)
|
||||
call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2%f,t2%f)
|
||||
else
|
||||
print*,'Unkonw cc_method_method: '//cc_update_method
|
||||
endif
|
||||
|
||||
call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
|
||||
call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
|
||||
call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f)
|
||||
call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f)
|
||||
call wall_time(tf)
|
||||
if (cc_dev) then
|
||||
print*,'Update:',tf-ti,'s'
|
||||
endif
|
||||
|
||||
! Print
|
||||
call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
|
||||
call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy)
|
||||
call wall_time(tfi)
|
||||
|
||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', &
|
||||
@ -159,8 +170,8 @@ subroutine run_ccsd_spin_orb
|
||||
print*,''
|
||||
|
||||
if (write_amplitudes) then
|
||||
call write_t1(cc_nOab,cc_nVab,t1)
|
||||
call write_t2(cc_nOab,cc_nVab,t2)
|
||||
call write_t1(cc_nOab,cc_nVab,t1%f)
|
||||
call write_t2(cc_nOab,cc_nVab,t2%f)
|
||||
call ezfio_set_utils_cc_io_amplitudes('Read')
|
||||
endif
|
||||
|
||||
@ -168,8 +179,6 @@ subroutine run_ccsd_spin_orb
|
||||
if (cc_update_method == 'diis') then
|
||||
deallocate(all_err,all_t)
|
||||
endif
|
||||
deallocate(tau,tau_t)
|
||||
deallocate(r1,r2)
|
||||
deallocate(cF_oo,cF_ov,cF_vv)
|
||||
deallocate(cW_oooo,cW_ovvo)!,cW_vvvv)
|
||||
|
||||
@ -178,7 +187,7 @@ subroutine run_ccsd_spin_orb
|
||||
if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then
|
||||
print*,'CCSD(T) calculation...'
|
||||
call wall_time(ta)
|
||||
call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr)
|
||||
call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr)
|
||||
call wall_time(tb)
|
||||
print*,'Done'
|
||||
print*,'Time: ',tb-ta, ' s'
|
||||
@ -192,7 +201,13 @@ subroutine run_ccsd_spin_orb
|
||||
call save_energy(uncorr_energy + energy, t_corr)
|
||||
|
||||
deallocate(f_o,f_v)
|
||||
deallocate(t1,t2)
|
||||
|
||||
call gpu_deallocate(t1)
|
||||
call gpu_deallocate(r1)
|
||||
call gpu_deallocate(t2)
|
||||
call gpu_deallocate(r2)
|
||||
call gpu_deallocate(tau)
|
||||
call gpu_deallocate(tau_t)
|
||||
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user