diff --git a/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f b/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f index 785a5ca..d3c6be9 100644 --- a/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f +++ b/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f @@ -123,25 +123,22 @@ subroutine run_ccsd_space_orb cc_space_v_oovo, cc_space_v_ovvo, cc_space_v_ovov, cc_space_v_ovoo, & cc_space_f_oo, cc_space_f_vo, cc_space_f_vv) + if (.not.do_ao_cholesky) then + print *, 'ao_choleky is required' + stop -1 + endif + do while (not_converged) ! Residue - if (do_ao_cholesky) then - call compute_H_vo_chol(nO,nV,t1,H_vo) - call gpu_upload(gpu_data, nO, nV, t1, t2, tau, tau_x); - call compute_H_oo_chol_gpu(gpu_data,nO,nV,0,H_oo) - call compute_H_vv_chol_gpu(gpu_data,nO,nV,0,H_vv) + call gpu_upload(gpu_data, nO, nV, t1, t2, tau, tau_x); + call compute_H_oo_chol_gpu(gpu_data,nO,nV,0,H_oo) + call compute_H_vo_chol_gpu(gpu_data,nO,nV,1,H_vo) + call compute_H_vv_chol_gpu(gpu_data,nO,nV,2,H_vv) - call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space_chol_gpu(gpu_data, nO, nV, t1, r2, max_r2) - else - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space_chol_gpu(gpu_data, nO, nV, t1, r2, max_r2) - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) - endif max_r = max(max_r1,max_r2) ! Update diff --git a/devel/ccsd_gpu/ccsd_space_orb_sub_chol.irp.f b/devel/ccsd_gpu/ccsd_space_orb_sub_chol.irp.f index 1d28808..45b3ea7 100644 --- a/devel/ccsd_gpu/ccsd_space_orb_sub_chol.irp.f +++ b/devel/ccsd_gpu/ccsd_space_orb_sub_chol.irp.f @@ -290,53 +290,3 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) end -! H_vo -subroutine compute_H_vo_chol(nO,nV,t1,H_vo) - - implicit none - - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(out) :: H_vo(nV, nO) - - integer :: a,b,i,j,u,k - - double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) - do i=1,nO - do a=1,nV - H_vo(a,i) = cc_space_f_vo(a,i) - enddo - enddo - - allocate(tmp_k(cholesky_mo_num)) - call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & - cc_space_v_ov_chol, cholesky_mo_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - - call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & - cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - H_vo, nV*nO) - deallocate(tmp_k) - - allocate(tmp(cholesky_mo_num,nO,nO)) - allocate(tmp2(cholesky_mo_num,nO,nO)) - - call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) - - do i=1,nO - do j=1,nO - do k=1,cholesky_mo_num - tmp2(k,j,i) = tmp(k,i,j) - enddo - enddo - enddo - deallocate(tmp) - - call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & - 1.d0, H_vo, nV) - -end - - diff --git a/devel/ccsd_gpu/gpu.c b/devel/ccsd_gpu/gpu.c index 5d75928..173782a 100644 --- a/devel/ccsd_gpu/gpu.c +++ b/devel/ccsd_gpu/gpu.c @@ -1378,13 +1378,13 @@ void compute_h_oo_chol_gpu(gpu_data* data, int nO, int nV, int igpu, double* H_o double* d_tmp_vov; cudaMalloc((void **)&d_tmp_vov, nV*nO*nV * sizeof(double)); + for (int i=0 ; i