qp_plugins_scemama/devel/ccsd_gpu/ccsd_space_orb_sub.irp.f

subroutine run_ccsd_space_orb
  use gpu_module
  implicit none

  integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d
  integer :: u,v,gam,beta,tmp_gam,tmp_beta
  integer :: nb_iter
  double precision :: get_two_e_integral
  double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
  logical :: not_converged

  double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
  double precision, allocatable :: t1(:,:), r1(:,:)
  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)

  double precision, allocatable :: all_err(:,:), all_t(:,:)
  integer, allocatable          :: list_occ(:), list_vir(:)
  integer(bit_kind)             :: det(N_int,2)
  integer                       :: nO, nV, nOa, nVa

  if (do_ao_cholesky) then
    PROVIDE cholesky_mo_transp
    FREE cholesky_ao
  else
    PROVIDE mo_two_e_integrals_in_map
  endif

  det = psi_det(:,:,cc_ref)
  print*,'Reference determinant:'
  call print_det(det,N_int)

  nOa = cc_nOa
  nVa = cc_nVa

  ! Check that the reference is a closed shell determinant
  if (cc_ref_is_open_shell) then
    call abort
  endif

  ! Number of occ/vir spatial orb
  nO = nOa
  nV = nVa

  allocate(list_occ(nO),list_vir(nV))
  list_occ = cc_list_occ
  list_vir = cc_list_vir
  ! Debug
  !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
  !print*,'occ',list_occ
  !print*,'vir',list_vir

  allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
  allocate(tau(nO,nO,nV,nV))
  allocate(tau_x(nO,nO,nV,nV))
  allocate(t1(nO,nV), r1(nO,nV))
  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))

  if (cc_update_method == 'diis') then
    double precision :: rss, diis_mem, extra_mem
    double precision, external :: memory_of_double
    diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV)
    call resident_memory(rss)
    do while (cc_diis_depth > 1)
      if (rss + diis_mem * cc_diis_depth > qp_max_mem) then
        cc_diis_depth = cc_diis_depth - 1
      else
        exit
      endif
    end do
    if (cc_diis_depth <= 1) then
      print *,  'Not enough memory for DIIS'
      stop -1
    endif
    print *,  'DIIS size  ', cc_diis_depth

    allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth))
    !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED)
    do j=1,cc_diis_depth
      !$OMP DO
      do i=1, size(all_err,1)
        all_err(i,j) = 0d0
        all_t(i,j)   = 0d0
      enddo
      !$OMP END DO NOWAIT
    enddo
    !$OMP END PARALLEL
  endif

  if (elec_alpha_num /= elec_beta_num) then
    print*, 'Only for closed shell systems'
    print*, 'elec_alpha_num=',elec_alpha_num
    print*, 'elec_beta_num =',elec_beta_num
    print*, 'abort'
    call abort
  endif

  ! Init
  call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
  call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
  call update_tau_space(nO,nV,t1,t2,tau)
  call update_tau_x_space(nO,nV,tau,tau_x)
  !print*,'hf_energy', hf_energy
  call det_energy(det,uncorr_energy)
  print*,'Det energy', uncorr_energy
  call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
  print*,'Guess energy', uncorr_energy+energy, energy

  nb_iter = 0
  not_converged = .True.
  max_r1 = 0d0
  max_r2 = 0d0

  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  call wall_time(ta)

  type(c_ptr) :: gpu_data

  gpu_data = gpu_init(nO, nV, cholesky_mo_num, &
      cc_space_v_oo_chol, cc_space_v_ov_chol, cc_space_v_vo_chol, cc_space_v_vv_chol, &
      cc_space_v_oooo, cc_space_v_vooo, cc_space_v_voov, cc_space_v_oovv, cc_space_v_vvoo, &
      cc_space_v_oovo, cc_space_v_ovvo, cc_space_v_ovov, cc_space_v_ovoo, &
      cc_space_f_oo, cc_space_f_ov, cc_space_f_vo, cc_space_f_vv)

  if (.not.do_ao_cholesky) then
    print *, 'ao_choleky is required'
    stop -1
  endif

  do while (not_converged)

    ! Residue
    call gpu_upload(gpu_data, nO, nV, t1, t2, tau, tau_x);
    !$OMP PARALLEL SECTIONS
    !$OMP SECTION
    call compute_H_oo_chol_gpu(gpu_data,0)

    !$OMP SECTION
    call compute_H_vo_chol_gpu(gpu_data,1)

    !$OMP SECTION
    call compute_H_vv_chol_gpu(gpu_data,2)

    !$OMP END PARALLEL SECTIONS

    call compute_r1_space_chol_gpu(gpu_data, nO, nV, t1, r1, max_r1)
    call compute_r2_space_chol_gpu(gpu_data, nO, nV, t1, r2, max_r2)

    max_r = max(max_r1,max_r2)

    ! Update
    if (cc_update_method == 'diis') then
      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
      call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)

    ! Standard update as T = T - Delta
    elseif (cc_update_method == 'none') then
      call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
      call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
    else
      print*,'Unkown cc_method_method: '//cc_update_method
    endif

    call update_tau_space(nO,nV,t1,t2,tau)
    call update_tau_x_space(nO,nV,tau,tau_x)

    ! Energy
    call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'

    nb_iter = nb_iter + 1
    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
      not_converged = .False.
    endif

  enddo
  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  call wall_time(tb)
  print*,'Time: ',tb-ta, ' s'
  print*,''
  if (max_r < cc_thresh_conv) then
    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
  else
    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
  endif
  print*,''
  write(*,'(A15,F18.12,A3)') ' E(CCSD)     = ', uncorr_energy+energy, ' Ha'
  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
  write(*,'(A15,ES10.2,A3)')' Conv        = ', max_r
  print*,''

  if (write_amplitudes) then
    call write_t1(nO,nV,t1)
    call write_t2(nO,nV,t2)
    call ezfio_set_utils_cc_io_amplitudes('Read')
  endif

  ! Deallocation
  if (cc_update_method == 'diis') then
    deallocate(all_err,all_t)
  endif

  deallocate(r1,r2,tau)

  ! CCSD(T)
  double precision :: e_t
  e_t = 0.d0

  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then

    ! Dumb way
    !call wall_time(ta)
    !call ccsd_par_t_space(nO,nV,t1,t2,e_t)
    !call wall_time(tb)
    !print*,'Time: ',tb-ta, ' s'

    !print*,''
    !write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
    !write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
    !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
    !print*,''

    ! New
    print*,'Computing (T) correction...'
    call wall_time(ta)
!    call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
!         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)

    e_t = uncorr_energy + energy ! For print in next call
    call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)

    call wall_time(tb)
    print*,'Time: ',tb-ta, ' s'

    print*,''
    write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
    write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
    print*,''
  endif

  call save_energy(uncorr_energy + energy, e_t)

  deallocate(t1,t2)

end

! Energy

subroutine ccsd_energy_space(nO,nV,tau,t1,energy)

  implicit none

  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: tau(nO,nO,nV,nV)
  double precision, intent(in)  :: t1(nO,nV)
  double precision, intent(out) :: energy

  ! internal
  integer :: i,j,a,b
  double precision :: e

  energy = 0d0
  !$omp parallel &
  !$omp shared(nO,nV,energy,tau,t1,&
  !$omp cc_space_f_vo,cc_space_w_oovv) &
  !$omp private(i,j,a,b,e) &
  !$omp default(none)
  e = 0d0
  !$omp do
  do a = 1, nV
    do i = 1, nO
      e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
    enddo
  enddo
  !$omp end do nowait
  !$omp do
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
       enddo
      enddo
    enddo
  enddo
  !$omp end do nowait
  !$omp critical
  energy = energy + e
  !$omp end critical
  !$omp end parallel

end

subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy)

  implicit none

  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: tau_x(nO,nO,nV,nV)
  double precision, intent(in)  :: t1(nO,nV)
  double precision, intent(out) :: energy

  ! internal
  integer :: i,j,a,b
  double precision :: e

  energy = 0d0
  !$omp parallel &
  !$omp shared(nO,nV,energy,tau_x,t1,&
  !$omp cc_space_f_vo,cc_space_v_oovv) &
  !$omp private(i,j,a,b,e) &
  !$omp default(none)
  e = 0d0
  !$omp do
  do a = 1, nV
    do i = 1, nO
      e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
    enddo
  enddo
  !$omp end do nowait
  !$omp do
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b)
       enddo
      enddo
    enddo
  enddo
  !$omp end do nowait
  !$omp critical
  energy = energy + e
  !$omp end critical
  !$omp end parallel

end

! Tau

subroutine update_tau_space(nO,nV,t1,t2,tau)

  implicit none

  ! in
  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)

  ! out
  double precision, intent(out) :: tau(nO,nO,nV,nV)

  ! internal
  integer                       :: i,j,a,b

  !$OMP PARALLEL &
  !$OMP SHARED(nO,nV,tau,t2,t1) &
  !$OMP PRIVATE(i,j,a,b) &
  !$OMP DEFAULT(NONE)
  !$OMP DO
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

end

subroutine update_tau_x_space(nO,nV,tau,tau_x)

  implicit none

  ! in
  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: tau(nO,nO,nV,nV)

  ! out
  double precision, intent(out) :: tau_x(nO,nO,nV,nV)

  ! internal
  integer                       :: i,j,a,b

  !$OMP PARALLEL &
  !$OMP SHARED(nO,nV,tau,tau_x) &
  !$OMP PRIVATE(i,j,a,b) &
  !$OMP DEFAULT(NONE)
  !$OMP DO
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a)
        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

end