diff --git a/devel/ccsd_gpu/EZFIO.cfg b/devel/ccsd_gpu/EZFIO.cfg index 328cd98..d617e62 100644 --- a/devel/ccsd_gpu/EZFIO.cfg +++ b/devel/ccsd_gpu/EZFIO.cfg @@ -1,3 +1,75 @@ +[cc_thresh_conv] +type: double precision +doc: Threshold for the convergence of the residual equations. +interface: ezfio,ocaml,provider +default: 1e-6 + +[cc_max_iter] +type: integer +doc: Maximum number of iterations. +interface: ezfio,ocaml,provider +default: 100 + +[cc_diis_depth] +type: integer +doc: Maximum depth of the DIIS, i.e., maximum number of iterations that the DIIS keeps in memory. Warning, we allocate matrices with the diis depth at the beginning without update. If you don't have enough memory it should crash in memory. +interface: ezfio,ocaml,provider +default: 8 + +[cc_level_shift] +type: double precision +doc: Level shift for the CC +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_level_shift_guess] +type: double precision +doc: Level shift for the guess of the CC amplitudes +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_update_method] +type: character*(32) +doc: Method used to update the CC amplitudes. none -> normal, diis -> with diis. +interface: ezfio,ocaml,provider +default: diis + +[cc_guess_t1] +type: character*(32) +doc: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[cc_guess_t2] +type: character*(32) +doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[io_amplitudes] +type: Disk_access +doc: Read/Write |CCSD| amplitudes from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + +[cc_par_t] +type: logical +doc: If true, the CCSD(T) will be computed. +interface: ezfio,ocaml,provider +default: False + +[cc_dev] +type: logical +doc: Only for dev purposes. +interface: ezfio,ocaml,provider +default: False + +[cc_ref] +type: integer +doc: Index of the reference determinant in psi_det for CC calculation. +interface: ezfio,ocaml,provider +default: 1 + [energy] type: double precision doc: CCSD energy diff --git a/devel/ccsd_gpu/NEED b/devel/ccsd_gpu/NEED index cf913f8..41df5d2 100644 --- a/devel/ccsd_gpu/NEED +++ b/devel/ccsd_gpu/NEED @@ -1,2 +1,2 @@ hartree_fock -utils_cc_gpu +determinants diff --git a/devel/ccsd_gpu/diis.irp.f b/devel/ccsd_gpu/diis.irp.f new file mode 100644 index 0000000..fe77137 --- /dev/null +++ b/devel/ccsd_gpu/diis.irp.f @@ -0,0 +1,529 @@ +! Code + +subroutine diis_cc(all_err,all_t,sze,m,iter,t) + + implicit none + + BEGIN_DOC + ! DIIS. Take the error vectors and the amplitudes of the previous + ! iterations to compute the new amplitudes + END_DOC + + ! {err_i}_{i=1}^{m_it} -> B -> c + ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1} + + integer, intent(in) :: m,iter,sze + double precision, intent(in) :: all_err(sze,m) + double precision, intent(in) :: all_t(sze,m) + + double precision, intent(out) :: t(sze) + + double precision, allocatable :: B(:,:), c(:), zero(:) + integer :: m_iter + integer :: i,j,k + integer :: info + integer, allocatable :: ipiv(:) + double precision :: accu + + m_iter = min(m,iter) + !print*,'m_iter',m_iter + allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1)) + allocate(ipiv(m+1)) + + ! B(i,j) = < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us + B = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(B,m,m_iter,sze,all_err) & + !$OMP PRIVATE(i,j,k,accu) & + !$OMP DEFAULT(NONE) + do j = 1, m_iter + do i = 1, m_iter + accu = 0d0 + !$OMP DO + do k = 1, sze + ! the errors of the ith iteration are in all_err(:,m+1-i) + accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j) + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + B(i,j) = B(i,j) + accu + !$OMP END CRITICAL + enddo + enddo + !$OMP END PARALLEL + + do i = 1, m_iter + B(i,m_iter+1) = -1 + enddo + do j = 1, m_iter + B(m_iter+1,j) = -1 + enddo + ! Debug + !print*,'B' + !do i = 1, m_iter+1 + ! write(*,'(100(F10.6))') B(i,:) + !enddo + + ! (0 0 .... 0 -1) + zero = 0d0 + zero(m_iter+1) = -1d0 + + ! Solve B.c = zero + call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info) + if (info /= 0) then + print*,'DIIS error in dgesv:', info + call abort + endif + ! c corresponds to the m_iter first solutions + c = zero(1:m_iter) + ! Debug + !print*,'c',c + !print*,'all_t' + !do i = 1, m + ! write(*,'(100(F10.6))') all_t(:,i) + !enddo + !print*,'all_err' + !do i = 1, m + ! write(*,'(100(F10.6))') all_err(:,i) + !enddo + + ! update T + !$OMP PARALLEL & + !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) & + !$OMP PRIVATE(i,j,accu) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, sze + t(i) = 0d0 + enddo + !$OMP END DO + do i = 1, m_iter + !$OMP DO + do j = 1, sze + t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i)) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + !print*,'new t',t + + deallocate(ipiv,B,c,zero) + +end + +! Update all err + +subroutine update_all_err(err,all_err,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the err vectors of the previous iterations to add the new one + ! The last err vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: err(sze) + double precision, intent(inout) :: all_err(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_err,err,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_err(j,i) = all_err(j,i+1) + enddo + !$OMP END DO + enddo + + ! Debug + !print*,'shift err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + + ! New + !$OMP DO + do i = 1, sze + all_err(i,m) = err(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + +end + +! Update all t + +subroutine update_all_t(t,all_t,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the t vectors of the previous iterations to add the new one + ! The last t vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: t(sze) + double precision, intent(inout) :: all_t(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_t,t,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_t(j,i) = all_t(j,i+1) + enddo + !$OMP END DO + enddo + + ! New + !$OMP DO + do i = 1, sze + all_t(i,m) = t(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated t' + !do i = 1, m + ! print*,i, all_t(:,i) + !enddo + +end + +! Err1 + +subroutine compute_err1(nO,nV,f_o,f_v,r1,err1) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t1 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO,nV) + + double precision, intent(out) :: err1(nO,nV) + + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO + do a = 1, nV + do i = 1, nO + err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Err2 + +subroutine compute_err2(nO,nV,f_o,f_v,r2,err2) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t2 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV) + + double precision, intent(out) :: err2(nO,nO,nV,nV) + + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Update t + +subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + if (cc_update_method == 'diis') then + + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! DIIS T1, it is not always good since the t1 can be small + ! That's why there is a call to update the t1 in the standard way + ! T1 error tensor + !call compute_err1(nO,nV,f_o,f_v,r1,err1) + ! Transfo errors and parameters in vectors + !tmp_err1 = reshape(err1,(/nO*nV/)) + !tmp_t1 = reshape(t1 ,(/nO*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + !call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1) + !t1 = reshape(tmp_t1 ,(/nO,nV/)) + call update_t1(nO,nV,f_o,f_v,r1,t1) + + ! DIIS T2 + ! T2 error tensor + call compute_err2(nO,nV,f_o,f_v,r2,err2) + ! Transfo errors and parameters in vectors + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + +end + +! Update t v2 + +subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: all_t(:,:), all_err(:,:), tmp_t(:) + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + integer :: i,j + + ! Allocate + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(tmp_t(nO*nV+nO*nO*nV*nV)) + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! Compute the errors and reshape them as vector + call compute_err1(nO,nV,f_o,f_v,r1,err1) + call compute_err2(nO,nV,f_o,f_v,r2,err2) + tmp_err1 = reshape(err1,(/nO*nV/)) + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t1 = reshape(t1 ,(/nO*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Gather the different parameters and errors + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,& + !$OMP all_t,all_t1,all_t2) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_err(i,j) = all_err1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_err(i+nO*nV,j) = all_err2(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_t(i,j) = all_t1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_t(i+nO*nV,j) = all_t2(i,j) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp_t1(i) = tmp_t(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp_t2(i) = tmp_t(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Reshape as tensors + t1 = reshape(tmp_t1 ,(/nO,nV/)) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + ! Deallocate + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err) + +end + +! Update t v3 + +subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV) + double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: tmp(:) + + integer :: i,j + + ! Allocate + allocate(tmp(nO*nV+nO*nO*nV*nV)) + + ! Compute the errors + call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV)) + call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp(i) = t1(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp(i+nO*nV) = t2(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + t1(i) = tmp(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + t2(i) = tmp(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Deallocate + deallocate(tmp) + +end diff --git a/devel/ccsd_gpu/energy.irp.f b/devel/ccsd_gpu/energy.irp.f new file mode 100644 index 0000000..fc1451b --- /dev/null +++ b/devel/ccsd_gpu/energy.irp.f @@ -0,0 +1,12 @@ +subroutine det_energy(det,energy) + + implicit none + + integer(bit_kind), intent(in) :: det + + double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem + + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion + +end diff --git a/devel/ccsd_gpu/guess_t.irp.f b/devel/ccsd_gpu/guess_t.irp.f new file mode 100644 index 0000000..bb26e13 --- /dev/null +++ b/devel/ccsd_gpu/guess_t.irp.f @@ -0,0 +1,208 @@ +! T1 + +subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + + ! inout + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a + + if (trim(cc_guess_t1) == 'none') then + t1 = 0d0 + else if (trim(cc_guess_t1) == 'MP') then + do a = 1, nV + do i = 1, nO + t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess) + enddo + enddo + else if (trim(cc_guess_t1) == 'read') then + call read_t1(nO,nV,t1) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1) + call abort + endif + +end + +! T2 + +subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV) + + ! inout + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + if (trim(cc_guess_t2) == 'none') then + t2 = 0d0 + else if (trim(cc_guess_t2) == 'MP') then + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess) + enddo + enddo + enddo + enddo + else if (trim(cc_guess_t2) == 'read') then + call read_t2(nO,nV,t2) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t2) + call abort + endif + +end + +! T1 + +subroutine write_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Write the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO, nV) + + ! internal + integer :: i,a, iunit + integer, external :: getunitandopen + + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','w') + do a = 1, nV + do i = 1, nO + write(iunit,'(F20.12)') t1(i,a) + enddo + enddo + close(iunit) + endif + +end + +! T2 + +subroutine write_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Write the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b, iunit + integer, external :: getunitandopen + + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','w') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + write(iunit,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(iunit) + endif + +end + +! T1 + +subroutine read_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Read the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a, iunit + logical :: ok + integer, external :: getunitandopen + + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','r') + do a = 1, nV + do i = 1, nO + read(iunit,'(F20.12)') t1(i,a) + enddo + enddo + close(iunit) + endif + +end + +! T2 + +subroutine read_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Read the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b, iunit + logical :: ok + + integer, external :: getunitandopen + + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','r') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(iunit,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(iunit) + endif + +end diff --git a/devel/ccsd_gpu/occupancy.irp.f b/devel/ccsd_gpu/occupancy.irp.f new file mode 100644 index 0000000..27b0ee5 --- /dev/null +++ b/devel/ccsd_gpu/occupancy.irp.f @@ -0,0 +1,328 @@ +! N spin orb + +subroutine extract_n_spin(det,n) + + implicit none + + BEGIN_DOC + ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals + ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb) + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: n(4) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si + logical :: ok, is_core, is_del + + ! Init + n = 0 + + ! Loop over the spin + do si = 1, 2 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + n(si) = n(si) + 1 + else + ! hole + n(si+2) = n(si+2) + 1 + endif + enddo + enddo + + !print*,n(1),n(2),n(3),n(4) + +end + +! Spin + +subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals + ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb) + END_DOC + + integer, intent(in) :: nO_m, nV_m + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO_m,2), list_vir(nV_m,2) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha/beta + + ! occ alpha -> list_occ(:,1) + ! occ beta -> list_occ(:,2) + ! vir alpha -> list_vir(:,1) + ! vir beta -> list_vir(:,2) + + ! Loop over the spin + do si = 1, 2 + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o,si) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v,si) = i + idx_v = idx_v + 1 + endif + enddo + enddo + +end + +! Space + +subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied and virtual alpha spin orbitals + END_DOC + + integer, intent(in) :: nO, nV + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO), list_vir(nV) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + if (elec_alpha_num /= elec_beta_num) then + print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort' + call abort + endif + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha + + ! occ alpha -> list_occ(:,1) + ! vir alpha -> list_vir(:,1) + + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, 1, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v) = i + idx_v = idx_v + 1 + endif + enddo + +end + +! is_core + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +! is_del + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_del_orb + if (list_del(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +! N orb + +BEGIN_PROVIDER [integer, cc_nO_m] +&BEGIN_PROVIDER [integer, cc_nOa] +&BEGIN_PROVIDER [integer, cc_nOb] +&BEGIN_PROVIDER [integer, cc_nOab] +&BEGIN_PROVIDER [integer, cc_nV_m] +&BEGIN_PROVIDER [integer, cc_nVa] +&BEGIN_PROVIDER [integer, cc_nVb] +&BEGIN_PROVIDER [integer, cc_nVab] +&BEGIN_PROVIDER [integer, cc_n_mo] +&BEGIN_PROVIDER [integer, cc_nO_S, (2)] +&BEGIN_PROVIDER [integer, cc_nV_S, (2)] + + implicit none + + BEGIN_DOC + ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det + ! a: alpha, b: beta + ! nO_m: max(a,b) occupied + ! nOa: nb a occupied + ! nOb: nb b occupied + ! nOab: nb a+b occupied + ! nV_m: max(a,b) virtual + ! nVa: nb a virtual + ! nVb: nb b virtual + ! nVab: nb a+b virtual + END_DOC + + integer :: n_spin(4) + + ! Extract number of occ/vir alpha/beta spin orbitals + call extract_n_spin(psi_det(1,1,cc_ref),n_spin) + + cc_nOa = n_spin(1) + cc_nOb = n_spin(2) + cc_nOab = cc_nOa + cc_nOb !n_spin(1) + n_spin(2) + cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2)) + cc_nVa = n_spin(3) + cc_nVb = n_spin(4) + cc_nVab = cc_nVa + cc_nVb !n_spin(3) + n_spin(4) + cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4)) + cc_n_mo = cc_nVa + cc_nVb !n_spin(1) + n_spin(3) + cc_nO_S = (/cc_nOa,cc_nOb/) + cc_nV_S = (/cc_nVa,cc_nVb/) + +END_PROVIDER + +! General + +BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)] + + implicit none + + BEGIN_DOC + ! List of general orbitals without core and deleted ones + END_DOC + + integer :: i,j + logical :: is_core, is_del + + j = 1 + do i = 1, mo_num + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + cc_list_gen(j) = i + j = j+1 + enddo + +END_PROVIDER + +! Space + +BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)] +&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spatial orbitals without core and deleted ones + END_DOC + + call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir) + +END_PROVIDER + +! Spin + +BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] +&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] +&BEGIN_PROVIDER [logical, cc_ref_is_open_shell] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spin orbitals without core and deleted ones + END_DOC + + integer :: i + + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + + cc_ref_is_open_shell = .False. + do i = 1, cc_nO_m + if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then + cc_ref_is_open_shell = .True. + endif + enddo + + +END_PROVIDER diff --git a/devel/ccsd_gpu/phase.irp.f b/devel/ccsd_gpu/phase.irp.f new file mode 100644 index 0000000..e0703fb --- /dev/null +++ b/devel/ccsd_gpu/phase.irp.f @@ -0,0 +1,137 @@ +! phase + +subroutine get_phase_general(det1,det2,phase,degree,Nint) + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: degree + integer :: n(2) + integer, allocatable :: list_anni(:,:), list_crea(:,:) + + allocate(list_anni(N_int*bit_kind_size,2)) + allocate(list_crea(N_int*bit_kind_size,2)) + + call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) +end + +! Get excitation general + +subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) + + use bitmasks + + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: list_crea(Nint*bit_kind_size,2) + integer, intent(out) :: list_anni(Nint*bit_kind_size,2) + integer, intent(out) :: degree, n(2) + + integer, allocatable :: l1(:,:), l2(:,:) + integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:) + integer, allocatable :: pos_anni(:,:), pos_crea(:,:) + + integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d + + allocate(l1(Nint*bit_kind_size,2)) + allocate(l2(Nint*bit_kind_size,2)) + allocate(det_crea(Nint,2),det_anni(Nint,2)) + + ! 1 111010 + ! 2 110101 + ! + !not 1-> 000101 + ! 2 110101 + !and 000101 -> crea + ! + ! 1 111010 + !not 2-> 001010 + ! 001010 -> anni + + do j = 1, 2 + do i = 1, Nint + det_crea(i,j) = iand(not(det1(i,j)),det2(i,j)) + enddo + enddo + + do j = 1, 2 + do i = 1, Nint + det_anni(i,j) = iand(det1(i,j),not(det2(i,j))) + enddo + enddo + + call bitstring_to_list_ab(det1,l1,n1,Nint) + call bitstring_to_list_ab(det2,l2,n2,Nint) + call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint) + call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint) + + do i = 1, 2 + if (n_crea(i) /= n_anni(i)) then + print*,'Well, it seems we have a problem here...' + call abort + endif + enddo + + !1 11110011001 1 2 3 4 7 8 11 + !pos 1 2 3 4 5 6 7 + !2 11100101011 1 2 3 6 8 10 11 + !anni 00010010000 4 7 + !pos 4 5 + !crea 00000100010 6 10 + !pos 4 6 + !4 -> 6 pos(4 -> 4) + !7 -> 10 pos(5 -> 6) + + n = n_anni + degree = n_anni(1) + n_anni(2) + + allocate(pos_anni(max(n(1),n(2)),2)) + allocate(pos_crea(max(n(1),n(2)),2)) + + ! Search pos anni + do j = 1, 2 + k = 1 + do i = 1, n1(j) + if (k > n_anni(j)) exit + if (l1(i,j) /= list_anni(k,j)) cycle + pos_anni(k,j) = i + k = k + 1 + enddo + enddo + + ! Search pos crea + do j = 1, 2 + k = 1 + do i = 1, n2(j) + if (k > n_crea(j)) exit + if (l2(i,j) /= list_crea(k,j)) cycle + pos_crea(k,j) = i + k = k + 1 + enddo + enddo + + ! Distance between the ith anni and the ith crea op + ! By doing so there is no crossing between the different pairs of anni/crea + ! and the phase is determined by the sum of the distances + ! -> (-1)^{sum of the distances} + d = 0 + do j = 1, 2 + do i = 1, n(j) + d = d + abs(pos_anni(i,j) - pos_crea(i,j)) + enddo + enddo + + phase = dble((-1)**d) + + ! Debug + !print*,l2(1:n2(1),1) + !print*,l2(1:n2(2),2) + !!call print_det(det1,Nint) + !!call print_det(det2,Nint) + !print*,phase + !print*,'' +end diff --git a/devel/ccsd_gpu/update_t.irp.f b/devel/ccsd_gpu/update_t.irp.f new file mode 100644 index 0000000..0cf8626 --- /dev/null +++ b/devel/ccsd_gpu/update_t.irp.f @@ -0,0 +1,73 @@ +! T1 + +subroutine update_t1(nO,nV,f_o,f_v,r1,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO, nV) + + ! inout + double precision, intent(inout) :: t1(nO, nV) + + ! internal + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO + do a = 1, nV + do i = 1, nO + t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! T2 + +subroutine update_t2(nO,nV,f_o,f_v,r2,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV) + + ! inout + double precision, intent(inout) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end