9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-07-11 05:33:34 +02:00

Merge branch 'dev-stable' of github.com:quantumpackage/qp2 into dev-stable

Conflicts:
	src/ao_two_e_ints/cholesky.irp.f
	src/mo_two_e_ints/cholesky.irp.f
This commit is contained in:
Anthony Scemama 2023-05-26 11:49:51 +02:00
commit d2494ac323
10 changed files with 1029 additions and 404 deletions

View File

@ -11,6 +11,12 @@ interface: ezfio,provider,ocaml
default: 1.e-15
ezfio_name: threshold_ao
[ao_cholesky_threshold]
type: Threshold
doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero
interface: ezfio,provider,ocaml
default: 1.e-12
[do_direct_integrals]
type: logical
doc: Compute integrals on the fly (very slow, only for debugging)

View File

@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ]
! Number of Cholesky vectors in AO basis
END_DOC
integer :: i,j,k,l
double precision :: xnorm0, x, integral
double precision, external :: ao_two_e_integral
cholesky_ao_num_guess = 0
xnorm0 = 0.d0
x = 0.d0
do j=1,ao_num
do i=1,ao_num
integral = ao_two_e_integral(i,i,j,j)
if (integral > ao_integrals_threshold) then
cholesky_ao_num_guess += 1
else
x += integral
endif
enddo
enddo
print *, 'Cholesky decomposition of AO integrals'
print *, '--------------------------------------'
print *, ''
print *, 'Estimated Error: ', x
print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)'
cholesky_ao_num_guess = ao_num*ao_num / 2
END_PROVIDER
BEGIN_PROVIDER [ integer, cholesky_ao_num ]
@ -39,7 +17,7 @@ END_PROVIDER
END_DOC
type(c_ptr) :: ptr
integer :: fd, i,j,k,l, rank
integer :: fd, i,j,k,l,m,rank
double precision, pointer :: ao_integrals(:,:,:,:)
double precision, external :: ao_two_e_integral
@ -49,49 +27,90 @@ END_PROVIDER
8, fd, .False., ptr)
call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/))
double precision :: integral
print*, 'Providing the AO integrals (Cholesky)'
call wall_time(wall_1)
call cpu_time(cpu_1)
ao_integrals = 0.d0
double precision :: integral, cpu_1, cpu_2, wall_1, wall_2
logical, external :: ao_two_e_integral_zero
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral)
do l=1,ao_num
double precision, external :: get_ao_two_e_integral
if (read_ao_two_e_integrals) then
PROVIDE ao_two_e_integrals_in_map
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
do m=0,9
do l=1+m,ao_num,10
!$OMP DO SCHEDULE(dynamic)
do j=1,l
do k=1,ao_num
do i=1,k
do i=1,min(k,j)
if (ao_two_e_integral_zero(i,j,k,l)) cycle
integral = ao_two_e_integral(i,k,j,l)
integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map)
ao_integrals(i,k,j,l) = integral
ao_integrals(k,i,j,l) = integral
ao_integrals(i,k,l,j) = integral
ao_integrals(k,i,l,j) = integral
ao_integrals(j,l,i,k) = integral
ao_integrals(j,l,k,i) = integral
ao_integrals(l,j,i,k) = integral
ao_integrals(l,j,k,i) = integral
enddo
enddo
enddo
!$OMP END DO NOWAIT
enddo
!$OMP MASTER
call wall_time(wall_2)
print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
!$OMP END MASTER
enddo
!$OMP END PARALLEL
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral)
do l=1,ao_num
else
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
do m=0,9
do l=1+m,ao_num,10
!$OMP DO SCHEDULE(dynamic)
do j=1,l
do k=1,ao_num
do i=1,k
do i=1,min(k,j)
if (ao_two_e_integral_zero(i,j,k,l)) cycle
integral = ao_two_e_integral(i,k,j,l)
ao_integrals(i,k,j,l) = integral
ao_integrals(k,i,j,l) = integral
ao_integrals(i,k,l,j) = integral
ao_integrals(k,i,l,j) = integral
ao_integrals(j,l,i,k) = integral
ao_integrals(j,l,k,i) = integral
ao_integrals(l,j,i,k) = integral
ao_integrals(l,j,k,i) = integral
enddo
enddo
enddo
!$OMP END DO NOWAIT
enddo
!$OMP MASTER
call wall_time(wall_2)
print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
!$OMP END MASTER
enddo
!$OMP END PARALLEL
call wall_time(wall_2)
call cpu_time(cpu_2)
print*, 'AO integrals provided:'
print*, ' cpu time :',cpu_2 - cpu_1, 's'
print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
endif
! Call Lapack
cholesky_ao_num = cholesky_ao_num_guess
call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao)
call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao)
print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)'
! Remove mmap

View File

@ -16,20 +16,16 @@ subroutine run_ccsd_space_orb
double precision, allocatable :: all_err(:,:), all_t(:,:)
integer, allocatable :: list_occ(:), list_vir(:)
integer(bit_kind) :: det(N_int,2)
integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4)
integer :: nO, nV, nOa, nVa
PROVIDE mo_two_e_integrals_in_map
! PROVIDE mo_two_e_integrals_in_map
det = psi_det(:,:,cc_ref)
print*,'Reference determinant:'
call print_det(det,N_int)
! Extract number of occ/vir alpha/beta spin orbitals
!call extract_n_spin(det,n_spin)
nOa = cc_nOa !n_spin(1)
nOb = cc_nOb !n_spin(2)
nVa = cc_nVa !n_spin(3)
nVb = cc_nVb !n_spin(4)
nOa = cc_nOa
nVa = cc_nVa
! Check that the reference is a closed shell determinant
if (cc_ref_is_open_shell) then
@ -169,8 +165,13 @@ subroutine run_ccsd_space_orb
! New
print*,'Computing (T) correction...'
call wall_time(ta)
call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
e_t = uncorr_energy + energy ! For print in next call
call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
call wall_time(tb)
print*,'Time: ',tb-ta, ' s'
@ -763,7 +764,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
! internal
double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:)
double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:)
double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:)
integer :: u,v,i,j,beta,gam,a,b
allocate(g_occ(nO,nO), g_vir(nV,nV))
@ -833,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
! enddo
!enddo
allocate(B1(nV,nV,nV,nV))
call compute_B1(nO,nV,t1,t2,B1)
call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
! allocate(B1(nV,nV,nV,nV))
! call compute_B1(nO,nV,t1,t2,B1)
allocate(B1_gam(nV,nV,nV))
do gam=1,nV
call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam)
call dgemm('N','N',nO*nO,nV,nV*nV, &
1d0, tau, size(tau,1) * size(tau,2), &
B1 , size(B1,1) * size(B1,2), &
1d0, r2, size(r2,1) * size(r2,2))
deallocate(B1)
B1_gam , size(B1_gam,1) * size(B1_gam,2), &
1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2))
enddo
deallocate(B1_gam)
!do gam = 1, nV
! do beta = 1, nV
@ -1511,6 +1517,90 @@ end
! B1
subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam)
implicit none
integer, intent(in) :: nO,nV,gam
double precision, intent(in) :: t1(nO, nV)
double precision, intent(in) :: t2(nO, nO, nV, nV)
double precision, intent(out) :: B1(nV, nV, nV)
integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta
! do beta = 1, nV
! do b = 1, nV
! do a = 1, nV
! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
!
! do i = 1, nO
! B1(a,b,beta) = B1(a,b,beta) &
! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
! enddo
!
! enddo
! enddo
! enddo
double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:)
allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV))
! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
!$omp parallel &
!$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) &
!$omp private(a,b,beta) &
!$omp default(none)
!$omp do
do beta = 1, nV
do b = 1, nV
do a = 1, nV
B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
enddo
enddo
enddo
!$omp end do nowait
do i = 1, nO
!$omp do
do b = 1, nV
do a = 1, nV
X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam)
enddo
enddo
!$omp end do nowait
enddo
!$omp end parallel
! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
call dgemm('N','N', nV*nV*nV, 1, nO, &
-1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
t1(1,gam), size(t1,1), &
1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3))
! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
call dgemm('N','N', nV*nV, nV, nO, &
-1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), &
t1 , size(t1,1), &
0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2))
!$omp parallel &
!$omp shared(nV,B1,Y_vvvv,gam) &
!$omp private(a,b,beta) &
!$omp default(none)
!$omp do
do beta = 1, nV
do b = 1, nV
do a = 1, nV
B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta)
enddo
enddo
enddo
!$omp end do
!$omp end parallel
deallocate(X_vvvo,Y_vvvv)
end
subroutine compute_B1(nO,nV,t1,t2,B1)
implicit none

View File

@ -10,23 +10,16 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
double precision, intent(out) :: energy
double precision, allocatable :: W(:,:,:,:,:,:)
double precision, allocatable :: V(:,:,:,:,:,:)
double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
integer :: i,j,k,l,a,b,c,d
double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3
double precision :: e,ta,tb
call set_multiple_levels_omp(.False.)
allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
call set_multiple_levels_omp(.False.)
! Temporary arrays
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
!$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
@ -103,25 +96,70 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
!$OMP END PARALLEL
energy = 0d0
!$OMP PARALLEL &
!$OMP PRIVATE(a,b,c,x1) &
!$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
!$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) &
!$OMP PRIVATE(i,j,k,e,delta,delta_abc) &
!$OMP DEFAULT(SHARED)
allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
double precision, external :: ccsd_t_task_aba
double precision, external :: ccsd_t_task_abc
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
e = 0d0
!$OMP DO SCHEDULE(dynamic)
do a = 1, nV
do b = a+1, nV
do c = b+1, nV
delta_abc = f_v(a) + f_v(b) + f_v(c)
e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
enddo
e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
energy = energy + e
!$OMP END CRITICAL
!$OMP END PARALLEL
energy = energy / 3.d0
deallocate(X_vovv,X_ooov,T_voov,T_oovv)
end
double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,&
X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
implicit none
integer, intent(in) :: nO,nV,a,b,c
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: X_oovv(nO,nO,nV,nV)
double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
double precision :: delta, delta_abc
integer :: i,j,k
double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
delta_abc = f_v(a) + f_v(b) + f_v(c)
e = 0.d0
do k = 1, nO
do j = 1, nO
do i = 1, nO
@ -139,44 +177,58 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
enddo
enddo
enddo
enddo
enddo
c = a
do b = 1, nV
if (b == c) cycle
delta_abc = f_v(a) + f_v(b) + f_v(c)
call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
e = e + delta * ( &
(4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + &
(4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + &
(4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) )
enddo
enddo
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
energy = energy + e
!$OMP END CRITICAL
deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
!$OMP END PARALLEL
energy = energy / 3.d0
deallocate(X_vovv,X_ooov,T_voov,T_oovv)
end
double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,&
X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
implicit none
integer, intent(in) :: nO,nV,a,b
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: X_oovv(nO,nO,nV,nV)
double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
double precision :: delta, delta_abc
integer :: i,j,k
double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
delta_abc = f_v(a) + f_v(b) + f_v(a)
e = 0.d0
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
e = e + delta * ( &
(4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + &
(4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + &
(4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) )
enddo
enddo
enddo
deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
end
subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)

View File

@ -0,0 +1,363 @@
! Main
subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: t2(nO,nO,nV,nV)
double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
double precision, intent(inout) :: energy
double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
integer :: i,j,k,l,a,b,c,d
double precision :: e,ta,tb,eccsd
eccsd = energy
call set_multiple_levels_omp(.False.)
allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
!$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
!$OMP PRIVATE(a,b,c,d,i,j,k,l) &
!$OMP DEFAULT(NONE)
!v_vvvo(b,a,d,i) * t2(k,j,c,d) &
!X_vovv(d,i,b,a,i) * T_voov(d,j,c,k)
!$OMP DO
do a = 1, nV
do b = 1, nV
do i = 1, nO
do d = 1, nV
X_vovv(d,i,b,a) = v_vvvo(b,a,d,i)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do c = 1, nV
do j = 1, nO
do k = 1, nO
do d = 1, nV
T_voov(d,k,j,c) = t2(k,j,c,d)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!v_vooo(c,j,k,l) * t2(i,l,a,b) &
!X_ooov(l,j,k,c) * T_oovv(l,i,a,b) &
!$OMP DO
do c = 1, nV
do k = 1, nO
do j = 1, nO
do l = 1, nO
X_ooov(l,j,k,c) = v_vooo(c,j,k,l)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do b = 1, nV
do a = 1, nV
do i = 1, nO
do l = 1, nO
T_oovv(l,i,a,b) = t2(i,l,a,b)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!X_oovv(j,k,b,c) * T1_vo(a,i) &
!$OMP DO
do c = 1, nV
do b = 1, nV
do k = 1, nO
do j = 1, nO
X_oovv(j,k,b,c) = v_vvoo(b,c,j,k)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP END PARALLEL
double precision, external :: ccsd_t_task_aba
double precision, external :: ccsd_t_task_abc
! logical, external :: omp_test_lock
double precision, allocatable :: memo(:), Pabc(:), waccu(:)
integer*8, allocatable :: sampled(:)
! integer(omp_lock_kind), allocatable :: lock(:)
integer*2 , allocatable :: abc(:,:)
integer*8 :: Nabc, i8
integer*8, allocatable :: iorder(:)
double precision :: eocc
double precision :: norm
integer :: kiter, isample
! Prepare table of triplets (a,b,c)
Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV
allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc))
allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc))
! eocc = 3.d0/dble(nO) * sum(f_o(1:nO))
Nabc = 0_8
do a = 1, nV
do b = a+1, nV
do c = b+1, nV
Nabc = Nabc + 1_8
Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c))
abc(1,Nabc) = a
abc(2,Nabc) = b
abc(3,Nabc) = c
enddo
Nabc = Nabc + 1_8
abc(1,Nabc) = a
abc(2,Nabc) = b
abc(3,Nabc) = a
Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b))
Nabc = Nabc + 1_8
abc(1,Nabc) = b
abc(2,Nabc) = a
abc(3,Nabc) = b
Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b))
enddo
enddo
do i8=1,Nabc
iorder(i8) = i8
enddo
! Sort triplets in decreasing Pabc
call dsort_big(Pabc, iorder, Nabc)
! Normalize
norm = 0.d0
do i8=Nabc,1,-1
norm = norm + Pabc(i8)
enddo
norm = 1.d0/norm
do i8=1,Nabc
Pabc(i8) = Pabc(i8) * norm
enddo
call i8set_order_big(abc, iorder, Nabc)
! Cumulative distribution for sampling
waccu(Nabc) = 0.d0
do i8=Nabc-1,1,-1
waccu(i8) = waccu(i8+1) - Pabc(i8+1)
enddo
waccu(:) = waccu(:) + 1.d0
logical :: converged, do_comp
double precision :: eta, variance, error, sample
double precision :: t00, t01
integer*8 :: ieta, Ncomputed
integer*8, external :: binary_search
integer :: nbuckets
nbuckets = 100
double precision, allocatable :: wsum(:)
allocate(wsum(nbuckets))
converged = .False.
Ncomputed = 0_8
energy = 0.d0
variance = 0.d0
memo(:) = 0.d0
sampled(:) = -1_8
integer*8 :: ileft, iright, imin
ileft = 1_8
iright = Nabc
integer*8, allocatable :: bounds(:,:)
allocate (bounds(2,nbuckets))
do isample=1,nbuckets
eta = 1.d0/dble(nbuckets) * dble(isample)
ieta = binary_search(waccu,eta,Nabc,ileft,iright)
bounds(1,isample) = ileft
bounds(2,isample) = ieta
ileft = ieta+1
wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) )
enddo
Pabc(:) = 1.d0/Pabc(:)
print '(A)', ''
print '(A)', ' +----------------------+--------------+----------+'
print '(A)', ' | E(CCSD(T)) | Error | % |'
print '(A)', ' +----------------------+--------------+----------+'
call wall_time(t00)
imin = 1_8
!$OMP PARALLEL &
!$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) &
!$OMP DEFAULT(SHARED)
do kiter=1,Nabc
!$OMP MASTER
do while ((imin <= Nabc).and.(sampled(imin)>-1_8))
imin = imin+1
enddo
! Deterministic part
if (imin < Nabc) then
ieta=imin
sampled(ieta) = 0_8
a = abc(1,ieta)
b = abc(2,ieta)
c = abc(3,ieta)
Ncomputed += 1_8
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
if (a/=c) then
memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
else
memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
endif
!$OMP END TASK
endif
! Stochastic part
call random_number(eta)
do isample=1,nbuckets
if (imin >= bounds(2,isample)) then
cycle
endif
ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)
if (sampled(ieta) == -1_8) then
sampled(ieta) = 0_8
a = abc(1,ieta)
b = abc(2,ieta)
c = abc(3,ieta)
Ncomputed += 1_8
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
if (a/=c) then
memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
else
memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
endif
!$OMP END TASK
endif
sampled(ieta) = sampled(ieta)+1_8
enddo
call wall_time(t01)
if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then
t00 = t01
!$OMP TASKWAIT
double precision :: ET, ET2
double precision :: energy_stoch, energy_det
double precision :: scale
double precision :: w
double precision :: tmp
energy_stoch = 0.d0
energy_det = 0.d0
norm = 0.d0
scale = 1.d0
ET = 0.d0
ET2 = 0.d0
do isample=1,nbuckets
if (imin >= bounds(2,isample)) then
energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample)))
scale = scale - wsum(isample)
else
exit
endif
enddo
do ieta=bounds(1,isample), Nabc
w = dble(max(sampled(ieta),0_8))
tmp = w * memo(ieta) * Pabc(ieta)
ET = ET + tmp
ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta)
norm = norm + w
enddo
norm = norm/scale
if (norm > 0.d0) then
energy_stoch = ET / norm
variance = ET2 / norm - energy_stoch*energy_stoch
endif
energy = energy_det + energy_stoch
print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc)
endif
!$OMP END MASTER
if (imin >= Nabc) exit
enddo
!$OMP END PARALLEL
print '(A)', ' +----------------------+--------------+----------+'
print '(A)', ''
deallocate(X_vovv,X_ooov,T_voov,T_oovv)
end
integer*8 function binary_search(arr, key, size)
implicit none
BEGIN_DOC
! Searches the key in array arr(1:size) between l_in and r_in, and returns its index
END_DOC
integer*8 :: size, i, j, mid, l_in, r_in
double precision, dimension(size) :: arr(1:size)
double precision :: key
i = 1_8
j = size
do while (j >= i)
mid = i + (j - i) / 2
if (arr(mid) >= key) then
if (mid > 1 .and. arr(mid - 1) < key) then
binary_search = mid
return
end if
j = mid - 1
else if (arr(mid) < key) then
i = mid + 1
else
binary_search = mid + 1
return
end if
end do
binary_search = i
end function binary_search

View File

@ -7,11 +7,41 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num
integer :: k
call set_multiple_levels_omp(.False.)
print *, 'AO->MO Transformation of Cholesky vectors'
!$OMP PARALLEL DO PRIVATE(k)
do k=1,cholesky_ao_num
call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num)
enddo
!$OMP END PARALLEL DO
print *, ''
END_PROVIDER
BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ]
implicit none
BEGIN_DOC
! Cholesky vectors in MO basis
END_DOC
integer :: i,j,k
double precision, allocatable :: buffer(:,:)
print *, 'AO->MO Transformation of Cholesky vectors .'
!$OMP PARALLEL PRIVATE(i,j,k,buffer)
allocate(buffer(mo_num,mo_num))
!$OMP DO SCHEDULE(static)
do k=1,cholesky_ao_num
call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num)
do j=1,mo_num
do i=1,mo_num
cholesky_mo_transp(k,i,j) = buffer(i,j)
enddo
enddo
enddo
!$OMP END DO
deallocate(buffer)
!$OMP END PARALLEL
print *, ''
END_PROVIDER

View File

@ -4,12 +4,54 @@
BEGIN_DOC
! big_array_coulomb_integrals(j,i,k) = <ij|kj> = (ik|jj)
!
! big_array_exchange_integrals(i,j,k) = <ij|jk> = (ij|kj)
! big_array_exchange_integrals(j,i,k) = <ij|jk> = (ij|kj)
END_DOC
integer :: i,j,k,l
integer :: i,j,k,l,a
double precision :: get_two_e_integral
double precision :: integral
if (do_ao_cholesky) then
double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:)
allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num))
do j=1,mo_num
buffer_jj(:,j) = cholesky_mo_transp(:,j,j)
enddo
call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, &
cholesky_mo_transp, cholesky_ao_num, &
buffer_jj, cholesky_ao_num, 0.d0, &
buffer, mo_num*mo_num)
do k = 1, mo_num
do i = 1, mo_num
do j = 1, mo_num
big_array_coulomb_integrals(j,i,k) = buffer(i,k,j)
enddo
enddo
enddo
deallocate(buffer_jj)
allocate(buffer_jj(mo_num,mo_num))
do j = 1, mo_num
call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, &
cholesky_mo_transp(1,1,j), cholesky_ao_num, &
cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, &
buffer_jj, mo_num)
do k=1,mo_num
do i=1,mo_num
big_array_exchange_integrals(j,i,k) = buffer_jj(i,k)
enddo
enddo
enddo
deallocate(buffer_jj)
else
do k = 1, mo_num
do i = 1, mo_num
do j = 1, mo_num
@ -23,5 +65,7 @@
enddo
enddo
endif
END_PROVIDER

View File

@ -1353,14 +1353,29 @@ END_PROVIDER
integer :: i,j
double precision :: get_two_e_integral
PROVIDE mo_two_e_integrals_in_map
mo_two_e_integrals_jj = 0.d0
mo_two_e_integrals_jj_exchange = 0.d0
if (do_ao_cholesky) then
do j=1,mo_num
do i=1,mo_num
!TODO: use dgemm
mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j))
mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i))
enddo
enddo
else
do j=1,mo_num
do i=1,mo_num
mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map)
mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map)
enddo
enddo
endif
do j=1,mo_num
do i=1,mo_num
mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j)
enddo
enddo

View File

@ -5,9 +5,8 @@ subroutine det_energy(det,energy)
integer(bit_kind), intent(in) :: det
double precision, intent(out) :: energy
double precision, external :: diag_H_mat_elem
call i_H_j(det,det,N_int,energy)
energy = energy + nuclear_repulsion
energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion
end

View File

@ -45,61 +45,64 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4)
double precision, intent(out) :: v(n1,n2,n3,n4)
integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4
double precision :: get_two_e_integral
PROVIDE mo_two_e_integrals_in_map
integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k
double precision, allocatable :: buffer(:,:,:)
!$OMP PARALLEL &
!$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) &
!$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)&
!$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
!$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)&
!$OMP DEFAULT(NONE)
!$OMP DO collapse(3)
allocate(buffer(mo_num,mo_num,mo_num))
!$OMP DO
do i4 = 1, n4
do i3 = 1, n3
do i2 = 1, n2
do i1 = 1, n1
idx4 = list4(i4)
idx3 = list3(i3)
call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
cholesky_mo_transp, cholesky_ao_num, &
cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
do i2 = 1, n2
idx2 = list2(i2)
do i3 = 1, n3
idx3 = list3(i3)
do i1 = 1, n1
idx1 = list1(i1)
v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map)
v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2)
enddo
enddo
enddo
enddo
!$OMP END DO
deallocate(buffer)
!$OMP END PARALLEL
end
! full
BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
implicit none
integer :: i,j,k,l
double precision :: get_two_e_integral
PROVIDE mo_two_e_integrals_in_map
integer :: i1,i2,i3,i4,k
double precision, allocatable :: buffer(:,:,:)
!$OMP PARALLEL &
!$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) &
!$OMP PRIVATE(i,j,k,l) &
!$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
!$OMP PRIVATE(i1,i2,i3,i4,k,buffer)&
!$OMP DEFAULT(NONE)
!$OMP DO collapse(3)
do l = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do i = 1, mo_num
cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map)
allocate(buffer(mo_num,mo_num,mo_num))
!$OMP DO
do i4 = 1, mo_num
call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
cholesky_mo_transp, cholesky_ao_num, &
cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
do i2 = 1, mo_num
do i3 = 1, mo_num
do i1 = 1, mo_num
cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2)
enddo
enddo
enddo
enddo
!$OMP END DO
deallocate(buffer)
!$OMP END PARALLEL
END_PROVIDER
@ -638,6 +641,7 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
integer :: i,j, idx_i,idx_j,i_shift,j_shift
integer :: tmp_i,tmp_j
integer :: si,sj,s
PROVIDE big_array_exchange_integrals big_array_coulomb_integrals
allocate(tmp_F(mo_num,mo_num))
@ -702,8 +706,10 @@ subroutine get_fock_matrix_spin(det,s,f)
s2 = 1
endif
PROVIDE big_array_coulomb_integrals big_array_exchange_integrals
!$OMP PARALLEL &
!$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) &
!$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) &
!$OMP PRIVATE(p,q,ok,i,res)&
!$OMP DEFAULT(NONE)
!$OMP DO collapse(1)
@ -713,13 +719,14 @@ subroutine get_fock_matrix_spin(det,s,f)
do i = 1, mo_num
call apply_hole(det, s1, i, res, ok, N_int)
if (ok) then
f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q)
endif
enddo
do i = 1, mo_num
call apply_hole(det, s2, i, res, ok, N_int)
if (ok) then
f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i)
f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q)
endif
enddo
enddo