Compare commits

...

6 Commits

Author SHA1 Message Date
Anthony Scemama 0c6e6a1ca0 GPU acceleration 2023-07-16 13:52:11 +02:00
Anthony Scemama 7847ccc674 GPU acceleration 2023-07-16 13:48:58 +02:00
Anthony Scemama 5c1d3987a3 GPU acceleration 2023-07-16 13:47:44 +02:00
Anthony Scemama 8abf589c90 Working 2023-07-16 13:16:11 +02:00
Anthony Scemama 358002d70f Working 2023-07-16 11:42:42 +02:00
Anthony Scemama a9ee23aba4 C version OK 2023-07-16 09:54:58 +02:00
18 changed files with 6355 additions and 0 deletions

59
devel/ccsd_gpu/.gitignore vendored Normal file
View File

@ -0,0 +1,59 @@
IRPF90_temp/
IRPF90_man/
build.ninja
irpf90.make
ezfio_interface.irp.f
irpf90_entities
tags
Makefile
ao_basis
ao_one_e_ints
ao_two_e_erf_ints
ao_two_e_ints
aux_quantities
becke_numerical_grid
bitmask
cis
cisd
cipsi
davidson
davidson_dressed
davidson_undressed
density_for_dft
determinants
dft_keywords
dft_utils_in_r
dft_utils_one_e
dft_utils_two_body
dressing
dummy
electrons
ezfio_files
fci
generators_cas
generators_full
hartree_fock
iterations
kohn_sham
kohn_sham_rs
mo_basis
mo_guess
mo_one_e_ints
mo_two_e_erf_ints
mo_two_e_ints
mpi
mrpt_utils
nuclei
perturbation
pseudo
psiref_cas
psiref_utils
scf_utils
selectors_cassd
selectors_full
selectors_utils
single_ref_method
slave
tools
utils
zmq

View File

@ -0,0 +1,225 @@
#!/usr/bin/env bats
source $QP_ROOT/tests/bats/common.bats.sh
source $QP_ROOT/quantum_package.rc
function run() {
thresh1=1e-6
thresh2=1e-6
test_exe scf || skip
qp set_file $1
qp edit --check
#qp run scf
qp set_frozen_core
qp set utils_cc cc_par_t true
qp set utils_cc cc_thresh_conv 1e-12
file="$(echo $1 | sed 's/.ezfio//g')"
qp run ccsd_spin_orb | tee $file.ccsd.out
energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
#rm $file.ccsd.out
eq $energy1 $2 $thresh1
eq $energy2 $3 $thresh2
}
@test "b2_stretched" {
run b2_stretched.ezfio -49.136487344382 -0.003497589175
}
@test "be" {
run be.ezfio -14.623559003577 -0.000230982022
}
@test "c2h2" {
run c2h2.ezfio -12.394008897618 -0.010790491561
}
@test "ch4" {
run ch4.ezfio -40.390721785799 -0.004476100282
}
@test "clf" {
run clf.ezfio -559.186562904081 -0.006577143392
}
@test "clo" {
run clo.ezfio -534.564874409332 -0.007584571424
}
@test "co2" {
run co2.ezfio -188.129602527766 -0.018040668885
}
@test "dhno" {
run dhno.ezfio -130.816650109473 -0.012197331453
}
@test "f2" {
run f2.ezfio -199.287826338097 -0.017592872692
}
@test "f" {
run f.ezfio -99.616644511121 -0.003624525307
}
@test "h2o2" {
run h2o2.ezfio -151.182552729963 -0.009511682086
}
@test "h2o" {
run h2o.ezfio -76.237710276526 -0.003001800577
}
@test "h2s" {
run h2s.ezfio -398.861214015390 -0.003300559757
}
@test "h3coh" {
run h3coh.ezfio -115.221296424969 -0.003566171432
}
@test "hbo" {
run hbo.ezfio -100.213539770415 -0.006851489212
}
@test "hcn" {
run hcn.ezfio -93.190247992657 -0.013418135043
}
@test "hco" {
run hco.ezfio -113.405413962350 -0.007973455337
}
@test "lif" {
run lif.ezfio -107.270402903250 -0.007742969005
}
@test "n2" {
run n2.ezfio -109.355358930472 -0.018477744342
}
@test "n2h4" {
run n2h4.ezfio -111.556885923139 -0.009048077008
}
@test "nh3" {
run nh3.ezfio -56.465503060954 -0.007638273755
}
@test "oh" {
run oh.ezfio -75.614606132774 -0.004126661739
}
@test "sih2_3b1" {
run sih2_3b1.ezfio -290.016780973072 -0.000497825874
}
@test "sih3" {
run sih3.ezfio -5.575343504534 -0.002094123268
}
@test "so" {
run so.ezfio -26.035945178665 -0.010594351274
}
#@test "b2_stretched" {
#run b2_stretched.ezfio -49.136487344382 -49.139984933557
#}
#
#@test "be" {
#run be.ezfio -14.623559003577 -14.623789985599
#}
#
#@test "c2h2" {
#run c2h2.ezfio -12.394008897618 -12.404799389179
#}
#
#@test "ch4" {
#run ch4.ezfio -40.390721784961 -40.395197884406
#}
#
#@test "clf" {
#run clf.ezfio -559.186562906072 -559.193140046904
#}
#
#@test "clo" {
#run clo.ezfio -534.564874409333 -534.572458980757
#}
#
#@test "co2" {
#run co2.ezfio -188.129602511724 -188.147643198675
#}
#
#@test "dhno" {
#run dhno.ezfio -130.816650109473 -130.828847440925
#}
#
#@test "f2" {
#run f2.ezfio -199.287826338097 -199.305419210789
#}
#
#@test "f" {
#run f.ezfio -99.616644511120 -99.620269036428
#}
#
#@test "h2o2" {
#run h2o2.ezfio -151.182552729963 -151.192064412049
#}
#
#@test "h2o" {
#run h2o.ezfio -76.237710276526 -76.240712077103
#}
#
#@test "h2s" {
#run h2s.ezfio -398.861214015416 -398.864514575146
#}
#
#@test "h3coh" {
#run h3coh.ezfio -115.221296424969 -115.224862596401
#}
#
#@test "hbo" {
#run hbo.ezfio -100.213539770415 -100.220391259627
#}
#
#@test "hcn" {
#run hcn.ezfio -93.190247983000 -93.203666131216
#}
#
#@test "hco" {
#run hco.ezfio -113.405413962350 -113.413387417687
#}
#
#@test "lif" {
#run lif.ezfio -107.270402903211 -107.278145872216
#}
#
#@test "n2" {
#run n2.ezfio -109.355358930472 -109.373836674814
#}
#
#@test "n2h4" {
#run n2h4.ezfio -111.556885922642 -111.565934000556
#}
#
#@test "nh3" {
#run nh3.ezfio -56.465503060954 -56.473141334709
#}
#
#@test "oh" {
#run oh.ezfio -75.614606131897 -75.618732794235
#}
#
#@test "sih2_3b1" {
#run sih2_3b1.ezfio -290.016780973071 -290.017278798946
#}
#
#@test "sih3" {
#run sih3.ezfio -5.575343504534 -5.577437627802
#}
#
#@test "so" {
#run so.ezfio -26.035945181998 -26.046539528491
#}

View File

@ -0,0 +1,225 @@
#!/usr/bin/env bats
source $QP_ROOT/tests/bats/common.bats.sh
source $QP_ROOT/quantum_package.rc
function run() {
thresh1=1e-6
thresh2=1e-6
test_exe scf || skip
qp set_file $1
qp edit --check
#qp run scf
qp set_frozen_core
qp set utils_cc cc_par_t true
qp set utils_cc cc_thresh_conv 1e-12
file="$(echo $1 | sed 's/.ezfio//g')"
qp run ccsd_space_orb | tee $file.ccsd.out
energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
#rm $file.ccsd.out
eq $energy1 $2 $thresh1
eq $energy2 $3 $thresh2
}
@test "b2_stretched" {
run b2_stretched.ezfio -49.136487344382 -0.003497589175
}
@test "be" {
run be.ezfio -14.623559003577 -0.000230982022
}
@test "c2h2" {
run c2h2.ezfio -12.394008897618 -0.010790491561
}
@test "ch4" {
run ch4.ezfio -40.390721785799 -0.004476100282
}
@test "clf" {
run clf.ezfio -559.186562904081 -0.006577143392
}
#@test "clo" {
#run clo.ezfio -534.564874409332 -0.007584571424
#}
@test "co2" {
run co2.ezfio -188.129602527766 -0.018040668885
}
#@test "dhno" {
#run dhno.ezfio -130.816650109473 -0.012197331453
#}
@test "f2" {
run f2.ezfio -199.287826338097 -0.017592872692
}
#@test "f" {
#run f.ezfio -99.616644511121 -0.003624525307
#}
@test "h2o2" {
run h2o2.ezfio -151.182552729963 -0.009511682086
}
@test "h2o" {
run h2o.ezfio -76.237710276526 -0.003001800577
}
@test "h2s" {
run h2s.ezfio -398.861214015390 -0.003300559757
}
@test "h3coh" {
run h3coh.ezfio -115.221296424969 -0.003566171432
}
@test "hbo" {
run hbo.ezfio -100.213539770415 -0.006851489212
}
@test "hcn" {
run hcn.ezfio -93.190247992657 -0.013418135043
}
#@test "hco" {
#run hco.ezfio -113.405413962350 -0.007973455337
#}
@test "lif" {
run lif.ezfio -107.270402903250 -0.007742969005
}
@test "n2" {
run n2.ezfio -109.355358930472 -0.018477744342
}
@test "n2h4" {
run n2h4.ezfio -111.556885923139 -0.009048077008
}
@test "nh3" {
run nh3.ezfio -56.465503060954 -0.007638273755
}
#@test "oh" {
#run oh.ezfio -75.614606132774 -0.004126661739
#}
#@test "sih2_3b1" {
#run sih2_3b1.ezfio -290.016780973072 -0.000497825874
#}
#@test "sih3" {
#run sih3.ezfio -5.575343504534 -0.002094123268
#}
#@test "so" {
#run so.ezfio -26.035945178665 -0.010594351274
#}
#@test "b2_stretched" {
#run b2_stretched.ezfio -49.136487344382 -49.139984933557
#}
#
#@test "be" {
#run be.ezfio -14.623559003577 -14.623789985599
#}
#
#@test "c2h2" {
#run c2h2.ezfio -12.394008897618 -12.404799389179
#}
#
#@test "ch4" {
#run ch4.ezfio -40.390721784961 -40.395197884406
#}
#
#@test "clf" {
#run clf.ezfio -559.186562906072 -559.193140046904
#}
#
##@test "clo" {
##run clo.ezfio -534.564874409333 -534.572458980757
##}
#
#@test "co2" {
#run co2.ezfio -188.129602511724 -188.147643198675
#}
#
##@test "dhno" {
##run dhno.ezfio -130.816650109473 -130.828847440925
##}
#
#@test "f2" {
#run f2.ezfio -199.287826338097 -199.305419210789
#}
#
##@test "f" {
##run f.ezfio -99.616644511120 -99.620269036428
##}
#
#@test "h2o2" {
#run h2o2.ezfio -151.182552729963 -151.192064412049
#}
#
#@test "h2o" {
#run h2o.ezfio -76.237710276526 -76.240712077103
#}
#
#@test "h2s" {
#run h2s.ezfio -398.861214015416 -398.864514575146
#}
#
#@test "h3coh" {
#run h3coh.ezfio -115.221296424969 -115.224862596401
#}
#
#@test "hbo" {
#run hbo.ezfio -100.213539770415 -100.220391259627
#}
#
#@test "hcn" {
#run hcn.ezfio -93.190247983000 -93.203666131216
#}
#
##@test "hco" {
##run hco.ezfio -113.405413962350 -113.413387417687
##}
#
#@test "lif" {
#run lif.ezfio -107.270402903211 -107.278145872216
#}
#
#@test "n2" {
#run n2.ezfio -109.355358930472 -109.373836674814
#}
#
#@test "n2h4" {
#run n2h4.ezfio -111.556885922642 -111.565934000556
#}
#
#@test "nh3" {
#run nh3.ezfio -56.465503060954 -56.473141334709
#}
#
##@test "oh" {
##run oh.ezfio -75.614606131897 -75.618732794235
##}
#
##@test "sih2_3b1" {
##run sih2_3b1.ezfio -290.016780973071 -290.017278798946
##}
#
##@test "sih3" {
##run sih3.ezfio -5.575343504534 -5.577437627802
##}
#
##@test "so" {
##run so.ezfio -26.035945181998 -26.046539528491
##}

11
devel/ccsd_gpu/EZFIO.cfg Normal file
View File

@ -0,0 +1,11 @@
[energy]
type: double precision
doc: CCSD energy
interface: ezfio
[energy_t]
type: double precision
doc: CCSD(T) energy
interface: ezfio

1
devel/ccsd_gpu/LIB Normal file
View File

@ -0,0 +1 @@
-lcudart -lcublas -lcublasLt

2
devel/ccsd_gpu/NEED Normal file
View File

@ -0,0 +1,2 @@
hartree_fock
utils_cc_gpu

31
devel/ccsd_gpu/README.md Normal file
View File

@ -0,0 +1,31 @@
# CCSD in spin orbitals and spatial orbitals
CCSD and CCSD(T) in spin orbitals for open and closed shell systems.
CCSD and CCSD(T) in spatial orbitals for closed shell systems.
## Calculations
The program will automatically choose the version in spin or spatial orbitals
To run the general program:
```
qp run ccsd
```
Nevertheless, you can enforce the run in spin orbitals with
```
qp run ccsd_spin_orb
```
## Settings
The settings can be changed with:
```
qp set utils_cc cc_#param #val
```
For more informations on the settings, look at the module utils_cc and its documentation.
## Org files
The org files are stored in the directory org in order to avoid overwriting on user changes.
The org files can be modified, to export the change to the source code, run
```
./TANGLE_org_mode.sh and
mv *.irp.f ../.
```

View File

@ -0,0 +1,4 @@
========
ccsd_gpu
========

View File

@ -0,0 +1,18 @@
program ccsd
implicit none
BEGIN_DOC
! CCSD program
END_DOC
read_wf = .True.
touch read_wf
if (.not. cc_ref_is_open_shell) then
call run_ccsd_space_orb
else
stop 'Not implemented'
endif
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,513 @@
! Dumb way
subroutine ccsd_par_t_space(nO,nV,t1,t2,energy)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO, nV)
double precision, intent(in) :: t2(nO, nO, nV, nV)
double precision, intent(out) :: energy
double precision, allocatable :: W(:,:,:,:,:,:)
double precision, allocatable :: V(:,:,:,:,:,:)
integer :: i,j,k,a,b,c
allocate(W(nO,nO,nO,nV,nV,nV))
allocate(V(nO,nO,nO,nV,nV,nV))
call form_w(nO,nV,t2,W)
call form_v(nO,nV,t1,W,V)
energy = 0d0
do c = 1, nV
do b = 1, nV
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c)
enddo
enddo
enddo
enddo
enddo
enddo
energy = energy / 3d0
deallocate(V,W)
end
subroutine form_w(nO,nV,t2,W)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t2(nO, nO, nV, nV)
double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV)
integer :: i,j,k,l,a,b,c,d
W = 0d0
do c = 1, nV
print*,'W:',c,'/',nV
do b = 1, nV
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
do d = 1, nV
W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
! chem (bd|ai)
! phys <ba|di>
+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
enddo
do l = 1, nO
W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
! chem (ck|jl)
! phys <cj|kl>
- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
enddo
enddo
enddo
enddo
enddo
enddo
enddo
end
subroutine form_v(nO,nV,t1,w,v)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO, nV)
double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV)
double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV)
integer :: i,j,k,a,b,c
V = 0d0
do c = 1, nV
do b = 1, nV
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+ cc_space_v_vvoo(a,b,i,j) * t1(k,c)
enddo
enddo
enddo
enddo
enddo
enddo
end
! Main
subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: t2(nO,nO,nV,nV)
double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
double precision, intent(out) :: energy
double precision, allocatable :: W(:,:,:,:,:,:)
double precision, allocatable :: V(:,:,:,:,:,:)
double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:)
double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:)
double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:)
integer :: i,j,k,l,a,b,c,d
double precision :: e,ta,tb, delta, delta_ijk
!allocate(W(nV,nV,nV,nO,nO,nO))
!allocate(V(nV,nV,nV,nO,nO,nO))
allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV))
allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO))
allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO))
! Temporary arrays
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, &
!$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
!$OMP PRIVATE(a,b,c,d,i,j,k,l) &
!$OMP DEFAULT(NONE)
!v_vvvo(b,a,d,i) * t2(k,j,c,d) &
!X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j)
!$OMP DO collapse(3)
do i = 1, nO
do a = 1, nV
do b = 1, nV
do d = 1, nV
X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO collapse(3)
do j = 1, nO
do k = 1, nO
do c = 1, nV
do d = 1, nV
T_vvoo(d,c,k,j) = t2(k,j,c,d)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!v_vooo(c,j,k,l) * t2(i,l,a,b) &
!X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
!$OMP DO collapse(3)
do k = 1, nO
do j = 1, nO
do c = 1, nV
do l = 1, nO
X_ovoo(l,c,j,k) = v_vooo(c,j,k,l)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO collapse(3)
do i = 1, nO
do b = 1, nV
do a = 1, nV
do l = 1, nO
T_ovvo(l,a,b,i) = t2(i,l,a,b)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!v_vvoo(b,c,j,k) * t1(i,a) &
!X_vvoo(b,c,k,j) * T1_vo(a,i) &
!$OMP DO collapse(3)
do j = 1, nO
do k = 1, nO
do c = 1, nV
do b = 1, nV
X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO collapse(1)
do i = 1, nO
do a = 1, nV
T_vo(a,i) = t1(i,a)
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call wall_time(ta)
energy = 0d0
do i = 1, nO
do j = 1, nO
do k = 1, nO
delta_ijk = f_o(i) + f_o(j) + f_o(k)
call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk)
call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk)
!$OMP PARALLEL &
!$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) &
!$OMP PRIVATE(a,b,c,e,delta) &
!$OMP DEFAULT(NONE)
e = 0d0
!$OMP DO
do c = 1, nV
do b = 1, nV
do a = 1, nV
delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c))
!energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c)
e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) &
* (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
energy = energy + e
!$OMP END CRITICAL
!$OMP END PARALLEL
enddo
enddo
call wall_time(tb)
write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s'
enddo
energy = energy / 3d0
deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo)
!deallocate(V,W)
end
! W_ijk
subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W)
implicit none
integer, intent(in) :: nO,nV,i,j,k
!double precision, intent(in) :: t2(nO,nO,nV,nV)
double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO)
double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO)
double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO)
integer :: l,a,b,c,d
double precision, allocatable, dimension(:,:,:) :: X, Y, Z
!W = 0d0
!do i = 1, nO
! do j = 1, nO
! do k = 1, nO
allocate(X(nV,nV,nV))
allocate(Y(nV,nV,nV))
allocate(Z(nV,nV,nV))
!$OMP PARALLEL DO
do b = 1, nV
do a = 1, nV
do d = 1, nV
Z(d,a,b) = X_vvvo(d,b,a,i)
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm('T','N',nV*nV,nV,nV, 1.d0, &
Z, nV, T_vvoo(1,1,k,j), nV, 0.d0, W, nV*nV)
!$OMP PARALLEL DO
do c = 1, nV
do a = 1, nV
do d = 1, nV
Z(d,a,c) = X_vvvo(d,c,a,i)
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm('T','N',nV*nV,nV,nV, 1.d0, &
Z, nV, T_vvoo(1,1,j,k), nV, 0.d0, Y, nV*nV)
call dgemm('T','N',nV*nV,nV,nV, 1.d0, &
X_vvvo(1,1,1,k), nV, T_vvoo(1,1,j,i), nV, 1.d0, Y, nV*nV)
call dgemm('T','N',nV,nV*nV,nV, 1.d0, &
T_vvoo(1,1,i,j), nV, X_vvvo(1,1,1,k), nV, 1.d0, W, nV)
call dgemm('T','N',nV,nV*nV,nV, 1.d0, &
T_vvoo(1,1,i,k), nV, X_vvvo(1,1,1,j), nV, 1.d0, Y, nV)
call dgemm('T','N',nV*nV,nV,nV, 1.d0, &
X_vvvo(1,1,1,j), nV, T_vvoo(1,1,k,i), nV, 1.d0, W, nV*nV)
deallocate(Z)
allocate(Z(nO,nV,nV))
call dgemm('T','N',nV*nV,nV,nO, -1.d0, &
T_ovvo(1,1,1,i), nO, X_ovoo(1,1,j,k), nO, 1.d0, W, nV*nV)
call dgemm('T','N',nV*nV,nV,nO, -1.d0, &
T_ovvo(1,1,1,i), nO, X_ovoo(1,1,k,j), nO, 1.d0, Y, nV*nV)
!$OMP PARALLEL DO
do c = 1, nV
do a = 1, nV
do l = 1, nO
Z(l,a,c) = T_ovvo(l,c,a,k)
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm('T','N',nV*nV,nV,nO, -1.d0, &
Z, nO, X_ovoo(1,1,i,j), nO, 1.d0, Y, nV*nV)
call dgemm('T','N',nV,nV*nV,nO, -1.d0, &
X_ovoo(1,1,j,i), nO, T_ovvo(1,1,1,k), nO, 1.d0, Y, nV)
call dgemm('T','N',nV,nV*nV,nO, -1.d0, &
X_ovoo(1,1,k,i), nO, T_ovvo(1,1,1,j), nO, 1.d0, W, nV)
!$OMP PARALLEL DO
do b = 1, nV
do a = 1, nV
do l = 1, nO
Z(l,a,b) = T_ovvo(l,b,a,j)
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm('T','N',nV*nV,nV,nO, -1.d0, &
Z, nO, X_ovoo(1,1,i,k), nO, 1.d0, W, nV*nV)
!$OMP PARALLEL DO
do c = 1, nV
do b = 1, nV
do a = 1, nV
W(a,b,c) = W(a,b,c) + Y(a,c,b)
enddo
enddo
enddo
!$OMP END PARALLEL DO
deallocate(X,Y,Z)
! !$OMP PARALLEL &
! !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) &
! !$OMP PRIVATE(a,b,c,d,l) &
! !$OMP DEFAULT(NONE)
!
! !$OMP DO collapse(2)
! do c = 1, nV
! do b = 1, nV
! do a = 1, nV
! W(a,b,c) = 0.d0
!
! do d = 1, nV
! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
! W(a,b,c) = W(a,b,c) &
! ! chem (bd|ai)
! ! phys <ba|di>
! !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
! !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
! !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
! !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
! !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
! !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
! + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) &
! + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj
! + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik
! + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij
! + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj
! + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik
! enddo
!
! enddo
! enddo
! enddo
! !$OMP END DO nowait
!
! !$OMP DO collapse(2)
! do c = 1, nV
! do b = 1, nV
! do a = 1, nV
!
! do l = 1, nO
! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
! W(a,b,c) = W(a,b,c) &
! ! chem (ck|jl)
! ! phys <cj|kl>
! !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
! !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
! !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
! !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
! !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
! !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
! - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) &
! - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj
! - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik
! - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij
! - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj
! - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik
! enddo
!
! enddo
! enddo
! enddo
! !$OMP END DO
! !$OMP END PARALLEL
! enddo
! enddo
!enddo
end
! V_ijk
subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v)
implicit none
integer, intent(in) :: nO,nV,i,j,k
!double precision, intent(in) :: t1(nO,nV)
double precision, intent(in) :: T_vo(nV,nO)
double precision, intent(in) :: X_vvoo(nV,nV,nO,nO)
double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO)
double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO)
integer :: a,b,c
!V = 0d0
!do i = 1, nO
! do j = 1, nO
! do k = 1, nO
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) &
!$OMP PRIVATE(a,b,c) &
!$OMP DEFAULT(NONE)
!$OMP DO collapse(2)
do c = 1, nV
do b = 1, nV
do a = 1, nV
!V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
V(a,b,c) = W(a,b,c) &
!+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
!+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
!+ cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+ X_vvoo(b,c,k,j) * T_vo(a,i) &
+ X_vvoo(a,c,k,i) * T_vo(b,j) &
+ X_vvoo(a,b,j,i) * T_vo(c,k)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
! enddo
! enddo
!enddo
end

View File

@ -0,0 +1,452 @@
! Main
subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: t2(nO,nO,nV,nV)
double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
double precision, intent(out) :: energy
double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
integer :: i,j,k,l,a,b,c,d
double precision :: e,ta,tb
call set_multiple_levels_omp(.False.)
allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
!$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
!$OMP PRIVATE(a,b,c,d,i,j,k,l) &
!$OMP DEFAULT(NONE)
!v_vvvo(b,a,d,i) * t2(k,j,c,d) &
!X_vovv(d,i,b,a,i) * T_voov(d,j,c,k)
!$OMP DO
do a = 1, nV
do b = 1, nV
do i = 1, nO
do d = 1, nV
X_vovv(d,i,b,a) = v_vvvo(b,a,d,i)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do c = 1, nV
do j = 1, nO
do k = 1, nO
do d = 1, nV
T_voov(d,k,j,c) = t2(k,j,c,d)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!v_vooo(c,j,k,l) * t2(i,l,a,b) &
!X_ooov(l,j,k,c) * T_oovv(l,i,a,b) &
!$OMP DO
do c = 1, nV
do k = 1, nO
do j = 1, nO
do l = 1, nO
X_ooov(l,j,k,c) = v_vooo(c,j,k,l)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do b = 1, nV
do a = 1, nV
do i = 1, nO
do l = 1, nO
T_oovv(l,i,a,b) = t2(i,l,a,b)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!X_oovv(j,k,b,c) * T1_vo(a,i) &
!$OMP DO
do c = 1, nV
do b = 1, nV
do k = 1, nO
do j = 1, nO
X_oovv(j,k,b,c) = v_vvoo(b,c,j,k)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP END PARALLEL
double precision, external :: ccsd_t_task_aba
double precision, external :: ccsd_t_task_abc
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
e = 0d0
!$OMP DO SCHEDULE(guided)
do a = 1, nV
do b = a+1, nV
do c = b+1, nV
e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
enddo
e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
energy = energy + e
!$OMP END CRITICAL
!$OMP END PARALLEL
energy = energy / 3.d0
deallocate(X_vovv,X_ooov,T_voov,T_oovv)
end
double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,&
X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
implicit none
integer, intent(in) :: nO,nV,a,b,c
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: X_oovv(nO,nO,nV,nV)
double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
double precision :: delta, delta_abc
integer :: i,j,k
double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
delta_abc = f_v(a) + f_v(b) + f_v(c)
e = 0.d0
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
e = e + delta * ( &
(4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + &
W_bca(i,j,k) - W_bac(i,j,k) + &
W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +&
(4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + &
W_cba(i,j,k) - W_cab(i,j,k) + &
W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +&
(4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + &
W_acb(i,j,k) - W_abc(i,j,k) + &
W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) )
enddo
enddo
enddo
deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
end
double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,&
X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
implicit none
integer, intent(in) :: nO,nV,a,b
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: X_oovv(nO,nO,nV,nV)
double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
double precision :: delta, delta_abc
integer :: i,j,k
double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
delta_abc = f_v(a) + f_v(b) + f_v(a)
e = 0.d0
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
e = e + delta * ( &
(4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + &
(4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + &
(4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) )
enddo
enddo
enddo
deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
end
subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
implicit none
integer, intent(in) :: nO,nV,a,b,c
double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
double precision, intent(out) :: W_abc(nO,nO,nO)
double precision, intent(out) :: W_cba(nO,nO,nO)
double precision, intent(out) :: W_bca(nO,nO,nO)
double precision, intent(out) :: W_cab(nO,nO,nO)
double precision, intent(out) :: W_bac(nO,nO,nO)
double precision, intent(out) :: W_acb(nO,nO,nO)
integer :: l,i,j,k,d
double precision, allocatable, dimension(:,:,:,:) :: W_ikj
double precision, allocatable :: X(:,:,:,:)
allocate(W_ikj(nO,nO,nO,6))
allocate(X(nV,nO,nO,3))
do k=1,nO
do i=1,nO
do d=1,nV
X(d,i,k,1) = T_voov(d,k,i,a)
X(d,i,k,2) = T_voov(d,k,i,b)
X(d,i,k,3) = T_voov(d,k,i,c)
enddo
enddo
enddo
! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO)
! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO)
! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO)
! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO)
call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO)
! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO)
! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO)
call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO)
deallocate(X)
allocate(X(nO,nO,nO,3))
do k=1,nO
do j=1,nO
do l=1,nO
X(l,j,k,1) = X_ooov(l,k,j,a)
X(l,j,k,2) = X_ooov(l,k,j,b)
X(l,j,k,3) = X_ooov(l,k,j,c)
enddo
enddo
enddo
! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO)
! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO)
call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO)
! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO)
! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO)
! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO)
! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO)
call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO)
do k=1,nO
do j=1,nO
do i=1,nO
W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1)
W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2)
W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3)
W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4)
W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5)
W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6)
enddo
enddo
enddo
deallocate(X,W_ikj)
end
! V_abc
subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
implicit none
integer, intent(in) :: nO,nV,a,b,c
double precision, intent(in) :: T_ov(nO,nV)
double precision, intent(in) :: X_oovv(nO,nO,nV,nV)
double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO)
double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO)
double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO)
double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO)
integer :: i,j,k
do k = 1, nO
do j = 1, nO
do i = 1, nO
V_abc(i,j,k) = W_abc(i,j,k) &
+ X_oovv(j,k,b,c) * T_ov(i,a) &
+ X_oovv(i,k,a,c) * T_ov(j,b) &
+ X_oovv(i,j,a,b) * T_ov(k,c)
V_cba(i,j,k) = W_cba(i,j,k) &
+ X_oovv(j,k,b,a) * T_ov(i,c) &
+ X_oovv(i,k,c,a) * T_ov(j,b) &
+ X_oovv(i,j,c,b) * T_ov(k,a)
V_bca(i,j,k) = W_bca(i,j,k) &
+ X_oovv(j,k,c,a) * T_ov(i,b) &
+ X_oovv(i,k,b,a) * T_ov(j,c) &
+ X_oovv(i,j,b,c) * T_ov(k,a)
V_cab(i,j,k) = W_cab(i,j,k) &
+ X_oovv(j,k,a,b) * T_ov(i,c) &
+ X_oovv(i,k,c,b) * T_ov(j,a) &
+ X_oovv(i,j,c,a) * T_ov(k,b)
V_bac(i,j,k) = W_bac(i,j,k) &
+ X_oovv(j,k,a,c) * T_ov(i,b) &
+ X_oovv(i,k,b,c) * T_ov(j,a) &
+ X_oovv(i,j,b,a) * T_ov(k,c)
V_acb(i,j,k) = W_acb(i,j,k) &
+ X_oovv(j,k,c,b) * T_ov(i,a) &
+ X_oovv(i,k,a,b) * T_ov(j,c) &
+ X_oovv(i,j,a,c) * T_ov(k,b)
enddo
enddo
enddo
end

View File

@ -0,0 +1,380 @@
! Main
subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
implicit none
integer, intent(in) :: nO,nV
double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
double precision, intent(in) :: t2(nO,nO,nV,nV)
double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
double precision, intent(inout) :: energy
double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
integer :: i,j,k,l,a,b,c,d
double precision :: e,ta,tb,eccsd
eccsd = energy
call set_multiple_levels_omp(.False.)
allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
!$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
!$OMP PRIVATE(a,b,c,d,i,j,k,l) &
!$OMP DEFAULT(NONE)
!v_vvvo(b,a,d,i) * t2(k,j,c,d) &
!X_vovv(d,i,b,a,i) * T_voov(d,j,c,k)
!$OMP DO
do a = 1, nV
do b = 1, nV
do i = 1, nO
do d = 1, nV
X_vovv(d,i,b,a) = v_vvvo(b,a,d,i)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do c = 1, nV
do j = 1, nO
do k = 1, nO
do d = 1, nV
T_voov(d,k,j,c) = t2(k,j,c,d)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!v_vooo(c,j,k,l) * t2(i,l,a,b) &
!X_ooov(l,j,k,c) * T_oovv(l,i,a,b) &
!$OMP DO
do c = 1, nV
do k = 1, nO
do j = 1, nO
do l = 1, nO
X_ooov(l,j,k,c) = v_vooo(c,j,k,l)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP DO
do b = 1, nV
do a = 1, nV
do i = 1, nO
do l = 1, nO
T_oovv(l,i,a,b) = t2(i,l,a,b)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!X_oovv(j,k,b,c) * T1_vo(a,i) &
!$OMP DO
do c = 1, nV
do b = 1, nV
do k = 1, nO
do j = 1, nO
X_oovv(j,k,b,c) = v_vvoo(b,c,j,k)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$OMP BARRIER
!$OMP END PARALLEL
double precision, external :: ccsd_t_task_aba
double precision, external :: ccsd_t_task_abc
! logical, external :: omp_test_lock
double precision, allocatable :: memo(:), Pabc(:), waccu(:)
integer*8, allocatable :: sampled(:)
! integer(omp_lock_kind), allocatable :: lock(:)
integer*2 , allocatable :: abc(:,:)
integer*8 :: Nabc, i8,kiter
integer*8, allocatable :: iorder(:)
double precision :: eocc
double precision :: norm
integer :: isample
! Prepare table of triplets (a,b,c)
Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV
allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(0:Nabc))
allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc))
! eocc = 3.d0/dble(nO) * sum(f_o(1:nO))
Nabc = 0_8
do a = 1, nV
do b = a+1, nV
do c = b+1, nV
Nabc = Nabc + 1_8
Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c))
abc(1,Nabc) = int(a,2)
abc(2,Nabc) = int(b,2)
abc(3,Nabc) = int(c,2)
enddo
Nabc = Nabc + 1_8
abc(1,Nabc) = int(a,2)
abc(2,Nabc) = int(b,2)
abc(3,Nabc) = int(a,2)
Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b))
Nabc = Nabc + 1_8
abc(1,Nabc) = int(b,2)
abc(2,Nabc) = int(a,2)
abc(3,Nabc) = int(b,2)
Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b))
enddo
enddo
do i8=1,Nabc
iorder(i8) = i8
enddo
! Sort triplets in decreasing Pabc
call dsort_big(Pabc, iorder, Nabc)
! Normalize
norm = 0.d0
do i8=Nabc,1,-1
norm = norm + Pabc(i8)
enddo
norm = 1.d0/norm
do i8=1,Nabc
Pabc(i8) = Pabc(i8) * norm
enddo
call i8set_order_big(abc, iorder, Nabc)
! Cumulative distribution for sampling
waccu(Nabc) = 0.d0
do i8=Nabc-1,1,-1
waccu(i8) = waccu(i8+1) - Pabc(i8+1)
enddo
waccu(:) = waccu(:) + 1.d0
waccu(0) = 0.d0
logical :: converged, do_comp
double precision :: eta, variance, error, sample
double precision :: t00, t01
integer*8 :: ieta, Ncomputed
integer*8, external :: binary_search
integer :: nbuckets
nbuckets = 100
double precision, allocatable :: wsum(:)
allocate(wsum(nbuckets))
converged = .False.
Ncomputed = 0_8
energy = 0.d0
variance = 0.d0
memo(:) = 0.d0
sampled(:) = -1_8
integer*8 :: ileft, iright, imin
ileft = 1_8
iright = Nabc
integer*8, allocatable :: bounds(:,:)
allocate (bounds(2,nbuckets))
do isample=1,nbuckets
eta = 1.d0/dble(nbuckets) * dble(isample)
ieta = binary_search(waccu,eta,Nabc)
bounds(1,isample) = ileft
bounds(2,isample) = ieta
ileft = ieta+1
wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) )
enddo
Pabc(:) = 1.d0/Pabc(:)
print '(A)', ''
print '(A)', ' ======================= ============== =========='
print '(A)', ' E(CCSD(T)) Error % '
print '(A)', ' ======================= ============== =========='
call wall_time(t00)
imin = 1_8
!$OMP PARALLEL &
!$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) &
!$OMP DEFAULT(SHARED)
do kiter=1,Nabc
!$OMP MASTER
do while (imin <= Nabc)
if (sampled(imin)>-1_8) then
imin = imin+1
else
exit
endif
enddo
! Deterministic part
if (imin < Nabc) then
ieta=imin
sampled(ieta) = 0_8
a = abc(1,ieta)
b = abc(2,ieta)
c = abc(3,ieta)
Ncomputed += 1_8
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
if (a/=c) then
memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
else
memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
endif
!$OMP END TASK
endif
! Stochastic part
call random_number(eta)
do isample=1,nbuckets
if (imin >= bounds(2,isample)) then
cycle
endif
ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1
if (sampled(ieta) == -1_8) then
sampled(ieta) = 0_8
a = abc(1,ieta)
b = abc(2,ieta)
c = abc(3,ieta)
Ncomputed += 1_8
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
if (a/=c) then
memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
else
memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
endif
!$OMP END TASK
endif
sampled(ieta) = sampled(ieta)+1_8
enddo
call wall_time(t01)
if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then
!$OMP TASKWAIT
call wall_time(t01)
t00 = t01
double precision :: ET, ET2
double precision :: energy_stoch, energy_det
double precision :: scale
double precision :: w
double precision :: tmp
energy_stoch = 0.d0
energy_det = 0.d0
norm = 0.d0
scale = 1.d0
ET = 0.d0
ET2 = 0.d0
do isample=1,nbuckets
if (imin >= bounds(2,isample)) then
energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample)))
scale = scale - wsum(isample)
else
exit
endif
enddo
isample = min(isample,nbuckets)
do ieta=bounds(1,isample), Nabc
w = dble(max(sampled(ieta),0_8))
tmp = w * memo(ieta) * Pabc(ieta)
ET = ET + tmp
ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta)
norm = norm + w
enddo
norm = norm/scale
if (norm > 0.d0) then
energy_stoch = ET / norm
variance = ET2 / norm - energy_stoch*energy_stoch
endif
energy = energy_det + energy_stoch
print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc)
endif
!$OMP END MASTER
if (imin >= Nabc) exit
enddo
!$OMP END PARALLEL
print '(A)', ' ======================= ============== ========== '
print '(A)', ''
deallocate(X_vovv)
deallocate(X_ooov)
deallocate(T_voov)
deallocate(T_oovv)
end
integer*8 function binary_search(arr, key, sze)
implicit none
BEGIN_DOC
! Searches the key in array arr(1:sze) between l_in and r_in, and returns its index
END_DOC
integer*8 :: sze, i, j, mid
double precision :: arr(0:sze)
double precision :: key
if ( key < arr(1) ) then
binary_search = 0_8
return
end if
if ( key >= arr(sze) ) then
binary_search = sze
return
end if
i = 0_8
j = sze + 1_8
do while (.True.)
mid = (i + j) / 2_8
if ( key >= arr(mid) ) then
i = mid
else
j = mid
end if
if (j-i <= 1_8) then
binary_search = i
return
endif
end do
end function binary_search

View File

@ -0,0 +1,376 @@
! v1
subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy)
implicit none
integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV)
double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV)
double precision, intent(in) :: v_ooov(nO,nO,nO,nV)
double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO)
double precision, intent(out) :: energy
double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:)
double precision :: e_t, e_st, e_dt, delta_abc, delta
integer :: i,j,k,l,m,a,b,c,d,e
allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV))
t3 = 0d0
! T3
do c = 1, nV
do b = 1, nV
do a = 1, nV
delta_abc = f_v(a) + f_v(b) + f_v(c)
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
do e = 1, nV
t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+ t2(j,k,a,e) * v_vvvo(b,c,e,i) &
- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac)
enddo
do m = 1, nO
t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+ t2(m,i,b,c) * v_ooov(j,k,m,a) &
- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac)
enddo
t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta)
enddo
enddo
enddo
enddo
enddo
enddo
! E_T
e_t = 0d0
do c = 1, nV
do b = 1, nV
do a = 1, nV
delta_abc = f_v(a) + f_v(b) + f_v(c)
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c)
enddo
enddo
enddo
enddo
enddo
enddo
e_t = e_t / 36d0
! E_ST
s = 0d0
do c = 1, nV
do b = 1, nV
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c)
enddo
enddo
enddo
enddo
enddo
enddo
e_st = 0d0
do a = 1, nV
do i = 1, nO
e_st = e_st + s(i,a) * t1(i,a)
enddo
enddo
e_st = e_st * 0.25d0
! E_DT
e_dt = 0d0
do c = 1, nV
do b = 1, nV
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c)
enddo
enddo
enddo
enddo
enddo
enddo
e_dt = e_dt * 0.25d0
! (T)
!print*,e_t,e_st,e_dt
energy = e_t + e_st + e_dt
deallocate(t3,s)
end
! v2
subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy)
implicit none
integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV)
double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV)
double precision, intent(in) :: v_ooov(nO,nO,nO,nV)
double precision, intent(in) :: v_vvoo(nV,nV,nO,nO)
double precision, intent(out) :: energy
double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:)
double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:)
double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:)
double precision :: e_st, delta_abc, delta, ta, tb
integer :: i,j,k,l,m,a,b,c,d,e
allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV))
allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV))
call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
nV,nV,nV,nO, v_vvvo)
! Init
s = 0d0
e_t = 0d0
e_st = 0d0
e_dt = 0d0
call wall_time(ta)
!$OMP PARALLEL &
!$OMP PRIVATE(i,j,k,m,a,b,c,e) &
!$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, &
!$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) &
!$OMP DEFAULT(NONE)
!$OMP DO collapse(3)
do c = 1, nV
do b = 1, nV
do i = 1, nO
do e = 1, nV
A_vovv(e,i,b,c) = v_vvvo(b,c,e,i)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
!$omp do collapse(3)
do a = 1, nV
do k = 1, nO
do j = 1, nO
do e = 1, nV
T_voov(e,j,k,a) = t2(j,k,a,e)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp do collapse(3)
do a = 1, nV
do k = 1, nO
do j = 1, nO
do m = 1, nO
B_ooov(m,j,k,a) = v_ooov(j,k,m,a)
enddo
enddo
enddo
enddo
!$omp end do
do c = 1, nV
do b = 1, nV
! T3(:,:,:,:,b,c)
! Init
!$OMP DO collapse(3)
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
t3_bc(i,j,k,a) = 0d0
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP DO collapse(3)
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
do e = 1, nV
t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
!+ t2(j,k,a,e) * v_vvvo(b,c,e,i) &
!- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
!- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
!- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
!- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
!+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
!+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
!+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
!+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac)
+ T_voov(e,j,k,a) * A_vovv(e,i,b,c) &
- T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij)
- T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik)
- T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab)
- T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac)
+ T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab)
+ T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac)
+ T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab)
+ T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac)
enddo
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP DO collapse(3)
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
do m = 1, nO
t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
!+ t2(m,i,b,c) * v_ooov(j,k,m,a) &
!- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
!- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
!- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
!- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
!+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
!+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
!+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
!+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac)
+ t2(m,i,b,c) * B_ooov(m,j,k,a) &
- t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij)
- t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik)
- t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab)
- t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac)
+ t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab)
+ t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac)
+ t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab)
+ t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac)
enddo
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP DO
do a = 1, nV
delta_abc = f_v(a) + f_v(b) + f_v(c)
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta)
enddo
enddo
enddo
enddo
!$OMP END DO
! E_T
!$OMP DO
do a = 1, nV
delta_abc = f_v(a) + f_v(b) + f_v(c)
do k = 1, nO
do j = 1, nO
do i = 1, nO
delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
! E_ST
!$OMP DO
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a)
enddo
enddo
enddo
enddo
!$OMP END DO nowait
! E_DT
!$OMP DO
do a = 1, nV
do k = 1, nO
do j = 1, nO
do i = 1, nO
e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a)
enddo
enddo
enddo
enddo
!$OMP END DO
enddo
!$OMP MASTER
call wall_time(tb)
write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s'
!$OMP END MASTER
enddo
!$OMP END PARALLEL
do a = 2, nV
e_t(1) = e_t(1) + e_t(a)
enddo
do a = 2, nV
e_dt(1) = e_dt(1) + e_dt(a)
enddo
e_t = e_t / 36d0
do a = 1, nV
do i = 1, nO
e_st = e_st + s(i,a) * t1(i,a)
enddo
enddo
e_st = e_st * 0.25d0
e_dt = e_dt * 0.25d0
! (T)
!print*,e_t(1),e_st,e_dt(1)
energy = e_t(1) + e_st + e_dt(1)
deallocate(t3_bc,s)
end

167
devel/ccsd_gpu/gpu.c Normal file
View File

@ -0,0 +1,167 @@
#include <stdio.h>
#include <stdlib.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
#define BLOCK_SIZE 16
void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*,
double*, double*, int*);
void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo_num,
double* t1,
double* tau,
double* cc_space_v_vo_chol,
double* cc_space_v_vv_chol,
double* r2)
{
int m,n,k, lda, ldb, ldc;
double alpha, beta;
double* A;
double* B;
double* C;
cublasHandle_t handle;
cublasCreate(&handle);
double* d_tau;
lda = nO * nO;
cudaMalloc((void **)&d_tau, lda * nV * nV * sizeof(double));
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), tau, lda, d_tau, lda);
double* d_r2;
lda = nO * nO;
cudaMalloc((void **)&d_r2, lda * nV * nV * sizeof(double));
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2, lda, d_r2, lda);
double* d_cc_space_v_vv_chol;
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_cc_space_v_vv_chol, lda * nV * sizeof(double));
cublasSetMatrix(cholesky_mo_num*nV, nV, sizeof(double), cc_space_v_vv_chol, lda, d_cc_space_v_vv_chol, lda);
double* d_cc_space_v_vo_chol;
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_cc_space_v_vo_chol, lda * nO * sizeof(double));
cublasSetMatrix(cholesky_mo_num*nV, nO, sizeof(double), cc_space_v_vo_chol, lda, d_cc_space_v_vo_chol, lda);
double* d_t1;
lda = nO;
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
double* d_tmp_cc;
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
alpha=1.0; beta=0.0;
m=cholesky_mo_num*nV; n=nV; k=nO;
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
#pragma omp parallel
{
double* d_tmp_cc2;
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
double* d_B1;
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
double* d_tmpB1;
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
#pragma omp for
for (size_t gam=0 ; gam<nV ; ++gam)
{
double* d_tmp_cc_ = &(d_tmp_cc[gam*nV*cholesky_mo_num]);
double* d_cc_space_v_vv_chol_ = &(d_cc_space_v_vv_chol[gam*nV*cholesky_mo_num]);
alpha = 1.0;
beta = -1.0;
A = d_cc_space_v_vv_chol_; lda = cholesky_mo_num;
B = d_tmp_cc_; ldb = cholesky_mo_num;
C = d_tmp_cc2 ; ldc = cholesky_mo_num;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, cholesky_mo_num, nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
for (size_t iblock=0 ; iblock<nV ; iblock += BLOCK_SIZE)
{
const size_t mbs = BLOCK_SIZE < nV-iblock ? BLOCK_SIZE : nV-iblock;
alpha=-1.0; beta=0.0;
m=nV*mbs; n=nV; k=cholesky_mo_num;
A=&(d_tmp_cc[iblock*cholesky_mo_num*nV]); lda=cholesky_mo_num;
B=d_cc_space_v_vv_chol_; ldb=cholesky_mo_num;
C=d_tmpB1 ; ldc=nV*BLOCK_SIZE;
cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
alpha=1.0; beta=1.0;
m=nV*mbs; n=nV; k=cholesky_mo_num;
A=&(d_cc_space_v_vv_chol[iblock*cholesky_mo_num*nV]); lda=cholesky_mo_num;
B=d_tmp_cc2; ldb=cholesky_mo_num;
C=d_tmpB1 ; ldc=nV*BLOCK_SIZE;
cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
for (size_t bet=iblock ; bet<(nV < iblock+BLOCK_SIZE ? nV : iblock+BLOCK_SIZE) ; ++bet)
{
alpha = 1.0;
beta = 0.0;
A = &(d_tmpB1[nV*(bet-iblock)]); lda = nV*BLOCK_SIZE;
B = d_tmpB1; ldb = nV;
C = &(d_B1[nV*nV*(bet-iblock)]) ; ldc = nV;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nV, nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
}
alpha=1.0; beta=1.0;
m=nO*nO; n=mbs; k=nV*nV;
A=d_tau; lda=nO*nO;
B=d_B1 ; ldb=nV*nV;
C=&(d_r2[nO*nO*(iblock + nV*gam)]); ldc=nO*nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
}
}
}
lda=nO*nO;
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2, lda);
cudaFree(d_cc_space_v_vo_chol);
cudaFree(d_cc_space_v_vv_chol);
cudaFree(d_tau);
cudaFree(d_t1);
cudaFree(d_tmp_cc);
cudaFree(d_r2);
cublasDestroy(handle);
}
/*
cublasHandle_t handle;
cublasCreate(&handle);
double *d_A, *d_B, *d_C;
cudaMalloc((void **)&d_A, m * k * sizeof(double));
cudaMalloc((void **)&d_B, k * n * sizeof(double));
cudaMalloc((void **)&d_C, m * n * sizeof(double));
cublasSetMatrix(m, k, sizeof(double), A, m, d_A, m);
cublasSetMatrix(k, n, sizeof(double), B, k, d_B, k);
cublasSetMatrix(m, n, sizeof(double), C, m, d_C, m);
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, d_A, m, d_B, k, &beta, d_C, m);
cublasGetMatrix(m, n, sizeof(double), d_C, m, C, m);
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
cublasDestroy(handle);
*/

View File

@ -0,0 +1,52 @@
module gpu_module
use iso_c_binding
implicit none
interface
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, &
t1,tau,cc_space_v_vo_chol,cc_space_v_vv_chol, r2) bind(C)
import c_int, c_double
integer(c_int), value :: nO, nV, cholesky_mo_num
real(c_double), intent(in) :: t1(nO,nV)
real(c_double), intent(in) :: tau(nO,nO,nV,nV)
real(c_double), intent(in) :: cc_space_v_vo_chol(cholesky_mo_num,nV,nO)
real(c_double), intent(in) :: cc_space_v_vv_chol(cholesky_mo_num,nV,nV)
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
end subroutine
subroutine gemm0(nO, nV, cholesky_mo_num, cc_space_v_vo_chol, t1, tmp_cc) bind(C, name="gemm0")
import c_int, c_double
integer(c_int), value :: nO, nV, cholesky_mo_num
real(c_double) :: cc_space_v_vo_chol(cholesky_mo_num,nV,nO)
real(c_double) :: t1(nO,nV)
real(c_double) :: tmp_cc(cholesky_mo_num,nV,nV)
end subroutine gemm0
subroutine gemm1(iblock, nV, cholesky_mo_num, tmp_cc, cc_space_v_vv_chol_, tmpB1) bind(C, name="gemm1")
import c_int, c_double
integer(c_int), value :: iblock, nV, cholesky_mo_num
real(c_double) :: tmp_cc(cholesky_mo_num,nV,nV)
real(c_double) :: cc_space_v_vv_chol_(cholesky_mo_num,nV)
real(c_double) :: tmpB1(nV,16,nV)
end subroutine gemm1
subroutine gemm2(iblock, nV, cholesky_mo_num, tmp_cc2, cc_space_v_vv_chol, tmpB1) bind(C, name="gemm2")
import c_int, c_double
integer(c_int), value :: iblock, nV, cholesky_mo_num
real(c_double) :: tmp_cc2(cholesky_mo_num,nV)
real(c_double) :: cc_space_v_vv_chol(cholesky_mo_num,nV,nV)
real(c_double) :: tmpB1(nV,16,nV)
end subroutine gemm2
subroutine gemm3(iblock, nO, nV, gam, tau, B1, r2) bind(C, name="gemm3")
import c_int, c_double
integer(c_int), value :: iblock, nO, nV, gam
real(c_double) :: tau(nO,nO,nV,nV)
real(c_double) :: B1(nV,nV,*)
real(c_double) :: r2(nO,nO,nV,nV)
end subroutine gemm3
end interface
end module

View File

@ -0,0 +1,13 @@
subroutine save_energy(E,ET)
implicit none
BEGIN_DOC
! Saves the energy in |EZFIO|.
END_DOC
double precision, intent(in) :: E, ET
call ezfio_set_ccsd_energy(E)
if (ET /= 0.d0) then
call ezfio_set_ccsd_energy_t(E+ET)
endif
end