From fadbddc869ca984ed285f7d05ba90f352e673480 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 13 Mar 2023 14:08:32 +0100 Subject: [PATCH] add ccsd --- src/ccsd/80.ccsd_spin.bats | 225 +++ src/ccsd/81.ccsd_space.bats | 225 +++ src/ccsd/NEED | 2 + src/ccsd/README.md | 31 + src/ccsd/ccsd.irp.f | 18 + src/ccsd/ccsd_space_orb.irp.f | 12 + src/ccsd/ccsd_space_orb_sub.irp.f | 2078 +++++++++++++++++++++++++ src/ccsd/ccsd_spin_orb.irp.f | 16 + src/ccsd/ccsd_spin_orb_sub.irp.f | 2301 ++++++++++++++++++++++++++++ src/ccsd/ccsd_t_space_orb.irp.f | 412 +++++ src/ccsd/ccsd_t_spin_orb.irp.f | 376 +++++ src/ccsd/org/ccsd_space_orb.org | 2121 ++++++++++++++++++++++++++ src/ccsd/org/ccsd_spin_orb.org | 2352 +++++++++++++++++++++++++++++ src/ccsd/org/ccsd_t_space_orb.org | 428 ++++++ src/ccsd/org/ccsd_t_spin_orb.org | 385 +++++ 15 files changed, 10982 insertions(+) create mode 100644 src/ccsd/80.ccsd_spin.bats create mode 100644 src/ccsd/81.ccsd_space.bats create mode 100644 src/ccsd/NEED create mode 100644 src/ccsd/README.md create mode 100644 src/ccsd/ccsd.irp.f create mode 100644 src/ccsd/ccsd_space_orb.irp.f create mode 100644 src/ccsd/ccsd_space_orb_sub.irp.f create mode 100644 src/ccsd/ccsd_spin_orb.irp.f create mode 100644 src/ccsd/ccsd_spin_orb_sub.irp.f create mode 100644 src/ccsd/ccsd_t_space_orb.irp.f create mode 100644 src/ccsd/ccsd_t_spin_orb.irp.f create mode 100644 src/ccsd/org/ccsd_space_orb.org create mode 100644 src/ccsd/org/ccsd_spin_orb.org create mode 100644 src/ccsd/org/ccsd_t_space_orb.org create mode 100644 src/ccsd/org/ccsd_t_spin_orb.org diff --git a/src/ccsd/80.ccsd_spin.bats b/src/ccsd/80.ccsd_spin.bats new file mode 100644 index 00000000..0b616871 --- /dev/null +++ b/src/ccsd/80.ccsd_spin.bats @@ -0,0 +1,225 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh1=1e-6 + thresh2=1e-6 + test_exe scf || skip + qp set_file $1 + qp edit --check + #qp run scf + qp set_frozen_core + qp set utils_cc cc_par_t true + qp set utils_cc cc_thresh_conv 1e-12 + file="$(echo $1 | sed 's/.ezfio//g')" + qp run ccsd_spin_orb | tee $file.ccsd.out + energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + #rm $file.ccsd.out + eq $energy1 $2 $thresh1 + eq $energy2 $3 $thresh2 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -49.136487344382 -0.003497589175 +} + +@test "be" { +run be.ezfio -14.623559003577 -0.000230982022 +} + +@test "c2h2" { +run c2h2.ezfio -12.394008897618 -0.010790491561 +} + +@test "ch4" { +run ch4.ezfio -40.390721785799 -0.004476100282 +} + +@test "clf" { +run clf.ezfio -559.186562904081 -0.006577143392 +} + +@test "clo" { +run clo.ezfio -534.564874409332 -0.007584571424 +} + +@test "co2" { +run co2.ezfio -188.129602527766 -0.018040668885 +} + +@test "dhno" { +run dhno.ezfio -130.816650109473 -0.012197331453 +} + +@test "f2" { +run f2.ezfio -199.287826338097 -0.017592872692 +} + +@test "f" { +run f.ezfio -99.616644511121 -0.003624525307 +} + +@test "h2o2" { +run h2o2.ezfio -151.182552729963 -0.009511682086 +} + +@test "h2o" { +run h2o.ezfio -76.237710276526 -0.003001800577 +} + +@test "h2s" { +run h2s.ezfio -398.861214015390 -0.003300559757 +} + +@test "h3coh" { +run h3coh.ezfio -115.221296424969 -0.003566171432 +} + +@test "hbo" { +run hbo.ezfio -100.213539770415 -0.006851489212 +} + +@test "hcn" { +run hcn.ezfio -93.190247992657 -0.013418135043 +} + +@test "hco" { +run hco.ezfio -113.405413962350 -0.007973455337 +} + +@test "lif" { +run lif.ezfio -107.270402903250 -0.007742969005 +} + +@test "n2" { +run n2.ezfio -109.355358930472 -0.018477744342 +} + +@test "n2h4" { +run n2h4.ezfio -111.556885923139 -0.009048077008 +} + +@test "nh3" { +run nh3.ezfio -56.465503060954 -0.007638273755 +} + +@test "oh" { +run oh.ezfio -75.614606132774 -0.004126661739 +} + +@test "sih2_3b1" { +run sih2_3b1.ezfio -290.016780973072 -0.000497825874 +} + +@test "sih3" { +run sih3.ezfio -5.575343504534 -0.002094123268 +} + +@test "so" { +run so.ezfio -26.035945178665 -0.010594351274 +} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +#@test "clo" { +#run clo.ezfio -534.564874409333 -534.572458980757 +#} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +#@test "dhno" { +#run dhno.ezfio -130.816650109473 -130.828847440925 +#} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +#@test "f" { +#run f.ezfio -99.616644511120 -99.620269036428 +#} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +#@test "hco" { +#run hco.ezfio -113.405413962350 -113.413387417687 +#} +# +#@test "lif" { +#run lif.ezfio -107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +#@test "oh" { +#run oh.ezfio -75.614606131897 -75.618732794235 +#} +# +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +#} +# +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -5.577437627802 +#} +# +#@test "so" { +#run so.ezfio -26.035945181998 -26.046539528491 +#} + diff --git a/src/ccsd/81.ccsd_space.bats b/src/ccsd/81.ccsd_space.bats new file mode 100644 index 00000000..02e8e987 --- /dev/null +++ b/src/ccsd/81.ccsd_space.bats @@ -0,0 +1,225 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh1=1e-6 + thresh2=1e-6 + test_exe scf || skip + qp set_file $1 + qp edit --check + #qp run scf + qp set_frozen_core + qp set utils_cc cc_par_t true + qp set utils_cc cc_thresh_conv 1e-12 + file="$(echo $1 | sed 's/.ezfio//g')" + qp run ccsd_space_orb | tee $file.ccsd.out + energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + #rm $file.ccsd.out + eq $energy1 $2 $thresh1 + eq $energy2 $3 $thresh2 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -49.136487344382 -0.003497589175 +} + +@test "be" { +run be.ezfio -14.623559003577 -0.000230982022 +} + +@test "c2h2" { +run c2h2.ezfio -12.394008897618 -0.010790491561 +} + +@test "ch4" { +run ch4.ezfio -40.390721785799 -0.004476100282 +} + +@test "clf" { +run clf.ezfio -559.186562904081 -0.006577143392 +} + +#@test "clo" { +#run clo.ezfio -534.564874409332 -0.007584571424 +#} + +@test "co2" { +run co2.ezfio -188.129602527766 -0.018040668885 +} + +#@test "dhno" { +#run dhno.ezfio -130.816650109473 -0.012197331453 +#} + +@test "f2" { +run f2.ezfio -199.287826338097 -0.017592872692 +} + +#@test "f" { +#run f.ezfio -99.616644511121 -0.003624525307 +#} + +@test "h2o2" { +run h2o2.ezfio -151.182552729963 -0.009511682086 +} + +@test "h2o" { +run h2o.ezfio -76.237710276526 -0.003001800577 +} + +@test "h2s" { +run h2s.ezfio -398.861214015390 -0.003300559757 +} + +@test "h3coh" { +run h3coh.ezfio -115.221296424969 -0.003566171432 +} + +@test "hbo" { +run hbo.ezfio -100.213539770415 -0.006851489212 +} + +@test "hcn" { +run hcn.ezfio -93.190247992657 -0.013418135043 +} + +#@test "hco" { +#run hco.ezfio -113.405413962350 -0.007973455337 +#} + +@test "lif" { +run lif.ezfio -107.270402903250 -0.007742969005 +} + +@test "n2" { +run n2.ezfio -109.355358930472 -0.018477744342 +} + +@test "n2h4" { +run n2h4.ezfio -111.556885923139 -0.009048077008 +} + +@test "nh3" { +run nh3.ezfio -56.465503060954 -0.007638273755 +} + +#@test "oh" { +#run oh.ezfio -75.614606132774 -0.004126661739 +#} + +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973072 -0.000497825874 +#} + +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -0.002094123268 +#} + +#@test "so" { +#run so.ezfio -26.035945178665 -0.010594351274 +#} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +##@test "clo" { +##run clo.ezfio -534.564874409333 -534.572458980757 +##} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +##@test "dhno" { +##run dhno.ezfio -130.816650109473 -130.828847440925 +##} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +##@test "f" { +##run f.ezfio -99.616644511120 -99.620269036428 +##} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +##@test "hco" { +##run hco.ezfio -113.405413962350 -113.413387417687 +##} +# +#@test "lif" { +#run lif.ezfio -107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +##@test "oh" { +##run oh.ezfio -75.614606131897 -75.618732794235 +##} +# +##@test "sih2_3b1" { +##run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +##} +# +##@test "sih3" { +##run sih3.ezfio -5.575343504534 -5.577437627802 +##} +# +##@test "so" { +##run so.ezfio -26.035945181998 -26.046539528491 +##} + diff --git a/src/ccsd/NEED b/src/ccsd/NEED new file mode 100644 index 00000000..e6e6bc59 --- /dev/null +++ b/src/ccsd/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_cc diff --git a/src/ccsd/README.md b/src/ccsd/README.md new file mode 100644 index 00000000..fa59e8a6 --- /dev/null +++ b/src/ccsd/README.md @@ -0,0 +1,31 @@ +# CCSD in spin orbitals and spatial orbitals + +CCSD and CCSD(T) in spin orbitals for open and closed shell systems. +CCSD and CCSD(T) in spatial orbitals for closed shell systems. + +## Calculations +The program will automatically choose the version in spin or spatial orbitals +To run the general program: +``` +qp run ccsd +``` +Nevertheless, you can enforce the run in spin orbitals with +``` +qp run ccsd_spin_orb +``` + +## Settings +The settings can be changed with: +``` +qp set utils_cc cc_#param #val +``` +For more informations on the settings, look at the module utils_cc and its documentation. + +## Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh and +mv *.irp.f ../. +``` + diff --git a/src/ccsd/ccsd.irp.f b/src/ccsd/ccsd.irp.f new file mode 100644 index 00000000..035f50b8 --- /dev/null +++ b/src/ccsd/ccsd.irp.f @@ -0,0 +1,18 @@ +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD program + END_DOC + + read_wf = .True. + touch read_wf + + if (.not. cc_ref_is_open_shell) then + call run_ccsd_space_orb + else + call run_ccsd_spin_orb + endif + +end diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f new file mode 100644 index 00000000..53028ec0 --- /dev/null +++ b/src/ccsd/ccsd_space_orb.irp.f @@ -0,0 +1,12 @@ +! Code + +program ccsd + + implicit none + + read_wf = .True. + touch read_wf + + call run_ccsd_space_orb + +end diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f new file mode 100644 index 00000000..b63375cf --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -0,0 +1,2078 @@ +subroutine run_ccsd_space_orb + + implicit none + + integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d + integer :: u,v,gam,beta,tmp_gam,tmp_beta + integer :: nb_iter + double precision :: get_two_e_integral + double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb + logical :: not_converged + + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t1(:,:), r1(:,:) + double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + integer, allocatable :: list_occ(:), list_vir(:) + integer(bit_kind) :: det(N_int,2) + integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + + PROVIDE mo_two_e_integrals_in_map + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Check that the reference is a closed shell determinant + if (cc_ref_is_open_shell) then + call abort + endif + + ! Number of occ/vir spatial orb + nO = nOa + nV = nVa + + allocate(list_occ(nO),list_vir(nV)) + list_occ = cc_list_occ + list_vir = cc_list_vir + ! Debug + !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) + !print*,'occ',list_occ + !print*,'vir',list_vir + + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) + allocate(tau(nO,nO,nV,nV)) + allocate(t1(nO,nV), r1(nO,nV)) + allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + if (elec_alpha_num /= elec_beta_num) then + print*, 'Only for closed shell systems' + print*, 'elec_alpha_num=',elec_alpha_num + print*, 'elec_beta_num =',elec_beta_num + print*, 'abort' + call abort + endif + + ! Init + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) + call update_tau_space(nO,nV,t1,t2,tau) + !print*,'hf_energy', hf_energy + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + print*,'Guess energy', uncorr_energy+energy, energy + + nb_iter = 0 + not_converged = .True. + max_r1 = 0d0 + max_r2 = 0d0 + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(ta) + + do while (not_converged) + + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + ! Residue + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + max_r = max(max_r1,max_r2) + + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call update_tau_space(nO,nV,t1,t2,tau) + + ! Energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocation + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + + deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + + ! CCSD(T) + double precision :: e_t + + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + + ! Dumb way + !call wall_time(ta) + !call ccsd_par_t_space(nO,nV,t1,t2,e_t) + !call wall_time(tb) + !print*,'Time: ',tb-ta, ' s' + + !print*,'' + !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + !print*,'' + + ! New + print*,'Computing (T) correction...' + call wall_time(ta) + call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + print*,'' + endif + + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(t1,t2) + +end + +! Energy + +subroutine ccsd_energy_space(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do i = 1, nO + do a = 1, nV + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) + ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) + ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + !do beta = 1, nV + ! do u = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! <=> + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * & + ! (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! <=> + ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do u = 1, nO + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do j = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + !$omp do collapse(3) + do u = 1, nO + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + if (dabs(r1(i,a)) > max_r1) then + max_r1 = dabs(r1(i,a)) + endif + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + !H_oo = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! H_oo(u,i) = cc_space_f_oo(u,i) + + ! do j = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end + +! H_vv + +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end + +! H_vo + +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end + +! R2 + +subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1(nO,nV,t1,t2,tau,A1) + call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do j = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + A1(u,v,i,j) * tau(i,j,beta,gam) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do b = 1, nv + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + B1(a,b,beta,gam) * tau(u,v,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + allocate(B1(nV,nV,nV,nV)) + call compute_B1(nO,nV,t1,t2,B1) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1,1) * size(B1,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(B1) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + g_vir(a,beta) * t2(u,v,a,gam) & + ! + g_vir(a,gam) * t2(v,u,a,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - g_occ(u,i) * t2(i,v,beta,gam) & + ! - g_occ(v,i) * t2(i,u,gam,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) & + ! + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovov(u,a,i,gam) * t1(i,beta) * t1(v,a) & + ! - cc_space_v_ovov(v,a,i,beta) * t1(i,gam) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) & + ! - cc_space_v_oovo(v,u,gam,i) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) & + ! - cc_space_v_ovvo(v,a,gam,i) * t1(u,a) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * & + ! (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) & + ! + 0.5d0 * (2d0 * J1(v,a,gam,i) - K1(v,a,i,gam)) * & + ! (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) & + ! - 0.5d0 * K1(v,a,i,gam) * t2(i,u,beta,a) !P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - K1(u,a,i,gam) * t2(i,v,beta,a) & + ! - K1(v,a,i,beta) * t2(i,u,gam,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + if (dabs(r2(i,j,a,b)) > max_r2) then + max_r2 = dabs(r2(i,j,a,b)) + endif + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end + +! A1 + +subroutine compute_A1(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + !A1 = 0d0 + + !do j = 1, nO + ! do i = 1, nO + ! do v = 1, nO + ! do u = 1, nO + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + + ! do a = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) & + ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) + ! + ! do b = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! B1 + +subroutine compute_B1(nO,nV,t1,t2,B1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !B1 = 0d0 + + !do gam = 1, nV + ! do beta = 1, nV + ! do b = 1, nV + ! do a = 1, nV + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + ! do i = 1, nO + ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & + ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) + allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) + + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1 , size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + +! g_occ + +subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_occ = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! g_occ(u,i) = H_oo(u,i) + ! + ! do a = 1, nV + ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) + ! + ! do j = 1, nO + ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! g_vir + +subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_vir = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! g_vir(a,beta) = H_vv(a,beta) + ! + ! do i = 1, nO + ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) + ! + ! do b = 1, nV + ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! J1 + +subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !J1 = 0d0 + + !do i = 1, nO + ! do beta = 1, nV + ! do a = 1, nV + ! do u = 1, nO + ! J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i) + + ! do j = 1, nO + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_ovoo(u,a,j,i) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !K1 = 0d0 + + !do beta = 1, nV + ! do i = 1, nO + ! do a = 1, nV + ! do u = 1, nO + ! K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta) + + ! do j = 1, nO + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_ovoo(u,a,i,j) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f new file mode 100644 index 00000000..6f2de11c --- /dev/null +++ b/src/ccsd/ccsd_spin_orb.irp.f @@ -0,0 +1,16 @@ +! Prog + +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + read_wf = .True. + touch read_wf + + call run_ccsd_spin_orb + +end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f new file mode 100644 index 00000000..23e2cef1 --- /dev/null +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -0,0 +1,2301 @@ +! Code + +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' + !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end + +! Energy + +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end + +! Tau + +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Tau_t + +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end + +! R2 + +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(X_vvoo(nV,nV,nO,nO)) + + call dgemm('T','N', nV, nV*nO*nO, nO, & + 1d0, t1 , size(t1,1), & + v_ovoo, size(v_ovoo,1), & + 0d0, X_vvoo, size(X_vvoo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_vvoo) + +end + +! Use cF_oo + +subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) + double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + integer :: i,j,m,a,b + + allocate(cF_oo(nO,nO)) + + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_oo,X_oovv,Y_oovv) + +end + +! Use cF_ov + +subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) + double precision, allocatable :: X_oovv(:,:,:,:), B_oovv(:,:,:,:) + integer :: i,j,a,b,e,m + + allocate(cF_ov(nO,nV)) + + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t2,cF_ov,nO,nV) & + !$OMP PRIVATE(i,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,r2,X_oovv) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_ov,A_oo,B_oovv,X_oovv) + +end + +! Use cF_vv + +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end + +! Use cW_vvvd + +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end + +! cF_oo + +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end + +! cF_ov + +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end + +! cF_vv + +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + do f = 1, nV + call dgemm('T','N', nV, nV, nO*nO,& + -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), & + v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), & + 1d0 , cF_vv, size(cF_vv,1)) + enddo + +end + +! cW_oooo + +subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oooo(nO,nO,nO,nO) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cW_oooo(nO,nO,nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_oooo(:,:,:,:) + + ! oooo block + + !cW_oooo = v_oooo + + !do j=1,nO + ! do i=1,nO + ! do n=1,nO + ! do m=1,nO + + ! do e=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oooo(nO,nO,nO,nO)) + + call dgemm('N','T', nO*nO*nO, nO, nV, & + 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & + t1 , size(t1,1), & + 0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3)) + !$OMP PARALLEL & + !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) & + !$OMP PRIVATE(i,j,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j=1,nO + do i=1,nO + do n=1,nO + do m=1,nO + cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oooo) + + !do m=1,nO + ! do n=1,nO + ! do i=1,nO + ! do j=1,nO + ! + ! do e=1,nV + ! do f=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end + +! cW_ovvo + +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end + +! cW_vvvv + +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end + +! cW_vvvf + +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f new file mode 100644 index 00000000..1f1db87e --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -0,0 +1,412 @@ +! Dumb way + +subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + integer :: i,j,k,a,b,c + + allocate(W(nO,nO,nO,nV,nV,nV)) + allocate(V(nO,nO,nO,nV,nV,nV)) + + call form_w(nO,nV,t2,W) + call form_v(nO,nV,t1,W,V) + + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + energy = energy / 3d0 + + deallocate(V,W) +end + +subroutine form_w(nO,nV,t2,W) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,l,a,b,c,d + + W = 0d0 + do c = 1, nV + print*,'W:',c,'/',nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + + do d = 1, nV + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (bd|ai) + ! phys + + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + enddo + + do l = 1, nO + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (ck|jl) + ! phys + - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + enddo + + enddo + enddo + enddo + enddo + enddo + enddo + +end + +subroutine form_v(nO,nV,t1,w,v) + +implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV) + double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,a,b,c + + V = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + + cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + + cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + + cc_space_v_vvoo(a,b,i,j) * t1(k,c) + enddo + enddo + enddo + enddo + enddo + enddo + +end + +! Main + +subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_ijk + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP PRIVATE(a,b,c,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c)) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) & + * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + +! W_ijk + +subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) + + implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) + + integer :: l,a,b,c,d + + !W = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & + !$OMP PRIVATE(a,b,c,d,l) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = 0d0 + + do d = 1, nV + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (bd|ai) + ! phys + !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + + do l = 1, nO + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (ck|jl) + ! phys + !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj + - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik + - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij + - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj + - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end + +! V_ijk + +subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v) + +implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO) + double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO) + + integer :: a,b,c + + !V = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end diff --git a/src/ccsd/ccsd_t_spin_orb.irp.f b/src/ccsd/ccsd_t_spin_orb.irp.f new file mode 100644 index 00000000..3f79e4a0 --- /dev/null +++ b/src/ccsd/ccsd_t_spin_orb.irp.f @@ -0,0 +1,376 @@ +! v1 + +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end + +! v2 + +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end diff --git a/src/ccsd/org/ccsd_space_orb.org b/src/ccsd/org/ccsd_space_orb.org new file mode 100644 index 00000000..a848fd26 --- /dev/null +++ b/src/ccsd/org/ccsd_space_orb.org @@ -0,0 +1,2121 @@ +* ccsd with spatial orbitals + +Scuseria, Gustavo E.; Janssen, Curtis L.; Schaefer, Henry +F. (1988). An efficient reformulation of the closed-shell coupled +cluster single and double excitation (CCSD) equations. The Journal of +Chemical Physics, 89(12), 7382–. doi:10.1063/1.455269 + +* Code +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb.irp.f +program ccsd + + implicit none + + read_wf = .True. + touch read_wf + + call run_ccsd_space_orb + +end +#+end_src + +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine run_ccsd_space_orb + + implicit none + + integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d + integer :: u,v,gam,beta,tmp_gam,tmp_beta + integer :: nb_iter + double precision :: get_two_e_integral + double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb + logical :: not_converged + + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t1(:,:), r1(:,:) + double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + integer, allocatable :: list_occ(:), list_vir(:) + integer(bit_kind) :: det(N_int,2) + integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + + PROVIDE mo_two_e_integrals_in_map + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Check that the reference is a closed shell determinant + if (cc_ref_is_open_shell) then + call abort + endif + + ! Number of occ/vir spatial orb + nO = nOa + nV = nVa + + allocate(list_occ(nO),list_vir(nV)) + list_occ = cc_list_occ + list_vir = cc_list_vir + ! Debug + !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) + !print*,'occ',list_occ + !print*,'vir',list_vir + + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) + allocate(tau(nO,nO,nV,nV)) + allocate(t1(nO,nV), r1(nO,nV)) + allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + if (elec_alpha_num /= elec_beta_num) then + print*, 'Only for closed shell systems' + print*, 'elec_alpha_num=',elec_alpha_num + print*, 'elec_beta_num =',elec_beta_num + print*, 'abort' + call abort + endif + + ! Init + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) + call update_tau_space(nO,nV,t1,t2,tau) + !print*,'hf_energy', hf_energy + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + print*,'Guess energy', uncorr_energy+energy, energy + + nb_iter = 0 + not_converged = .True. + max_r1 = 0d0 + max_r2 = 0d0 + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(ta) + + do while (not_converged) + + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + ! Residue + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + max_r = max(max_r1,max_r2) + + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call update_tau_space(nO,nV,t1,t2,tau) + + ! Energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocation + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + + deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + + ! CCSD(T) + double precision :: e_t + + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + + ! Dumb way + !call wall_time(ta) + !call ccsd_par_t_space(nO,nV,t1,t2,e_t) + !call wall_time(tb) + !print*,'Time: ',tb-ta, ' s' + + !print*,'' + !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + !print*,'' + + ! New + print*,'Computing (T) correction...' + call wall_time(ta) + call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + print*,'' + endif + + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(t1,t2) + +end +#+END_SRC + +* Energy +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine ccsd_energy_space(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do i = 1, nO + do a = 1, nV + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end +#+END_SRC + +* T +** Tau +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine update_tau_space(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* Residual equations +** R1 +*** R1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) + ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) + ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + !do beta = 1, nV + ! do u = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! <=> + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * & + ! (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! <=> + ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do u = 1, nO + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do j = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + !$omp do collapse(3) + do u = 1, nO + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + if (dabs(r1(i,a)) > max_r1) then + max_r1 = dabs(r1(i,a)) + endif + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+end_src + +*** Intermediates +**** H_oo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + !H_oo = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! H_oo(u,i) = cc_space_f_oo(u,i) + + ! do j = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end +#+END_SRC + +**** H_vv +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end +#+END_SRC + +**** H_vo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end +#+END_SRC + +** R2 +*** R2 +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1(nO,nV,t1,t2,tau,A1) + call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do j = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + A1(u,v,i,j) * tau(i,j,beta,gam) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do b = 1, nv + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + B1(a,b,beta,gam) * tau(u,v,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + allocate(B1(nV,nV,nV,nV)) + call compute_B1(nO,nV,t1,t2,B1) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1,1) * size(B1,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(B1) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + g_vir(a,beta) * t2(u,v,a,gam) & + ! + g_vir(a,gam) * t2(v,u,a,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - g_occ(u,i) * t2(i,v,beta,gam) & + ! - g_occ(v,i) * t2(i,u,gam,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) & + ! + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovov(u,a,i,gam) * t1(i,beta) * t1(v,a) & + ! - cc_space_v_ovov(v,a,i,beta) * t1(i,gam) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) & + ! - cc_space_v_oovo(v,u,gam,i) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) & + ! - cc_space_v_ovvo(v,a,gam,i) * t1(u,a) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * & + ! (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) & + ! + 0.5d0 * (2d0 * J1(v,a,gam,i) - K1(v,a,i,gam)) * & + ! (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) & + ! - 0.5d0 * K1(v,a,i,gam) * t2(i,u,beta,a) !P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - K1(u,a,i,gam) * t2(i,v,beta,a) & + ! - K1(v,a,i,beta) * t2(i,u,gam,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + if (dabs(r2(i,j,a,b)) > max_r2) then + max_r2 = dabs(r2(i,j,a,b)) + endif + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end +#+end_src + +*** Intermediates +**** A1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_A1(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + !A1 = 0d0 + + !do j = 1, nO + ! do i = 1, nO + ! do v = 1, nO + ! do u = 1, nO + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + + ! do a = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) & + ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) + ! + ! do b = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end +#+END_SRC + +**** B1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_B1(nO,nV,t1,t2,B1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !B1 = 0d0 + + !do gam = 1, nV + ! do beta = 1, nV + ! do b = 1, nV + ! do a = 1, nV + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + ! do i = 1, nO + ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & + ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) + allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) + + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1 , size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end +#+END_SRC + +**** g_occ +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_occ = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! g_occ(u,i) = H_oo(u,i) + ! + ! do a = 1, nV + ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) + ! + ! do j = 1, nO + ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** g_vir +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_vir = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! g_vir(a,beta) = H_vv(a,beta) + ! + ! do i = 1, nO + ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) + ! + ! do b = 1, nV + ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** J1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !J1 = 0d0 + + !do i = 1, nO + ! do beta = 1, nV + ! do a = 1, nV + ! do u = 1, nO + ! J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i) + + ! do j = 1, nO + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_ovoo(u,a,j,i) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end +#+END_SRC + +**** K1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !K1 = 0d0 + + !do beta = 1, nV + ! do i = 1, nO + ! do a = 1, nV + ! do u = 1, nO + ! K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta) + + ! do j = 1, nO + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_ovoo(u,a,i,j) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end +#+END_SRC + diff --git a/src/ccsd/org/ccsd_spin_orb.org b/src/ccsd/org/ccsd_spin_orb.org new file mode 100644 index 00000000..8fb403c3 --- /dev/null +++ b/src/ccsd/org/ccsd_spin_orb.org @@ -0,0 +1,2352 @@ +* CCSD spin orb +** Ref +A direct product decomposition approach for symmetry exploitation in manybody +methods. I. Energy calculations +John F. Stanton, Jürgen Gauss, John D. Watts, and Rodney J. Bartlett +The Journal of Chemical Physics 94, 4334 (1991) +http://dx.doi.org/10.1063/1.460620A + +** Prog +#+begin_src f90 :comments org :tangle ccsd_spin_orb.irp.f +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + read_wf = .True. + touch read_wf + + call run_ccsd_spin_orb + +end +#+end_src + +** Code +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' + !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end +#+end_src + +* Energy +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end +#+end_src + +* T +** Update +*** Tau +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** Tau_t +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* R +** R1 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end +#+end_src + +** R2 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(X_vvoo(nV,nV,nO,nO)) + + call dgemm('T','N', nV, nV*nO*nO, nO, & + 1d0, t1 , size(t1,1), & + v_ovoo, size(v_ovoo,1), & + 0d0, X_vvoo, size(X_vvoo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_vvoo) + +end +#+end_src + +* Use intermediates +** Use cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) + double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + integer :: i,j,m,a,b + + allocate(cF_oo(nO,nO)) + + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_oo,X_oovv,Y_oovv) + +end +#+end_src + +** Use cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) + double precision, allocatable :: X_oovv(:,:,:,:), B_oovv(:,:,:,:) + integer :: i,j,a,b,e,m + + allocate(cF_ov(nO,nV)) + + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t2,cF_ov,nO,nV) & + !$OMP PRIVATE(i,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,r2,X_oovv) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_ov,A_oo,B_oovv,X_oovv) + +end +#+end_src + +** Use cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end +#+end_src + +** Use cW_vvvd +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end +#+end_src + +* Intermediates +** cF +*** cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end +#+end_src + +*** cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + do f = 1, nV + call dgemm('T','N', nV, nV, nO*nO,& + -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), & + v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), & + 1d0 , cF_vv, size(cF_vv,1)) + enddo + +end +#+end_src + +** cW +*** cW_oooo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oooo(nO,nO,nO,nO) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cW_oooo(nO,nO,nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_oooo(:,:,:,:) + + ! oooo block + + !cW_oooo = v_oooo + + !do j=1,nO + ! do i=1,nO + ! do n=1,nO + ! do m=1,nO + + ! do e=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oooo(nO,nO,nO,nO)) + + call dgemm('N','T', nO*nO*nO, nO, nV, & + 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & + t1 , size(t1,1), & + 0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3)) + !$OMP PARALLEL & + !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) & + !$OMP PRIVATE(i,j,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j=1,nO + do i=1,nO + do n=1,nO + do m=1,nO + cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oooo) + + !do m=1,nO + ! do n=1,nO + ! do i=1,nO + ! do j=1,nO + ! + ! do e=1,nV + ! do f=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end +#+end_src + +*** cW_ovvo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end +#+end_src + +*** cW_vvvv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end +#+end_src + +*** cW_vvvf +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end +#+end_src + diff --git a/src/ccsd/org/ccsd_t_space_orb.org b/src/ccsd/org/ccsd_t_space_orb.org new file mode 100644 index 00000000..8709d7be --- /dev/null +++ b/src/ccsd/org/ccsd_t_space_orb.org @@ -0,0 +1,428 @@ +Ref: +Integral-Direct and Parallel Implementation of the CCSD(T) Method: +Algorithmic Developments and Large-Scale Applications +László Gyevi-Nagy, Mihály Kállay, and Péter R. Nagy +J. Chem. Theory Comput. 2020, 16, 1, 366–384 +https://doi.org/10.1021/acs.jctc.9b00957 + +* Dumb way +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + integer :: i,j,k,a,b,c + + allocate(W(nO,nO,nO,nV,nV,nV)) + allocate(V(nO,nO,nO,nV,nV,nV)) + + call form_w(nO,nV,t2,W) + call form_v(nO,nV,t1,W,V) + + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + energy = energy / 3d0 + + deallocate(V,W) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_w(nO,nV,t2,W) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,l,a,b,c,d + + W = 0d0 + do c = 1, nV + print*,'W:',c,'/',nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + + do d = 1, nV + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (bd|ai) + ! phys + + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + enddo + + do l = 1, nO + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (ck|jl) + ! phys + - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + enddo + + enddo + enddo + enddo + enddo + enddo + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_v(nO,nV,t1,w,v) + +implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV) + double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,a,b,c + + V = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + + cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + + cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + + cc_space_v_vvoo(a,b,i,j) * t1(k,c) + enddo + enddo + enddo + enddo + enddo + enddo + +end +#+END_SRC + +* Better way +** Main +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_ijk + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP PRIVATE(a,b,c,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c)) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) & + * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end +#+END_SRC + +** W_ijk +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) + + implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) + + integer :: l,a,b,c,d + + !W = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & + !$OMP PRIVATE(a,b,c,d,l) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = 0d0 + + do d = 1, nV + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (bd|ai) + ! phys + !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + + do l = 1, nO + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (ck|jl) + ! phys + !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj + - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik + - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij + - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj + - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end +#+END_SRC + +** V_ijk +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v) + +implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO) + double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO) + + integer :: a,b,c + + !V = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end +#+END_SRC diff --git a/src/ccsd/org/ccsd_t_spin_orb.org b/src/ccsd/org/ccsd_t_spin_orb.org new file mode 100644 index 00000000..c9a41abd --- /dev/null +++ b/src/ccsd/org/ccsd_t_spin_orb.org @@ -0,0 +1,385 @@ +* CCSD(T) spin orb + +Ref: +John D. Watts, Jürgen Gauss, and Rodney J. Bartlett +J. Chem. Phys. 98, 8718 (1993) +http://dx.doi.org/10.1063/1.464480 + +** v1 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end +#+end_src + +** v2 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end +#+end_src