From a4834d0acee2b18820a9efea2a090e6c9e804c33 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 13 Jun 2023 16:01:05 +0200 Subject: [PATCH 01/74] Allow merge with master --- src/davidson_keywords/usef.irp.f | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f index fed2ba9b..7ca2d203 100644 --- a/src/davidson_keywords/usef.irp.f +++ b/src/davidson_keywords/usef.irp.f @@ -13,7 +13,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io read(env,*) nthreads_davidson + call unlock_io call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER From 71f6163c40d70b4f35bd65f221f4da7b370149df Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 18 Jun 2023 20:28:48 +0200 Subject: [PATCH 02/74] added some comments for normal ordering old --- src/tc_bi_ortho/normal_ordered_old.irp.f | 10 +++++- src/tc_bi_ortho/test_normal_order.irp.f | 43 ++++++++++++++++++++---- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 417580dd..6ee21a14 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -120,6 +120,13 @@ END_PROVIDER subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC +! give the contribution for a double excitation of opposite spin BUT averaged over spin +! +! it is the average of and +! +! because the orbitals h1,h2,p1,p2 are spatial orbitals and therefore can be of different spins + END_DOC implicit none integer, intent(in) :: Nint, h1, h2, p1, p2 @@ -158,7 +165,8 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) ! spin average +! hthree += 1.d0 * int_direct - 1.0d0 * (int_exc_13 + int_exc_12) enddo return diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index cb0c355c..ac84dbc6 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -20,7 +20,7 @@ subroutine test integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) integer :: exc(0:2,2,2) integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal + double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp integer, allocatable :: occ(:,:) allocate( occ(N_int*bit_kind_size,2) ) call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) @@ -32,15 +32,44 @@ subroutine test do p1 = elec_alpha_num+1, mo_num do h2 = 1, elec_beta_num do p2 = elec_beta_num+1, mo_num + hthree = 0.d0 + det_i = ref_bitmask + s1 = 1 + s2 = 2 call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree *= phase -! !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + det_i = ref_bitmask + s1 = 2 + s2 = 1 + call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle + call do_single_excitation(det_i,h2,p2,s2,i_ok) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call get_excitation_degree(ref_bitmask,det_i,degree,N_int) + call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + + +! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) + if(dabs(hthree).lt.1.d-10)cycle + if(dabs(hthree-normal).gt.1.d-10)then +! print*,pp2,pp1,hh2,hh1 + print*,p2,p1,h2,h1 + print*,hthree,normal,dabs(hthree-normal) + stop + endif +! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) ! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) accu += dabs(hthree-normal) enddo @@ -73,8 +102,8 @@ do h1 = 1, elec_alpha_num integer :: hh1, pp1, hh2, pp2, ss1, ss2 call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) +! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) if(dabs(hthree).lt.1.d-10)cycle if(dabs(hthree-normal).gt.1.d-10)then print*,pp2,pp1,hh2,hh1 From b2e44beb3e11cacc9594a58da5c7ed4295506092 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 18 Jun 2023 21:42:40 +0200 Subject: [PATCH 03/74] added casscf_cipsi --- src/casscf_cipsi/50.casscf.bats | 49 +++ src/casscf_cipsi/EZFIO.cfg | 75 ++++ src/casscf_cipsi/NEED | 5 + src/casscf_cipsi/README.rst | 5 + src/casscf_cipsi/bavard.irp.f | 6 + src/casscf_cipsi/bielec.irp.f | 155 +++++++ src/casscf_cipsi/bielec_natorb.irp.f | 369 +++++++++++++++++ src/casscf_cipsi/casscf.irp.f | 110 +++++ src/casscf_cipsi/class.irp.f | 12 + src/casscf_cipsi/dav_sx_mat.irp.f | 45 +++ src/casscf_cipsi/densities.irp.f | 67 +++ src/casscf_cipsi/densities_peter.irp.f | 150 +++++++ src/casscf_cipsi/det_manip.irp.f | 125 ++++++ src/casscf_cipsi/driver_optorb.irp.f | 3 + src/casscf_cipsi/get_energy.irp.f | 51 +++ src/casscf_cipsi/grad_old.irp.f | 74 ++++ src/casscf_cipsi/gradient.irp.f | 215 ++++++++++ src/casscf_cipsi/hessian.irp.f | 539 +++++++++++++++++++++++++ src/casscf_cipsi/hessian_old.irp.f | 310 ++++++++++++++ src/casscf_cipsi/mcscf_fock.irp.f | 80 ++++ src/casscf_cipsi/natorb.irp.f | 231 +++++++++++ src/casscf_cipsi/neworbs.irp.f | 253 ++++++++++++ src/casscf_cipsi/reorder_orb.irp.f | 70 ++++ src/casscf_cipsi/save_energy.irp.f | 9 + src/casscf_cipsi/superci_dm.irp.f | 207 ++++++++++ src/casscf_cipsi/swap_orb.irp.f | 132 ++++++ src/casscf_cipsi/tot_en.irp.f | 101 +++++ 27 files changed, 3448 insertions(+) create mode 100644 src/casscf_cipsi/50.casscf.bats create mode 100644 src/casscf_cipsi/EZFIO.cfg create mode 100644 src/casscf_cipsi/NEED create mode 100644 src/casscf_cipsi/README.rst create mode 100644 src/casscf_cipsi/bavard.irp.f create mode 100644 src/casscf_cipsi/bielec.irp.f create mode 100644 src/casscf_cipsi/bielec_natorb.irp.f create mode 100644 src/casscf_cipsi/casscf.irp.f create mode 100644 src/casscf_cipsi/class.irp.f create mode 100644 src/casscf_cipsi/dav_sx_mat.irp.f create mode 100644 src/casscf_cipsi/densities.irp.f create mode 100644 src/casscf_cipsi/densities_peter.irp.f create mode 100644 src/casscf_cipsi/det_manip.irp.f create mode 100644 src/casscf_cipsi/driver_optorb.irp.f create mode 100644 src/casscf_cipsi/get_energy.irp.f create mode 100644 src/casscf_cipsi/grad_old.irp.f create mode 100644 src/casscf_cipsi/gradient.irp.f create mode 100644 src/casscf_cipsi/hessian.irp.f create mode 100644 src/casscf_cipsi/hessian_old.irp.f create mode 100644 src/casscf_cipsi/mcscf_fock.irp.f create mode 100644 src/casscf_cipsi/natorb.irp.f create mode 100644 src/casscf_cipsi/neworbs.irp.f create mode 100644 src/casscf_cipsi/reorder_orb.irp.f create mode 100644 src/casscf_cipsi/save_energy.irp.f create mode 100644 src/casscf_cipsi/superci_dm.irp.f create mode 100644 src/casscf_cipsi/swap_orb.irp.f create mode 100644 src/casscf_cipsi/tot_en.irp.f diff --git a/src/casscf_cipsi/50.casscf.bats b/src/casscf_cipsi/50.casscf.bats new file mode 100644 index 00000000..a0db725d --- /dev/null +++ b/src/casscf_cipsi/50.casscf.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run_stoch() { + thresh=$2 + test_exe casscf || skip + qp set perturbation do_pt2 True + qp set determinants n_det_max $3 + qp set davidson threshold_davidson 1.e-10 + qp set davidson n_states_diag 4 + qp run casscf | tee casscf.out + energy1="$(ezfio get casscf energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" + eq $energy1 $1 $thresh +} + +@test "F2" { # 18.0198s + rm -rf f2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/f2.zmt -o f2_casscf + qp set_file f2_casscf + qp run scf + qp set_mo_class --core="[1-6,8-9]" --act="[7,10]" --virt="[11-46]" + run_stoch -198.773366970 1.e-4 100000 +} + +@test "N2" { # 18.0198s + rm -rf n2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/n2.xyz -o n2_casscf + qp set_file n2_casscf + qp run scf + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + run_stoch -109.0961643162 1.e-4 100000 +} + +@test "N2_stretched" { + rm -rf n2_stretched_casscf + qp_create_ezfio -b aug-cc-pvdz -m 7 ../input/n2_stretched.xyz -o n2_stretched_casscf + qp set_file n2_stretched_casscf + qp run scf | tee scf.out + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + qp set electrons elec_alpha_num 7 + qp set electrons elec_beta_num 7 + run_stoch -108.7860471300 1.e-4 100000 +# + +} + diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg new file mode 100644 index 00000000..2a1f1926 --- /dev/null +++ b/src/casscf_cipsi/EZFIO.cfg @@ -0,0 +1,75 @@ +[energy] +type: double precision +doc: Calculated Selected |FCI| energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated |FCI| energy + |PT2| +interface: ezfio +size: (determinants.n_states) + +[state_following_casscf] +type: logical +doc: If |true|, the CASSCF will try to follow the guess CI vector and orbitals +interface: ezfio,provider,ocaml +default: False + + +[diag_hess_cas] +type: logical +doc: If |true|, only the DIAGONAL part of the hessian is retained for the CASSCF +interface: ezfio,provider,ocaml +default: False + +[hess_cv_cv] +type: logical +doc: If |true|, the core-virtual - core-virtual part of the hessian is computed +interface: ezfio,provider,ocaml +default: True + + +[level_shift_casscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve SCF convergence +interface: ezfio,provider,ocaml +default: 0.005 + + +[fast_2rdm] +type: logical +doc: If true, the two-rdm are computed with a fast algo +interface: ezfio,provider,ocaml +default: True + +[criterion_casscf] +type: character*(32) +doc: choice of the criterion for the convergence of the casscf: can be energy or gradients or e_pt2 +interface: ezfio, provider, ocaml +default: e_pt2 + +[thresh_casscf] +type: Threshold +doc: Threshold on the convergence of the CASCF energy. +interface: ezfio,provider,ocaml +default: 1.e-06 + + +[pt2_min_casscf] +type: Threshold +doc: Minimum value of the pt2_max parameter for the CIPSI in the CASSCF iterations. +interface: ezfio,provider,ocaml +default: 1.e-04 + +[n_big_act_orb] +type: integer +doc: Number of active orbitals from which the active space is considered as large, and therefore pt2_min_casscf is activated. +interface: ezfio,provider,ocaml +default: 16 + +[adaptive_pt2_max] +type: logical +doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder +interface: ezfio,provider,ocaml +default: True diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED new file mode 100644 index 00000000..dd91c7bd --- /dev/null +++ b/src/casscf_cipsi/NEED @@ -0,0 +1,5 @@ +cipsi +selectors_full +generators_cas +two_body_rdm +dav_general_mat diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst new file mode 100644 index 00000000..08bfd95b --- /dev/null +++ b/src/casscf_cipsi/README.rst @@ -0,0 +1,5 @@ +====== +casscf +====== + +|CASSCF| program with the CIPSI algorithm. diff --git a/src/casscf_cipsi/bavard.irp.f b/src/casscf_cipsi/bavard.irp.f new file mode 100644 index 00000000..463c3ea4 --- /dev/null +++ b/src/casscf_cipsi/bavard.irp.f @@ -0,0 +1,6 @@ +! -*- F90 -*- +BEGIN_PROVIDER [logical, bavard] +! bavard=.true. + bavard=.false. +END_PROVIDER + diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f new file mode 100644 index 00000000..0a44f994 --- /dev/null +++ b/src/casscf_cipsi/bielec.irp.f @@ -0,0 +1,155 @@ +BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + real*8 :: mo_two_e_integral + + bielec_PQxx(:,:,:,:) = 0.d0 + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) + bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + end do + end do + !$OMP END DO + + + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + double precision, allocatable :: integrals_array(:,:) + real*8 :: mo_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + bielec_PxxQ = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + allocate(integrals_array(mo_num,mo_num)) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + end do + end do + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + + ! (ip|qj) + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(integrals_array) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,p,t,u,v + double precision, external :: mo_two_e_integral + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,p,t,u,v) & + !$OMP SHARED(mo_num,n_act_orb,list_act,bielecCI) + do p=1,mo_num + do j=1,n_act_orb + u=list_act(j) + do k=1,n_act_orb + v=list_act(k) + do i=1,n_act_orb + t=list_act(i) + bielecCI(i,k,j,p) = mo_two_e_integral(t,u,v,p) + end do + end do + end do + end do + !$OMP END PARALLEL DO + +END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f new file mode 100644 index 00000000..9968530c --- /dev/null +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -0,0 +1,369 @@ + BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! integral (pq|xx) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + end do + end do + + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + do j=1,mo_num + bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(mo_num,mo_num,n_act_orb),d(mo_num,mo_num,n_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate (f,d) + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! integral (px|xq) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + + + allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & + d(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)) + + !$OMP DO + do j=1,mo_num + bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_core_inact_act_orb**2,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do k=1,mo_num + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate(f,d) + + allocate(f(mo_num,n_core_inact_act_orb,n_act_orb), & + d(mo_num,n_core_inact_act_orb,n_act_orb) ) + + !$OMP DO + do k=1,mo_num + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + deallocate(f,d) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! integrals (tu|vp) in the basis of natural MOs + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielecCI_no,bielecCI,list_act,natorbsCI) + + allocate (f(n_act_orb,n_act_orb,mo_num), & + d(n_act_orb,n_act_orb,mo_num)) + + !$OMP DO + do l=1,mo_num + bielecCI_no(:,:,:,l) = bielecCI(:,:,:,l) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(p,j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + bielecCI_no(p,j,k,l)=d(p,j,k) + end do + end do + + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(j,p,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do p=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,p,k,l)=d(p,j,k) + end do + end do + end do + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,p,l) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO + do l=1,n_act_orb + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,l,list_act(p)) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,l,list_act(p))=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(d,f) + !$OMP END PARALLEL + + +END_PROVIDER + diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f new file mode 100644 index 00000000..a2f3c5a7 --- /dev/null +++ b/src/casscf_cipsi/casscf.irp.f @@ -0,0 +1,110 @@ +program casscf + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + call reorder_orbitals_for_casscf +! no_vvvv_integrals = .True. +! touch no_vvvv_integrals + n_det_max_full = 500 + touch n_det_max_full + pt2_relative_error = 0.04 + touch pt2_relative_error +! call run_stochastic_cipsi + call run +end + +subroutine run + implicit none + double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E + logical :: converged,state_following_casscf_save + integer :: iteration + converged = .False. + + energy = 0.d0 + mo_label = "MCSCF" + iteration = 1 + state_following_casscf_save = state_following_casscf + state_following_casscf = .True. + touch state_following_casscf + ept2_before = 0.d0 + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif + do while (.not.converged) + print*,'pt2_max = ',pt2_max + call run_stochastic_cipsi + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'CAS-SCF energy = ') + if(n_states == 1)then + double precision :: E_PT2, PT2 + call ezfio_get_casscf_energy_pt2(E_PT2) + call ezfio_get_casscf_energy(PT2) + PT2 -= E_PT2 + call write_double(6,E_PT2,'E + PT2 energy = ') + call write_double(6,PT2,' PT2 = ') + call write_double(6,pt2_max,' PT2_MAX = ') + endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = dabs(E_PT2 - ept2_before) + converged = dabs(delta_E) < thresh_casscf + endif + ept2_before = E_PT2 + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif + endif + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + call save_mos + if(.not.converged)then + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_save + soft_touch state_following_casscf + endif + endif + + enddo + +end + + diff --git a/src/casscf_cipsi/class.irp.f b/src/casscf_cipsi/class.irp.f new file mode 100644 index 00000000..7360a661 --- /dev/null +++ b/src/casscf_cipsi/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the CAS case, all those are always false except do_only_cas + END_DOC + do_only_cas = .True. + do_only_1h1p = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/casscf_cipsi/dav_sx_mat.irp.f b/src/casscf_cipsi/dav_sx_mat.irp.f new file mode 100644 index 00000000..1e24f0e2 --- /dev/null +++ b/src/casscf_cipsi/dav_sx_mat.irp.f @@ -0,0 +1,45 @@ + + +subroutine davidson_diag_sx_mat(N_st, u_in, energies) + implicit none + integer, intent(in) :: N_st + double precision, intent(out) :: u_in(nMonoEx+1,n_states_diag), energies(N_st) + integer :: i,j,N_st_tmp, dim_in, sze, N_st_diag_in + integer, allocatable :: list_guess(:) + double precision, allocatable :: H_jj(:) + logical :: converged + N_st_diag_in = n_states_diag + provide SXmatrix + sze = nMonoEx+1 + dim_in = sze + allocate(H_jj(sze), list_guess(sze)) + H_jj(1) = 0.d0 + N_st_tmp = 1 + list_guess(1) = 1 + do j = 2, nMonoEx+1 + H_jj(j) = SXmatrix(j,j) + if(H_jj(j).lt.0.d0)then + list_guess(N_st_tmp) = j + N_st_tmp += 1 + endif + enddo + if(N_st_tmp .ne. N_st)then + print*,'Pb in davidson_diag_sx_mat' + print*,'N_st_tmp .ne. N_st' + print*,N_st_tmp, N_st + stop + endif + print*,'Number of possibly interesting states = ',N_st + print*,'Corresponding diagonal elements of the SX matrix ' + u_in = 0.d0 + do i = 1, min(N_st, N_st_diag_in) +! do i = 1, N_st + j = list_guess(i) + print*,'i,j',i,j + print*,'SX(i,i) = ',H_jj(j) + u_in(j,i) = 1.d0 + enddo + call davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,SXmatrix) + print*,'energies = ',energies + +end diff --git a/src/casscf_cipsi/densities.irp.f b/src/casscf_cipsi/densities.irp.f new file mode 100644 index 00000000..bebcf5d7 --- /dev/null +++ b/src/casscf_cipsi/densities.irp.f @@ -0,0 +1,67 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, D0tu, (n_act_orb,n_act_orb) ] + implicit none + BEGIN_DOC + ! the first-order density matrix in the basis of the starting MOs. + ! matrix is state averaged. + END_DOC + integer :: t,u + + do u=1,n_act_orb + do t=1,n_act_orb + D0tu(t,u) = one_e_dm_mo_alpha_average( list_act(t), list_act(u) ) + & + one_e_dm_mo_beta_average ( list_act(t), list_act(u) ) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [real*8, P0tuvx, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! The second-order density matrix in the basis of the starting MOs ONLY IN THE RANGE OF ACTIVE MOS + ! The values are state averaged + ! + ! We use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + ! P0tuvx(p,q,r,s) = chemist notation : 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + END_DOC + implicit none + integer :: t,u,v,x + integer :: tt,uu,vv,xx + integer :: mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing the 2 body RDM on the active part' + endif + + P0tuvx= 0.d0 + if(fast_2rdm)then + do istate=1,N_states + do x = 1, n_act_orb + do v = 1, n_act_orb + do u = 1, n_act_orb + do t = 1, n_act_orb + ! 1 1 2 2 1 2 1 2 + P0tuvx(t,u,v,x) = 0.5d0 * state_av_act_2_rdm_spin_trace_mo(t,v,u,x) + enddo + enddo + enddo + enddo + enddo + else + P0tuvx = P0tuvx_peter + endif + +END_PROVIDER diff --git a/src/casscf_cipsi/densities_peter.irp.f b/src/casscf_cipsi/densities_peter.irp.f new file mode 100644 index 00000000..ee7414da --- /dev/null +++ b/src/casscf_cipsi/densities_peter.irp.f @@ -0,0 +1,150 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, P0tuvx_peter, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! the second-order density matrix in the basis of the starting MOs + ! matrices are state averaged + ! + ! we use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + END_DOC + implicit none + integer :: t,u,v,x,mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing density matrix P0' + endif + + P0tuvx_peter = 0.d0 + + ! first loop: we apply E_tu, once for D_tu, once for -P_tvvu + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do t=1,n_act_orb + ipart=list_act(t) + do u=1,n_act_orb + ihole=list_act(u) + ! apply E_tu + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! det_mu_ex1 is in the list + if (nu1.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu1,istate)*phase1 + ! and we fill P0_tvvu + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + ! det_mu_ex2 is in the list + if (nu2.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu2,istate)*phase2 + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + end do + end do + end do + ! now we do the double excitation E_tu E_vx |0> + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do v=1,n_act_orb + ipart=list_act(v) + do x=1,n_act_orb + ihole=list_act(x) + ! apply E_vx + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! we apply E_tu to the first resultant determinant, thus E_tu E_vx |0> + if (ierr1.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex1,det_mu_ex11,N_int) + call det_copy(det_mu_ex1,det_mu_ex12,N_int) + call do_spinfree_mono_excitation(det_mu_ex1,det_mu_ex11& + ,det_mu_ex12,nu11,nu12,jhole,jpart,phase11,phase12,ierr11,ierr12) + if (nu11.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu11,istate)& + *phase11*phase1 + end do + end if + if (nu12.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu12,istate)& + *phase12*phase1 + end do + end if + end do + end do + end if + + ! we apply E_tu to the second resultant determinant + if (ierr2.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex2,det_mu_ex21,N_int) + call det_copy(det_mu_ex2,det_mu_ex22,N_int) + call do_spinfree_mono_excitation(det_mu_ex2,det_mu_ex21& + ,det_mu_ex22,nu21,nu22,jhole,jpart,phase21,phase22,ierr21,ierr22) + if (nu21.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu21,istate)& + *phase21*phase2 + end do + end if + if (nu22.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu22,istate)& + *phase22*phase2 + end do + end if + end do + end do + end if + + end do + end do + end do + + ! we average by just dividing by the number of states + do x=1,n_act_orb + do v=1,n_act_orb + do u=1,n_act_orb + do t=1,n_act_orb + P0tuvx_peter(t,u,v,x)*=0.5D0/dble(N_states) + end do + end do + end do + end do + +END_PROVIDER diff --git a/src/casscf_cipsi/det_manip.irp.f b/src/casscf_cipsi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_cipsi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_cipsi/driver_optorb.irp.f b/src/casscf_cipsi/driver_optorb.irp.f new file mode 100644 index 00000000..2e3e02dc --- /dev/null +++ b/src/casscf_cipsi/driver_optorb.irp.f @@ -0,0 +1,3 @@ +subroutine driver_optorb + implicit none +end diff --git a/src/casscf_cipsi/get_energy.irp.f b/src/casscf_cipsi/get_energy.irp.f new file mode 100644 index 00000000..cfb26b59 --- /dev/null +++ b/src/casscf_cipsi/get_energy.irp.f @@ -0,0 +1,51 @@ +program print_2rdm + implicit none + BEGIN_DOC + ! get the active part of the bielectronic energy on a given wave function. + ! + ! useful to test the active part of the spin trace 2 rdms + END_DOC +!no_vvvv_integrals = .True. + read_wf = .True. +!touch read_wf no_vvvv_integrals +!call routine +!call routine_bis + call print_grad +end + +subroutine print_grad + implicit none + integer :: i + do i = 1, nMonoEx + if(dabs(gradvec2(i)).gt.1.d-5)then + print*,'' + print*,i,gradvec2(i),excit(:,i) + endif + enddo +end + +subroutine routine + integer :: i,j,k,l + integer :: ii,jj,kk,ll + double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral + thr = 1.d-10 + + + accu = 0.d0 + do ll = 1, n_act_orb + l = list_act(ll) + do kk = 1, n_act_orb + k = list_act(kk) + do jj = 1, n_act_orb + j = list_act(jj) + do ii = 1, n_act_orb + i = list_act(ii) + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + accu(1) += state_av_act_2_rdm_spin_trace_mo(ii,jj,kk,ll) * integral + enddo + enddo + enddo + enddo + print*,'accu = ',accu(1) + +end diff --git a/src/casscf_cipsi/grad_old.irp.f b/src/casscf_cipsi/grad_old.irp.f new file mode 100644 index 00000000..d60a60c8 --- /dev/null +++ b/src/casscf_cipsi/grad_old.irp.f @@ -0,0 +1,74 @@ + +BEGIN_PROVIDER [real*8, gradvec_old, (nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate + real*8 :: res + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + call calc_grad_elem(ihole,ipart,res) + gradvec_old(indx)=res + end do + + real*8 :: norm_grad + norm_grad=0.d0 + do indx=1,nMonoEx + norm_grad+=gradvec_old(indx)*gradvec_old(indx) + end do + norm_grad=sqrt(norm_grad) + if (bavard) then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via <0|EH|0>) : ', norm_grad + write(6,*) + endif + + +END_PROVIDER + +subroutine calc_grad_elem(ihole,ipart,res) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 , q=hole, p=particle + END_DOC + implicit none + integer :: ihole,ipart,mu,iii,ispin,ierr,nu,istate + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_psi_array(N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res=0.D0 + + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + call i_H_psi(det_mu_ex,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase + end do + end if + end do + end do + + ! state-averaged gradient + res*=2.D0/dble(N_states) + +end subroutine calc_grad_elem + diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f new file mode 100644 index 00000000..a1c5e947 --- /dev/null +++ b/src/casscf_cipsi/gradient.irp.f @@ -0,0 +1,215 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + + if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)] +&BEGIN_PROVIDER [real*8, norm_grad_vec2] +&BEGIN_PROVIDER [real*8, norm_grad_vec2_tab, (3)] + BEGIN_DOC + ! calculate the orbital gradient from density + ! matrices and integrals; Siegbahn et al, Phys Scr 1980 + ! eqs 14 a,b,c + END_DOC + implicit none + integer :: i,t,a,indx + real*8 :: gradvec_it,gradvec_ia,gradvec_ta + + indx=0 + norm_grad_vec2_tab = 0.d0 + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx+=1 + gradvec2(indx)=gradvec_it(i,t) + norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do i=1,n_core_inact_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ia(i,a) + norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ta(t,a) + norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx) + end do + end do + + norm_grad_vec2=0.d0 + do indx=1,nMonoEx + norm_grad_vec2+=gradvec2(indx)*gradvec2(indx) + end do + do i = 1, 3 + norm_grad_vec2_tab(i) = dsqrt(norm_grad_vec2_tab(i)) + enddo + norm_grad_vec2=sqrt(norm_grad_vec2) + if(bavard)then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2 + write(6,*) + endif + +END_PROVIDER + +real*8 function gradvec_it(i,t) + BEGIN_DOC + ! the orbital gradient core/inactive -> active + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t + + integer :: ii,tt,v,vv,x,y + integer :: x3,y3 + + ii=list_core_inact(i) + tt=list_act(t) + gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii)) + gradvec_it-=occnum(tt)*Fipq(ii,tt) + do v=1,n_act_orb ! active + vv=list_act(v) + do x=1,n_act_orb ! active + x3=x+n_core_inact_orb ! list_act(x) + do y=1,n_act_orb ! active + y3=y+n_core_inact_orb ! list_act(y) + ! Gamma(2) a a a a 1/r12 i a a a + gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) + end do + end do + end do + gradvec_it*=2.D0 +end function gradvec_it + +real*8 function gradvec_ia(i,a) + BEGIN_DOC + ! the orbital gradient core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,ii,aa + + ii=list_core_inact(i) + aa=list_virt(a) + gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii)) + gradvec_ia*=2.D0 + +end function gradvec_ia + +real*8 function gradvec_ta(t,a) + BEGIN_DOC + ! the orbital gradient active -> virtual + ! we assume natural orbitals + END_DOC + implicit none + integer :: t,a,tt,aa,v,vv,x,y + + tt=list_act(t) + aa=list_virt(a) + gradvec_ta=0.D0 + gradvec_ta+=occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + gradvec_ta+=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + gradvec_ta*=2.D0 + +end function gradvec_ta + diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f new file mode 100644 index 00000000..458c6aa6 --- /dev/null +++ b/src/casscf_cipsi/hessian.irp.f @@ -0,0 +1,539 @@ +use bitmasks + +real*8 function hessmat_itju(i,t,j,u) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> active + ! i, t, j, u are list indices, the corresponding orbitals are ii,tt,jj,uu + ! + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj + real*8 :: term,t2 + + ii=list_core_inact(i) + tt=list_act(t) + if (i.eq.j) then + if (t.eq.u) then + ! diagonal element + term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & + -2.D0*(Fipq(ii,ii)+Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i)) + term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt) & + -bielec_pqxx_no(tt,tt,i,i)) + term-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + else + ! it/iu, t != u + uu=list_act(u) + term=2.D0*(Fipq(tt,uu)+Fapq(tt,uu)) + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=occnum(tt)*Fipq(uu,tt) + term-=(occnum(tt)+occnum(uu)) & + *(3.D0*bielec_PxxQ_no(tt,i,i,uu)-bielec_PQxx_no(uu,tt,i,i)) + do v=1,n_act_orb + vv=list_act(v) + ! term-=D0tu(u,v)*Fipq(tt,vv) ! published, but inverting t and u seems more correct + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(u,v,y,xx) + end do + end do + end do + end if + else + ! it/ju + jj=list_core_inact(j) + uu=list_act(u) + if (t.eq.u) then + term=occnum(tt)*Fipq(ii,jj) + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + else + term=0.D0 + end if + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=(occnum(tt)+occnum(uu))* & + (4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(uu,tt,i,j)) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,j) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,j,xx)) + end do + end do + end if + + term*=2.D0 + hessmat_itju=term + +end function hessmat_itju + +real*8 function hessmat_itja(i,t,j,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> virtual + END_DOC + implicit none + integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y + real*8 :: term + + ! it/ja + ii=list_core_inact(i) + tt=list_act(t) + jj=list_core_inact(j) + aa=list_virt(a) + term=2.D0*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + term-=occnum(tt)*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + if (i.eq.j) then + term+=2.D0*(Fipq(aa,tt)+Fapq(aa,tt)) + term-=0.5D0*occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + end if + term*=2.D0 + hessmat_itja=term + +end function hessmat_itja + +real*8 function hessmat_itua(i,t,u,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, active -> virtual + END_DOC + implicit none + integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 + real*8 :: term + + ii=list_core_inact(i) + tt=list_act(t) + t3=t+n_core_inact_orb + uu=list_act(u) + u3=u+n_core_inact_orb + aa=list_virt(a) + if (t.eq.u) then + term=-occnum(tt)*Fipq(aa,ii) + else + term=0.D0 + end if + term-=occnum(uu)*(bielec_pqxx_no(aa,ii,t3,u3)-4.D0*bielec_pqxx_no(aa,uu,t3,i)& + +bielec_pxxq_no(aa,t3,u3,ii)) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + integer :: x3 + xx=list_act(x) + x3=x+n_core_inact_orb + term-=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,ii,v3,x3) & + +(P0tuvx_no(t,v,u,x)+P0tuvx_no(t,v,x,u)) & + *bielec_pqxx_no(aa,xx,v3,i)) + end do + end do + if (t.eq.u) then + term+=Fipq(aa,ii)+Fapq(aa,ii) + end if + term*=2.D0 + hessmat_itua=term + +end function hessmat_itua + +real*8 function hessmat_iajb(i,a,j,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,j,b,ii,aa,jj,bb + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + if (i.eq.j) then + if (a.eq.b) then + ! ia/ia + term=2.D0*(Fipq(aa,aa)+Fapq(aa,aa)-Fipq(ii,ii)-Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,aa)-bielec_pqxx_no(aa,aa,i,i)) + else + bb=list_virt(b) + ! ia/ib + term=2.D0*(Fipq(aa,bb)+Fapq(aa,bb)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,bb)-bielec_pqxx_no(aa,bb,i,i)) + end if + else + ! ia/jb + jj=list_core_inact(j) + bb=list_virt(b) + term=2.D0*(4.D0*bielec_pxxq_no(aa,i,j,bb)-bielec_pqxx_no(aa,bb,i,j) & + -bielec_pxxq_no(aa,j,i,bb)) + if (a.eq.b) then + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + end if + end if + term*=2.D0 + hessmat_iajb=term + +end function hessmat_iajb + +real*8 function hessmat_iatb(i,a,t,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, active -> virtual + END_DOC + implicit none + integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + tt=list_act(t) + bb=list_virt(b) + t3=t+n_core_inact_orb + term=occnum(tt)*(4.D0*bielec_pxxq_no(aa,i,t3,bb)-bielec_pxxq_no(aa,t3,i,bb)& + -bielec_pqxx_no(aa,bb,i,t3)) + if (a.eq.b) then + term-=Fipq(tt,ii)+Fapq(tt,ii) + term-=0.5D0*occnum(tt)*Fipq(tt,ii) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,ii) + end do + end do + end do + end if + term*=2.D0 + hessmat_iatb=term + +end function hessmat_iatb + +real*8 function hessmat_taub(t,a,u,b) + BEGIN_DOC + ! the orbital hessian for act->virt,act->virt + END_DOC + implicit none + integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y + integer :: v3,x3 + real*8 :: term,t1,t2,t3 + + tt=list_act(t) + aa=list_virt(a) + if (t == u) then + if (a == b) then + ! ta/ta + t1=occnum(tt)*Fipq(aa,aa) + t2=0.D0 + t3=0.D0 + t1-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(aa,x3,v3,aa)) + do y=1,n_act_orb + t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + term=t1+t2+t3 + else + bb=list_virt(b) + ! ta/tb b/=a + term=occnum(tt)*Fipq(aa,bb) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + end if + else + ! ta/ub t/=u + uu=list_act(u) + bb=list_virt(b) + term=0.D0 + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + if (a.eq.b) then + term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) + do v=1,n_act_orb + do y=1,n_act_orb + do x=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) + term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) + end do + end do + end do + end if + + end if + + term*=2.D0 + hessmat_taub=term + +end function hessmat_taub + +BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] + BEGIN_DOC + ! the diagonal of the Hessian, needed for the Davidson procedure + END_DOC + implicit none + integer :: i,t,a,indx,indx_shift + real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,t,a,indx_shift) + + !$OMP DO + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + hessdiag(indx)=hessmat_itju(i,t,i,t) + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_iajb(i,a,i,a) + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_taub(t,a,t,a) + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] + implicit none + integer :: i,j,t,u,a,b + integer :: indx,indx_tmp, jndx, jndx_tmp + integer :: ustart,bstart + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + ! c-a c-v a-v + ! c-a | X X X + ! c-v | X X + ! a-v | X + + provide mo_two_e_integrals_in_map + + hessmat = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,list_idx_c_a,n_core_inact_orb,n_act_orb,mat_idx_c_a) & + !$OMP PRIVATE(indx_tmp,indx,i,t,j,u,ustart,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-active > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-active excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + jndx = mat_idx_c_a(j,u) + hessmat(jndx,indx) = hessmat_itju(i,t,j,u) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_c_v_prov,list_idx_c_a,list_idx_c_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,j,a,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-VIRTUAL excitations + do jndx_tmp = 1, n_c_v_prov + jndx = list_idx_c_v(1,jndx_tmp) + j = list_idx_c_v(2,jndx_tmp) + a = list_idx_c_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itja(i,t,j,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_a_v_prov,list_idx_c_a,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,u,a,jndx) + + !$OMP DO +!!!! < Core-active| H |ACTIVE-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! ACTIVE-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + u = list_idx_a_v(2,jndx_tmp) + a = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itua(i,t,u,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + if(hess_cv_cv)then + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,list_idx_c_v,n_core_inact_orb,n_virt_orb,mat_idx_c_v) & + !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx) + !$OMP DO +!!!!! < Core-VIRTUAL | H |Core-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Core-VIRTUAL excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_c_v(j,b) + hessmat(jndx,indx) = hessmat_iajb(i,a,j,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + endif + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,n_a_v_prov,list_idx_c_v,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,a,t,b,jndx) + + !$OMP DO +!!!! < Core-VIRTUAL | H |Active-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + t = list_idx_a_v(2,jndx_tmp) + b = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_iatb(i,a,t,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_a_v_prov,list_idx_a_v,n_act_orb,n_virt_orb,mat_idx_a_v) & + !$OMP PRIVATE(indx_tmp,indx,t,a,u,b,bstart,jndx) + + !$OMP DO +!!!! < Active-VIRTUAL | H |Active-VIRTUAL > + ! Active-VIRTUAL excitations + do indx_tmp = 1, n_a_v_prov + indx = list_idx_a_v(1,indx_tmp) + t = list_idx_a_v(2,indx_tmp) + a = list_idx_a_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_a_v(u,b) + hessmat(jndx,indx) = hessmat_taub(t,a,u,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + +END_PROVIDER diff --git a/src/casscf_cipsi/hessian_old.irp.f b/src/casscf_cipsi/hessian_old.irp.f new file mode 100644 index 00000000..d17f1f0a --- /dev/null +++ b/src/casscf_cipsi/hessian_old.irp.f @@ -0,0 +1,310 @@ + +use bitmasks +BEGIN_PROVIDER [real*8, hessmat_old, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! calculate the orbital hessian 2 + ! + + by hand, + ! determinant per determinant, as for the gradient + ! + ! we assume that we have natural active orbitals + END_DOC + implicit none + integer :: indx,ihole,ipart + integer :: jndx,jhole,jpart + character*3 :: iexc,jexc + real*8 :: res + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_old ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + + do indx=1,nMonoEx + do jndx=1,nMonoEx + hessmat_old(indx,jndx)=0.D0 + end do + end do + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + iexc=excit_class(indx) + do jndx=indx,nMonoEx + jhole=excit(1,jndx) + jpart=excit(2,jndx) + jexc=excit_class(jndx) + call calc_hess_elem(ihole,ipart,jhole,jpart,res) + hessmat_old(indx,jndx)=res + hessmat_old(jndx,indx)=res + end do + end do + +END_PROVIDER + +subroutine calc_hess_elem(ihole,ipart,jhole,jpart,res) + BEGIN_DOC + ! eq 19 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 + ! + + + ! average over all states is performed. + ! no transition between states. + END_DOC + implicit none + integer :: ihole,ipart,ispin,mu,istate + integer :: jhole,jpart,jspin + integer :: mu_pq, mu_pqrs, mu_rs, mu_rspq, nu_rs,nu + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:) + integer(bit_kind), allocatable :: det_nu(:,:) + integer(bit_kind), allocatable :: det_mu_pq(:,:) + integer(bit_kind), allocatable :: det_mu_rs(:,:) + integer(bit_kind), allocatable :: det_nu_rs(:,:) + integer(bit_kind), allocatable :: det_mu_pqrs(:,:) + integer(bit_kind), allocatable :: det_mu_rspq(:,:) + real*8 :: i_H_psi_array(N_states),phase,phase2,phase3 + real*8 :: i_H_j_element + allocate(det_mu(N_int,2)) + allocate(det_nu(N_int,2)) + allocate(det_mu_pq(N_int,2)) + allocate(det_mu_rs(N_int,2)) + allocate(det_nu_rs(N_int,2)) + allocate(det_mu_pqrs(N_int,2)) + allocate(det_mu_rspq(N_int,2)) + integer :: mu_pq_possible + integer :: mu_rs_possible + integer :: nu_rs_possible + integer :: mu_pqrs_possible + integer :: mu_rspq_possible + + res=0.D0 + + ! the terms <0|E E H |0> + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation pq on it + call det_copy(det_mu,det_mu_pq,N_int) + call do_signed_mono_excitation(det_mu,det_mu_pq,mu_pq & + ,ihole,ipart,ispin,phase,mu_pq_possible) + if (mu_pq_possible.eq.1) then + ! possible, but not necessarily in the list + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jhole,jpart,jspin,phase2,mu_pqrs_possible) + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + ! try the de-excitation with opposite sign + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jpart,jhole,jspin,phase2,mu_pqrs_possible) + phase2=-phase2 + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + end do + end if + ! exchange the notion of pq and rs + ! do the monoexcitation rs on the initial determinant + call det_copy(det_mu,det_mu_rs,N_int) + call do_signed_mono_excitation(det_mu,det_mu_rs,mu_rs & + ,jhole,jpart,ispin,phase2,mu_rs_possible) + if (mu_rs_possible.eq.1) then + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ihole,ipart,jspin,phase3,mu_rspq_possible) + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + ! we may try the de-excitation, with opposite sign + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ipart,ihole,jspin,phase3,mu_rspq_possible) + phase3=-phase3 + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + end do + end if + ! + ! the operator E H E, we have to do a double loop over the determinants + ! we still have the determinant mu_pq and the phase in memory + if (mu_pq_possible.eq.1) then + do nu=1,N_det + call det_extract(det_nu,nu,N_int) + do jspin=1,2 + call det_copy(det_nu,det_nu_rs,N_int) + call do_signed_mono_excitation(det_nu,det_nu_rs,nu_rs & + ,jhole,jpart,jspin,phase2,nu_rs_possible) + ! excitation possible ? + if (nu_rs_possible.eq.1) then + call i_H_j(det_mu_pq,det_nu_rs,N_int,i_H_j_element) + do istate=1,N_states + res+=2.D0*i_H_j_element*psi_coef(mu,istate) & + *psi_coef(nu,istate)*phase*phase2 + end do + end if + end do + end do + end if + end do + end do + + ! state-averaged Hessian + res*=1.D0/dble(N_states) + +end subroutine calc_hess_elem + +BEGIN_PROVIDER [real*8, hessmat_peter, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! explicit hessian matrix from density matrices and integrals + ! of course, this will be used for a direct Davidson procedure later + ! we will not store the matrix in real life + ! formulas are broken down as functions for the 6 classes of matrix elements + ! + END_DOC + implicit none + integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart,indx_shift + + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_peter ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + provide mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat_peter,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,jndx,j,ustart,t,u,a,bstart,indx_shift) + + !$OMP DO + ! (DOUBLY OCCUPIED ---> ACT ) + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + jndx=indx + ! (DOUBLY OCCUPIED ---> ACT ) + do j=i,n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + hessmat_peter(jndx,indx)=hessmat_itju(i,t,j,u) + jndx+=1 + end do + end do + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=1,n_core_inact_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itja(i,t,j,a) + jndx+=1 + end do + end do + ! (ACTIVE ---> VIRTUAL) + do u=1,n_act_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itua(i,t,u,a) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + jndx=indx + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=i,n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iajb(i,a,j,b) + jndx+=1 + end do + end do + ! (ACT ---> VIRTUAL) + do t=1,n_act_orb + do b=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iatb(i,a,t,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + ! (ACT ---> VIRTUAL) + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + jndx=indx + ! (ACT ---> VIRTUAL) + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_taub(t,a,u,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + + do jndx=1,nMonoEx + do indx=1,jndx-1 + hessmat_peter(indx,jndx) = hessmat_peter(jndx,indx) + enddo + enddo + + +END_PROVIDER + diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f new file mode 100644 index 00000000..e4568405 --- /dev/null +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -0,0 +1,80 @@ +BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] + BEGIN_DOC + ! the inactive Fock matrix, in molecular orbitals + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)=one_ints_no(p,q) + end do + end do + + ! the inactive Fock matrix + do k=1,n_core_inact_orb + kk=list_core_inact(k) + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] + BEGIN_DOC + ! the active active Fock matrix, in molecular orbitals + ! we create them in MOs, quite expensive + ! + ! for an implementation in AOs we need first the natural orbitals + ! for forming an active density matrix in AOs + ! + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + Fapq = 0.d0 + + ! the active Fock matrix, D0tu is diagonal + do t=1,n_act_orb + tt=list_act(t) + do q=1,mo_num + do p=1,mo_num + Fapq(p,q)+=occnum(tt) & + *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q)) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the effective Fock matrix over MOs' + write(6,*) + + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + write(6,*) + write(6,*) ' the diagonal of the active Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f new file mode 100644 index 00000000..9ce90304 --- /dev/null +++ b/src/casscf_cipsi/natorb.irp.f @@ -0,0 +1,231 @@ + BEGIN_PROVIDER [real*8, occnum, (mo_num)] + implicit none + BEGIN_DOC + ! MO occupation numbers + END_DOC + + integer :: i + occnum=0.D0 + do i=1,n_core_inact_orb + occnum(list_core_inact(i))=2.D0 + end do + + do i=1,n_act_orb + occnum(list_act(i))=occ_act(i) + end do + + if (bavard) then + write(6,*) ' occupation numbers ' + do i=1,mo_num + write(6,*) i,occnum(i) + end do + endif + +END_PROVIDER + + + BEGIN_PROVIDER [ real*8, natorbsCI, (n_act_orb,n_act_orb) ] +&BEGIN_PROVIDER [ real*8, occ_act, (n_act_orb) ] + implicit none + BEGIN_DOC + ! Natural orbitals of CI + END_DOC + integer :: i, j + double precision :: Vt(n_act_orb,n_act_orb) + +! call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) + call svd(D0tu, size(D0tu,1), natorbsCI,size(natorbsCI,1), occ_act, Vt, size(Vt,1),n_act_orb,n_act_orb) + + if (bavard) then + write(6,*) ' found occupation numbers as ' + do i=1,n_act_orb + write(6,*) i,occ_act(i) + end do + + integer :: nmx + real*8 :: xmx + do i=1,n_act_orb + ! largest element of the eigenvector should be positive + xmx=0.D0 + nmx=0 + do j=1,n_act_orb + if (abs(natOrbsCI(j,i)).gt.xmx) then + nmx=j + xmx=abs(natOrbsCI(j,i)) + end if + end do + xmx=sign(1.D0,natOrbsCI(nmx,i)) + do j=1,n_act_orb + natOrbsCI(j,i)*=xmx + end do + + write(6,*) ' Eigenvector No ',i + write(6,'(5(I3,F12.5))') (j,natOrbsCI(j,i),j=1,n_act_orb) + end do + end if + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! 4-index transformation of 2part matrices + END_DOC + integer :: i,j,k,l,p,q + real*8 :: d(n_act_orb) + + ! index per index + ! first quarter + P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:) + + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(p,j,k,l)=d(p) + end do + end do + end do + end do + ! 2nd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,p,k,l)=d(p) + end do + end do + end do + end do + ! 3rd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,p,l)=d(p) + end do + end do + end do + end do + ! 4th quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,l,p)=d(p) + end do + end do + end do + end do + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Transformed one-e integrals + END_DOC + integer :: i,j, p, q + real*8 :: d(n_act_orb) + one_ints_no(:,:)=mo_one_e_integrals(:,:) + + ! 1st half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(list_act(p),j)=d(p) + end do + end do + + ! 2nd half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(j,list_act(p))=d(p) + end do + end do +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] + implicit none + BEGIN_DOC +! FCI natural orbitals + END_DOC + + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) +END_PROVIDER + diff --git a/src/casscf_cipsi/neworbs.irp.f b/src/casscf_cipsi/neworbs.irp.f new file mode 100644 index 00000000..a7cebbb2 --- /dev/null +++ b/src/casscf_cipsi/neworbs.irp.f @@ -0,0 +1,253 @@ + BEGIN_PROVIDER [real*8, SXmatrix, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [integer, n_guess_sx_mat ] + implicit none + BEGIN_DOC + ! Single-excitation matrix + END_DOC + + integer :: i,j + + do i=1,nMonoEx+1 + do j=1,nMonoEx+1 + SXmatrix(i,j)=0.D0 + end do + end do + + do i=1,nMonoEx + SXmatrix(1,i+1)=gradvec2(i) + SXmatrix(1+i,1)=gradvec2(i) + end do + if(diag_hess_cas)then + do i = 1, nMonoEx + SXmatrix(i+1,i+1) = hessdiag(i) + enddo + else + do i=1,nMonoEx + do j=1,nMonoEx + SXmatrix(i+1,j+1)=hessmat(i,j) + SXmatrix(j+1,i+1)=hessmat(i,j) + end do + end do + endif + + do i = 1, nMonoEx + SXmatrix(i+1,i+1) += level_shift_casscf + enddo + n_guess_sx_mat = 1 + do i = 1, nMonoEx + if(SXmatrix(i+1,i+1).lt.0.d0 )then + n_guess_sx_mat += 1 + endif + enddo + if (bavard) then + do i=2,nMonoEx + write(6,*) ' diagonal of the Hessian : ',i,hessmat(i,i) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, SXeigenvec, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [real*8, SXeigenval, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Eigenvectors/eigenvalues of the single-excitation matrix + END_DOC + if(nMonoEx+1.gt.n_det_max_full)then + if(bavard)then + print*,'Using the Davidson algorithm to diagonalize the SXmatrix' + endif + double precision, allocatable :: u_in(:,:),energies(:) + allocate(u_in(nMonoEx+1,n_states_diag),energies(n_guess_sx_mat)) + call davidson_diag_sx_mat(n_guess_sx_mat, u_in, energies) + integer :: i,j + SXeigenvec = 0.d0 + SXeigenval = 0.d0 + do i = 1, n_guess_sx_mat + SXeigenval(i) = energies(i) + do j = 1, nMonoEx+1 + SXeigenvec(j,i) = u_in(j,i) + enddo + enddo + else + if(bavard)then + print*,'Diagonalize the SXmatrix with Jacobi' + endif + call lapack_diag(SXeigenval,SXeigenvec,SXmatrix,nMonoEx+1,nMonoEx+1) + endif + if (bavard) then + write(6,*) ' SXdiag : lowest eigenvalues ' + write(6,*) ' 1 - ',SXeigenval(1),SXeigenvec(1,1) + if(n_guess_sx_mat.gt.0)then + write(6,*) ' 2 - ',SXeigenval(2),SXeigenvec(1,2) + write(6,*) ' 3 - ',SXeigenval(3),SXeigenvec(1,3) + write(6,*) ' 4 - ',SXeigenval(4),SXeigenvec(1,4) + write(6,*) ' 5 - ',SXeigenval(5),SXeigenvec(1,5) + endif + write(6,*) + write(6,*) ' SXdiag : lowest eigenvalue = ',SXeigenval(1) + endif +END_PROVIDER + + BEGIN_PROVIDER [real*8, energy_improvement] + implicit none + if(state_following_casscf)then + energy_improvement = SXeigenval(best_vector_ovrlp_casscf) + else + energy_improvement = SXeigenval(1) + endif + END_PROVIDER + + + + BEGIN_PROVIDER [ integer, best_vector_ovrlp_casscf ] +&BEGIN_PROVIDER [ double precision, best_overlap_casscf ] + implicit none + integer :: i + double precision :: c0 + best_overlap_casscf = 0.D0 + best_vector_ovrlp_casscf = -1000 + do i=1,nMonoEx+1 + if (SXeigenval(i).lt.0.D0) then + if (dabs(SXeigenvec(1,i)).gt.best_overlap_casscf) then + best_overlap_casscf=dabs(SXeigenvec(1,i)) + best_vector_ovrlp_casscf = i + end if + end if + end do + if(best_vector_ovrlp_casscf.lt.0)then + best_vector_ovrlp_casscf = minloc(SXeigenval,nMonoEx+1) + endif + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + if (bavard) then + write(6,*) ' SXdiag : eigenvalue for best overlap with ' + write(6,*) ' previous orbitals = ',SXeigenval(best_vector_ovrlp_casscf) + write(6,*) ' weight of the 1st element ',c0 + endif + END_PROVIDER + + BEGIN_PROVIDER [double precision, SXvector, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Best eigenvector of the single-excitation matrix + END_DOC + integer :: i + double precision :: c0 + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + do i=1,nMonoEx+1 + SXvector(i)=SXeigenvec(i,best_vector_ovrlp_casscf)/c0 + end do + END_PROVIDER + + +BEGIN_PROVIDER [double precision, NewOrbs, (ao_num,mo_num) ] + implicit none + BEGIN_DOC + ! Updated orbitals + END_DOC + integer :: i,j,ialph + + if(state_following_casscf)then + print*,'Using the state following casscf ' + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + else + if(best_vector_ovrlp_casscf.ne.1.and.n_orb_swap.ne.0)then + print*,'Taking the lowest root for the CASSCF' + print*,'!!! SWAPPING MOS !!!!!!' + level_shift_casscf *= 2.D0 + level_shift_casscf = min(level_shift_casscf,0.5d0) + print*,'level_shift_casscf = ',level_shift_casscf + NewOrbs = switch_mo_coef + !mo_coef = switch_mo_coef + !soft_touch mo_coef + !call save_mos_no_occ + !stop + else + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + endif + endif + +END_PROVIDER + +BEGIN_PROVIDER [real*8, Umat, (mo_num,mo_num) ] + implicit none + BEGIN_DOC + ! Orbital rotation matrix + END_DOC + integer :: i,j,indx,k,iter,t,a,ii,tt,aa + logical :: converged + + real*8 :: Tpotmat (mo_num,mo_num), Tpotmat2 (mo_num,mo_num) + real*8 :: Tmat(mo_num,mo_num) + real*8 :: f + + ! the orbital rotation matrix T + Tmat(:,:)=0.D0 + indx=1 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx+=1 + Tmat(ii,tt)= SXvector(indx) + Tmat(tt,ii)=-SXvector(indx) + end do + end do + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(ii,aa)= SXvector(indx) + Tmat(aa,ii)=-SXvector(indx) + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(tt,aa)= SXvector(indx) + Tmat(aa,tt)=-SXvector(indx) + end do + end do + + ! Form the exponential + + Tpotmat(:,:)=0.D0 + Umat(:,:) =0.D0 + do i=1,mo_num + Tpotmat(i,i)=1.D0 + Umat(i,i) =1.d0 + end do + iter=0 + converged=.false. + do while (.not.converged) + iter+=1 + f = 1.d0 / dble(iter) + Tpotmat2(:,:) = Tpotmat(:,:) * f + call dgemm('N','N', mo_num,mo_num,mo_num,1.d0, & + Tpotmat2, size(Tpotmat2,1), & + Tmat, size(Tmat,1), 0.d0, & + Tpotmat, size(Tpotmat,1)) + Umat(:,:) = Umat(:,:) + Tpotmat(:,:) + + converged = ( sum(abs(Tpotmat(:,:))) < 1.d-6).or.(iter>30) + end do +END_PROVIDER + + + diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f new file mode 100644 index 00000000..3cb90522 --- /dev/null +++ b/src/casscf_cipsi/reorder_orb.irp.f @@ -0,0 +1,70 @@ +subroutine reorder_orbitals_for_casscf + implicit none + BEGIN_DOC +! routine that reorders the orbitals of the CASSCF in terms block of core, active and virtual + END_DOC + integer :: i,j,iorb + integer, allocatable :: iorder(:),array(:) + allocate(iorder(mo_num),array(mo_num)) + do i = 1, n_core_orb + iorb = list_core(i) + array(iorb) = i + enddo + + do i = 1, n_inact_orb + iorb = list_inact(i) + array(iorb) = mo_num + i + enddo + + do i = 1, n_act_orb + iorb = list_act(i) + array(iorb) = 2 * mo_num + i + enddo + + do i = 1, n_virt_orb + iorb = list_virt(i) + array(iorb) = 3 * mo_num + i + enddo + + do i = 1, mo_num + iorder(i) = i + enddo + call isort(array,iorder,mo_num) + double precision, allocatable :: mo_coef_new(:,:) + allocate(mo_coef_new(ao_num,mo_num)) + do i = 1, mo_num + mo_coef_new(:,i) = mo_coef(:,iorder(i)) + enddo + mo_coef = mo_coef_new + touch mo_coef + + list_core_reverse = 0 + do i = 1, n_core_orb + list_core(i) = i + list_core_reverse(i) = i + mo_class(i) = "Core" + enddo + + list_inact_reverse = 0 + do i = 1, n_inact_orb + list_inact(i) = i + n_core_orb + list_inact_reverse(i+n_core_orb) = i + mo_class(i+n_core_orb) = "Inactive" + enddo + + list_act_reverse = 0 + do i = 1, n_act_orb + list_act(i) = n_core_inact_orb + i + list_act_reverse(n_core_inact_orb + i) = i + mo_class(n_core_inact_orb + i) = "Active" + enddo + + list_virt_reverse = 0 + do i = 1, n_virt_orb + list_virt(i) = n_core_inact_orb + n_act_orb + i + list_virt_reverse(n_core_inact_orb + n_act_orb + i) = i + mo_class(n_core_inact_orb + n_act_orb + i) = "Virtual" + enddo + touch list_core_reverse list_core list_inact list_inact_reverse list_act list_act_reverse list_virt list_virt_reverse + +end diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f new file mode 100644 index 00000000..8729c5af --- /dev/null +++ b/src/casscf_cipsi/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_casscf_energy(E(1:N_states)) + call ezfio_set_casscf_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/casscf_cipsi/superci_dm.irp.f b/src/casscf_cipsi/superci_dm.irp.f new file mode 100644 index 00000000..ee831c35 --- /dev/null +++ b/src/casscf_cipsi/superci_dm.irp.f @@ -0,0 +1,207 @@ + BEGIN_PROVIDER [double precision, super_ci_dm, (mo_num,mo_num)] + implicit none + BEGIN_DOC +! density matrix of the super CI matrix, in the basis of NATURAL ORBITALS OF THE CASCI WF +! +! This is obtained from annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 +! +! WARNING ::: in the equation B3.d there is a TYPO with a forgotten MINUS SIGN (see variable mat_tmp_dm_super_ci ) + END_DOC + super_ci_dm = 0.d0 + integer :: i,j,iorb,jorb + integer :: a,aorb,b,borb + integer :: t,torb,v,vorb,u,uorb,x,xorb + double precision :: c0,ci + c0 = SXeigenvec(1,1) + ! equation B3.a of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! loop over the core/inact + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(iorb,iorb) = 2.d0 ! first term of B3.a + ! loop over the core/inact + do j = 1, n_core_inact_orb + jorb = list_core_inact(j) + ! loop over the virtual + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(jorb,iorb) += -2.d0 * lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,jorb) ! second term in B3.a + enddo + do t = 1, n_act_orb + torb = list_act(t) + ! thrid term of the B3.a + super_ci_dm(jorb,iorb) += - lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(jorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.b of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(iorb,torb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,iorb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(iorb,torb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,iorb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + enddo + enddo + enddo + + ! equation B3.c of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,iorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + super_ci_dm(iorb,aorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + enddo + enddo + + ! equation B3.d of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(torb,torb) = occ_act(t) ! first term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + super_ci_dm(torb,torb) += - occ_act(x) * occ_act(t)* mat_tmp_dm_super_ci(x,x) ! second term involving the ONE-rdm + enddo + do u = 1, n_act_orb + uorb = list_act(u) + + ! second term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + do v = 1, n_act_orb + vorb = list_act(v) + super_ci_dm(torb,uorb) += 2.d0 * P0tuvx_no(v,x,t,u) * mat_tmp_dm_super_ci(v,x) ! second term involving the TWO-rdm + enddo + enddo + + ! third term of equation B3.d + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(torb,uorb) += lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(iorb,uorb) * (2.d0 - occ_act(t) - occ_act(u)) + enddo + + enddo + enddo + + ! equation B3.e of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,torb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,aorb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(aorb,torb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,aorb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.f of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do b = 1, n_virt_orb + borb= list_virt(b) + + ! First term of equation B3.f + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(borb,aorb) += 2.d0 * lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,borb) + enddo + + ! Second term of equation B3.f + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(borb,aorb) += lowest_super_ci_coef_mo(torb,aorb) * lowest_super_ci_coef_mo(torb,borb) * occ_act(t) + enddo + enddo + enddo + + END_PROVIDER + + BEGIN_PROVIDER [double precision, superci_natorb, (ao_num,mo_num) +&BEGIN_PROVIDER [double precision, superci_nat_occ, (mo_num) + implicit none + call general_mo_coef_new_as_svd_vectors_of_mo_matrix_eig(super_ci_dm,mo_num,mo_num,mo_num,NatOrbsFCI,superci_nat_occ,superci_natorb) + +END_PROVIDER + + BEGIN_PROVIDER [double precision, mat_tmp_dm_super_ci, (n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! computation of the term in [ ] in the equation B3.d of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! + ! !!!!! WARNING !!!!!! there is a TYPO: a MINUS SIGN SHOULD APPEAR in that term + END_DOC + integer :: a,aorb,i,iorb + integer :: x,xorb,v,vorb + mat_tmp_dm_super_ci = 0.d0 + do v = 1, n_act_orb + vorb = list_act(v) + do x = 1, n_act_orb + xorb = list_act(x) + do a = 1, n_virt_orb + aorb = list_virt(a) + mat_tmp_dm_super_ci(x,v) += lowest_super_ci_coef_mo(aorb,vorb) * lowest_super_ci_coef_mo(aorb,xorb) + enddo + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + ! MARK THE MINUS SIGN HERE !!!!!!!!!!! BECAUSE OF TYPO IN THE ORIGINAL PAPER + mat_tmp_dm_super_ci(x,v) -= lowest_super_ci_coef_mo(iorb,vorb) * lowest_super_ci_coef_mo(iorb,xorb) + enddo + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, lowest_super_ci_coef_mo, (mo_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + integer :: a, aorb,t, torb + double precision :: sqrt2 + + sqrt2 = 1.d0/dsqrt(2.d0) + do i = 1, nMonoEx + iorb = excit(1,i) + jorb = excit(2,i) + lowest_super_ci_coef_mo(iorb,jorb) = SXeigenvec(i+1,1) + lowest_super_ci_coef_mo(jorb,iorb) = SXeigenvec(i+1,1) + enddo + + ! a_{it} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,iorb) *= (2.d0 - occ_act(t))**(-0.5d0) + lowest_super_ci_coef_mo(iorb,torb) *= (2.d0 - occ_act(t))**(-0.5d0) + enddo + enddo + + ! a_{ia} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + lowest_super_ci_coef_mo(aorb,iorb) *= sqrt2 + lowest_super_ci_coef_mo(iorb,aorb) *= sqrt2 + enddo + enddo + + ! a_{ta} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,aorb) *= occ_act(t)**(-0.5d0) + lowest_super_ci_coef_mo(aorb,torb) *= occ_act(t)**(-0.5d0) + enddo + enddo + + END_PROVIDER + diff --git a/src/casscf_cipsi/swap_orb.irp.f b/src/casscf_cipsi/swap_orb.irp.f new file mode 100644 index 00000000..49af207c --- /dev/null +++ b/src/casscf_cipsi/swap_orb.irp.f @@ -0,0 +1,132 @@ + BEGIN_PROVIDER [double precision, SXvector_lowest, (nMonoEx)] + implicit none + integer :: i + do i=2,nMonoEx+1 + SXvector_lowest(i-1)=SXeigenvec(i,1) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, thresh_overlap_switch] + implicit none + thresh_overlap_switch = 0.5d0 + END_PROVIDER + + BEGIN_PROVIDER [integer, max_overlap, (nMonoEx)] +&BEGIN_PROVIDER [integer, n_max_overlap] +&BEGIN_PROVIDER [integer, dim_n_max_overlap] + implicit none + double precision, allocatable :: vec_tmp(:) + integer, allocatable :: iorder(:) + allocate(vec_tmp(nMonoEx),iorder(nMonoEx)) + integer :: i + do i = 1, nMonoEx + iorder(i) = i + vec_tmp(i) = -dabs(SXvector_lowest(i)) + enddo + call dsort(vec_tmp,iorder,nMonoEx) + n_max_overlap = 0 + do i = 1, nMonoEx + if(dabs(vec_tmp(i)).gt.thresh_overlap_switch)then + n_max_overlap += 1 + max_overlap(n_max_overlap) = iorder(i) + endif + enddo + dim_n_max_overlap = max(1,n_max_overlap) + END_PROVIDER + + BEGIN_PROVIDER [integer, orb_swap, (2,dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, index_orb_swap, (dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, n_orb_swap ] + implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features + integer :: i,imono,iorb,jorb,j + n_orb_swap = 0 + do i = 1, n_max_overlap + imono = max_overlap(i) + iorb = excit(1,imono) + jorb = excit(2,imono) + if (excit_class(imono) == "c-a" .and.hessmat(imono,imono).gt.0.d0)then ! core --> active rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = iorb ! core + orb_swap(2,n_orb_swap) = jorb ! active + index_orb_swap(n_orb_swap) = imono + else if (excit_class(imono) == "a-v" .and.hessmat(imono,imono).gt.0.d0)then ! active --> virtual rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = jorb ! virtual + orb_swap(2,n_orb_swap) = iorb ! active + index_orb_swap(n_orb_swap) = imono + endif + enddo + + integer,allocatable :: orb_swap_tmp(:,:) + allocate(orb_swap_tmp(2,dim_n_max_overlap)) + do i = 1, n_orb_swap + orb_swap_tmp(1,i) = orb_swap(1,i) + orb_swap_tmp(2,i) = orb_swap(2,i) + enddo + + integer(bit_kind), allocatable :: det_i(:),det_j(:) + allocate(det_i(N_int),det_j(N_int)) + logical, allocatable :: good_orb_rot(:) + allocate(good_orb_rot(n_orb_swap)) + integer, allocatable :: index_orb_swap_tmp(:) + allocate(index_orb_swap_tmp(dim_n_max_overlap)) + index_orb_swap_tmp = index_orb_swap + good_orb_rot = .True. + integer :: icount,k + do i = 1, n_orb_swap + if(.not.good_orb_rot(i))cycle + det_i = 0_bit_kind + call set_bit_to_integer(orb_swap(1,i),det_i,N_int) + call set_bit_to_integer(orb_swap(2,i),det_i,N_int) + do j = i+1, n_orb_swap + det_j = 0_bit_kind + call set_bit_to_integer(orb_swap(1,j),det_j,N_int) + call set_bit_to_integer(orb_swap(2,j),det_j,N_int) + icount = 0 + do k = 1, N_int + icount += popcnt(ior(det_i(k),det_j(k))) + enddo + if (icount.ne.4)then + good_orb_rot(i) = .False. + good_orb_rot(j) = .False. + exit + endif + enddo + enddo + icount = n_orb_swap + n_orb_swap = 0 + do i = 1, icount + if(good_orb_rot(i))then + n_orb_swap += 1 + index_orb_swap(n_orb_swap) = index_orb_swap_tmp(i) + orb_swap(1,n_orb_swap) = orb_swap_tmp(1,i) + orb_swap(2,n_orb_swap) = orb_swap_tmp(2,i) + endif + enddo + + if(n_orb_swap.gt.0)then + print*,'n_orb_swap = ',n_orb_swap + endif + do i = 1, n_orb_swap + print*,'imono = ',index_orb_swap(i) + print*,orb_swap(1,i),'-->',orb_swap(2,i) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, switch_mo_coef, (ao_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + switch_mo_coef = NatOrbsFCI + do i = 1, n_orb_swap + iorb = orb_swap(1,i) + jorb = orb_swap(2,i) + do j = 1, ao_num + switch_mo_coef(j,jorb) = NatOrbsFCI(j,iorb) + enddo + do j = 1, ao_num + switch_mo_coef(j,iorb) = NatOrbsFCI(j,jorb) + enddo + enddo + + END_PROVIDER diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f new file mode 100644 index 00000000..1d70e087 --- /dev/null +++ b/src/casscf_cipsi/tot_en.irp.f @@ -0,0 +1,101 @@ + BEGIN_PROVIDER [real*8, etwo] +&BEGIN_PROVIDER [real*8, eone] +&BEGIN_PROVIDER [real*8, eone_bis] +&BEGIN_PROVIDER [real*8, etwo_bis] +&BEGIN_PROVIDER [real*8, etwo_ter] +&BEGIN_PROVIDER [real*8, ecore] +&BEGIN_PROVIDER [real*8, ecore_bis] + implicit none + integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 + real*8 :: e_one_all,e_two_all + e_one_all=0.D0 + e_two_all=0.D0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + e_one_all+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + e_two_all+=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + end do + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + e_two_all+=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + end do + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do u=1,n_act_orb + uu=list_act(u) + e_one_all+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do v=1,n_act_orb + v3=v+n_core_inact_orb + do x=1,n_act_orb + x3=x+n_core_inact_orb + e_two_all +=P0tuvx(t,u,v,x)*bielec_PQxx(tt,uu,v3,x3) + end do + end do + end do + end do + ecore =nuclear_repulsion + ecore_bis=nuclear_repulsion + do i=1,n_core_inact_orb + ii=list_core_inact(i) + ecore +=2.D0*mo_one_e_integrals(ii,ii) + ecore_bis+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + ecore +=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + ecore_bis+=2.D0*bielec_PxxQ(ii,i,j,jj)-bielec_PxxQ(ii,j,j,ii) + end do + end do + eone =0.D0 + eone_bis=0.D0 + etwo =0.D0 + etwo_bis=0.D0 + etwo_ter=0.D0 + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + eone +=D0tu(t,u)*mo_one_e_integrals(tt,uu) + eone_bis+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do i=1,n_core_inact_orb + ii=list_core_inact(i) + eone +=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + eone_bis+=D0tu(t,u)*(2.D0*bielec_PxxQ(tt,u3,i,ii) & + -bielec_PxxQ(tt,i,i,uu)) + end do + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + real*8 :: h1,h2,h3 + h1=bielec_PQxx(tt,uu,v3,x3) + h2=bielec_PxxQ(tt,u3,v3,xx) + h3=bielecCI(t,u,v,xx) + etwo +=P0tuvx(t,u,v,x)*h1 + etwo_bis+=P0tuvx(t,u,v,x)*h2 + etwo_ter+=P0tuvx(t,u,v,x)*h3 + if ((h1.ne.h2).or.(h1.ne.h3)) then + write(6,9901) t,u,v,x,h1,h2,h3 + 9901 format('aie: ',4I4,3E20.12) + end if + end do + end do + end do + end do + +END_PROVIDER + + From 1e0e06d9cd705553501af87b417e719760b918da Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 19 Jun 2023 15:26:14 +0200 Subject: [PATCH 04/74] fixed bug in bi_ort_ints/three_body_ijmk.irp.f: deallocate(tmp1) is wrong --- src/bi_ort_ints/three_body_ijmk.irp.f | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index ee7e88ef..4a99fb1b 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -97,7 +97,7 @@ , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) - deallocate(tmp1) +! deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -112,7 +112,7 @@ !$OMP END PARALLEL DO - +! allocate(tmp1(n_points_final_grid, 2, mo_num, mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -195,7 +195,7 @@ , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) - deallocate(tmp1) +! deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num From c3d257c7aceff4c8641fc751cf9996783c1ce96f Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 22 Jun 2023 11:58:32 +0200 Subject: [PATCH 05/74] added routines to rotate orbitals without touching core orbitals --- src/tc_bi_ortho/tc_natorb.irp.f | 13 +- src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/routines_rotates.irp.f | 16 +- src/utils/block_diag_degen_core.irp.f | 244 ++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 8 deletions(-) create mode 100644 src/utils/block_diag_degen_core.irp.f diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index b7e5ae81..1b5a66f3 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -23,7 +23,7 @@ dm_tmp(1:mo_num,1:mo_num) = -tc_transition_matrix_mo(1:mo_num,1:mo_num,1,1) - print *, ' dm_tmp' + print *, ' Transition density matrix ' do i = 1, mo_num fock_diag(i) = fock_matrix_tc_mo_tot(i,i) write(*, '(100(F16.10,X))') -dm_tmp(:,i) @@ -32,8 +32,15 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + if(n_core_orb.ne.0)then +! print*,'core orbitals' +! pause + call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + else + call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + endif ! call non_hrmt_bieig( mo_num, dm_tmp& ! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& ! , mo_num, natorb_tc_eigval ) diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a69f5bac..f984d53a 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -208,6 +208,12 @@ doc: Threshold to determine if diagonal elements of the bi-orthogonal condition interface: ezfio,provider,ocaml default: 1.e-6 +[thresh_lr_angle] +type: double precision +doc: Maximum value of the angle between the couple of left and right orbital for the rotations +interface: ezfio,provider,ocaml +default: 20.0 + [thresh_biorthog_nondiag] type: Threshold doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0 diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 755c35b9..588382b5 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -140,7 +140,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! compute the overlap between the left and rescaled right call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + if(n_core_orb.ne.0)then + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + else + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + endif print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -152,6 +156,8 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) + if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .eq. 1) cycle allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) @@ -279,7 +285,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) - good_angles = max_angle.lt.45.d0 + good_angles = max_angle.lt.thresh_lr_angle print *, ' max_angle = ', max_angle deallocate(new_angles) @@ -397,11 +403,11 @@ subroutine print_energy_and_mos(good_angles) print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right - if(max_angle_left_right .lt. 45.d0) then + if(max_angle_left_right .lt. thresh_lr_angle) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' good_angles = .true. - else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then - print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. thresh_lr_angle .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between thresh_lr_angle and 75 degrees, this is not the best for TC-CI calculations ...' good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' diff --git a/src/utils/block_diag_degen_core.irp.f b/src/utils/block_diag_degen_core.irp.f new file mode 100644 index 00000000..5d46bd87 --- /dev/null +++ b/src/utils/block_diag_degen_core.irp.f @@ -0,0 +1,244 @@ + +subroutine diag_mat_per_fock_degen_core(fock_diag, mat_ref, listcore,ncore, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) + + + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! the elements of listcore are untouched + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC + + implicit none + integer, intent(in) :: n,ncore, listcore(ncore) + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_listcore(fock_diag, n, listcore, ncore, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i + enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + + if(n_degen.ge.1000)then + print*,'core orbital ' + else + print *, ' diagonalizing for n_degen = ', n_degen + endif + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + if(n_degen.ge.1000)then + n_degen = 1 + endif + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) + enddo + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif + + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i + enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + +end + +! --- + +subroutine give_degen_full_listcore(A, n, listcore, ncore, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + ! + ! if list_degen(i,0) >= 1000 it means that it is core orbitals + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n,ncore, listcore(ncore) + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck,k + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + ! you first exclude the "core" orbitals + do i = 1, ncore + j=listcore(i) + is_ok(j) = .False. + enddo + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount + enddo + ! you set all the core orbitals as separate entities + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + if(icheck.ne.(n-ncore))then + print *, ' pb ! :: icheck.ne.n-ncore' + print *, icheck, n-ncore + stop + endif + k=1000 + do i = 1, ncore + n_degen_list+= 1 + j=listcore(i) + list_degen(n_degen_list,1) = i + list_degen(n_degen_list,0) = k + k+=1 + enddo + + + +end + +! --- + From 6881a65994fe04eebcfedde0871e34d7737b9b8c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 22 Jun 2023 13:34:36 +0200 Subject: [PATCH 06/74] Fix possible float_of_string: 0.160099927795302-102 error --- src/utils/linear_algebra.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 76a539a6..65c57a76 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1565,7 +1565,7 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) ! Remove tiny elements do j=1,n do i=1,m - if ( dabs(A(i,j) * amax) < thresh ) then + if ( (dabs(A(i,j) * amax) < thresh).or.(dabs(A(i,j)) < 1.d-99) ) then A(i,j) = 0.d0 endif enddo From d911f4eee8b1569d6e34dc4ec4081031f400bcc1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 17:41:34 +0200 Subject: [PATCH 07/74] Rewrote Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 197 ++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 4 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 77eb6ddc..2d2a40ab 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,10 +1,28 @@ -BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] +BEGIN_PROVIDER [ integer, mini_basis_size, (128) ] + implicit none + BEGIN_DOC + ! Size of the minimal basis set per element + END_DOC + + mini_basis_size(1:2) = 1 + mini_basis_size(3:4) = 2 + mini_basis_size(5:10) = 5 + mini_basis_size(11:12) = 6 + mini_basis_size(13:18) = 9 + mini_basis_size(19:20) = 13 + mini_basis_size(21:36) = 18 + mini_basis_size(37:38) = 22 + mini_basis_size(39:54) = 27 + mini_basis_size(55:) = 36 +END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] implicit none BEGIN_DOC ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num + cholesky_ao_num_guess = ao_num*ao_num !sum(mini_basis_size(int(nucl_charge(:)))) END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -103,8 +121,10 @@ END_PROVIDER ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' +! call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + + call direct_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap double precision, external :: getUnitAndOpen @@ -131,3 +151,172 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, enddo END_PROVIDER + +subroutine direct_cholesky( A, rank, tau, ndim, L) + implicit none + integer :: ndim + integer, intent(inout) :: rank + double precision, intent(inout) :: A(ndim, ndim) + double precision, intent(out) :: L(ndim, rank) + double precision, intent(in) :: tau + + double precision, parameter :: s = 1.d-2 + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:) + integer, allocatable :: Lset(:), Dset(:) + + integer :: i,j,k,m,p,q, qj, dj + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + + L = 0.d0 + + ! 1. + do i=1,ndim + D(i) = A(i,i) + enddo + Dmax = maxval(D) +! print *, '# 1. ', D +! print *, '# 1. ', Dmax + + ! 2. + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo +! print *, '# 2. ', Lset(:np) + + ! 3. + N = 0 +! print *, '# 3. ', N + + ! 4. + i = 0 +! print *, '# 4. ', i + + ! 5. + do while (Dmax > tau) + ! a. + i = i+1 +! print *, '# 5.a ', i + + ! b. + Dmin = max(s*Dmax, tau) +! print *, '# 5.b ', Dmin + + ! c. + nq=0 + do q=1,np + if ( D(Lset(q)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(q) + endif + enddo +! print *, '# 5.c ', Dset(:nq) + + ! d. + allocate(Delta(np,nq)) + do m=1,nq + do k=1,np + Delta(k,m) = A(Lset(k), Dset(m)) + enddo + enddo +! print *, '# 5.d ', Delta + + ! e. + do m=1,nq + do k=1,np + do p=1,N + Delta(k,m) = Delta(k,m) - L(Lset(k),p) * L(Dset(m),p) + enddo + enddo + enddo +! print *, '# 5.e ', Delta + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo +! print *, '# 5.f ', Qmax + + ! g. + j = 0 +! print *, '# 5.g ', j + + do while ( (j <= nq).and.(Qmax > Dmin) ) + ! i. + j = j+1 + rank = N+j +! print *, '# 5.h.i ', j, rank + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo +! print *, ' # 5.h.ii ', qj, dj + + ! iii. + f = 1.d0/dsqrt(Qmax) + do p=1,np + L(Lset(p), rank) = Delta(p,dj) * f + enddo +! print *, ' # 5.h.iii ' +! do k=1,20 +! print *, L(k,1:rank) +! enddo + + ! iv. + do m=1, nq + do k=1, np + Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * L(Dset(m),rank) + enddo + enddo + + do k=1, np + D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + enddo + + Qmax = D(Dset(1)) + do q=1,np + Qmax = max(Qmax, D(Lset(q))) + enddo +! print *, '# 5.h.iv ', Delta +! print *, '# 5.h.iv ', D +! print *, '# 5.h.iv ', Qmax + + enddo + + deallocate(Delta) + + ! i. + N = N+j +! print *, '# 5.i ', N + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo +! print *, '# 5.j ', Dmax + + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo +! print *, '# k. ', Lset(:np) + enddo + +end From 487e85c6aef05219eb9e716eae429b5a126600c6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 18:19:31 +0200 Subject: [PATCH 08/74] Cholesky OK --- src/ao_two_e_ints/cholesky.irp.f | 188 ++++++++----------------------- 1 file changed, 48 insertions(+), 140 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 2d2a40ab..6a78e9ff 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -34,106 +34,11 @@ END_PROVIDER ! = (ik|jl) = sum_a (ik|a).(a|jl) END_DOC - type(c_ptr) :: ptr - integer :: fd, i,j,k,l,m,rank - double precision, pointer :: ao_integrals(:,:,:,:) - double precision, external :: ao_two_e_integral - - ! Store AO integrals in a memory mapped file - call mmap(trim(ezfio_work_dir)//'ao_integrals', & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, .False., ptr) - call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - - print*, 'Providing the AO integrals (Cholesky)' - call wall_time(wall_1) - call cpu_time(cpu_1) - - ao_integrals = 0.d0 - - double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 - logical, external :: ao_two_e_integral_zero - double precision, external :: get_ao_two_e_integral - - if (read_ao_two_e_integrals) then - PROVIDE ao_two_e_integrals_in_map - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,ao_num - do k=1,ao_num - do i=1,ao_num - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) - ao_integrals(i,k,j,l) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - else - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - - endif - - ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess -! call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - call direct_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + call direct_cholesky(cholesky_ao, ao_num*ao_num, cholesky_ao_num, ao_cholesky_threshold) print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - ! Remove mmap - double precision, external :: getUnitAndOpen - call munmap( & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, ptr) - open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') - close(99, status='delete') - END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] @@ -152,35 +57,53 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -subroutine direct_cholesky( A, rank, tau, ndim, L) +subroutine direct_cholesky(L, ndim, rank, tau) implicit none + BEGIN_DOC +! Cholesky-decomposed AOs. +! +! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf : +! Page 32, section 13.5 + END_DOC integer :: ndim - integer, intent(inout) :: rank - double precision, intent(inout) :: A(ndim, ndim) - double precision, intent(out) :: L(ndim, rank) + integer, intent(out) :: rank + double precision, intent(out) :: L(ndim, ndim) double precision, intent(in) :: tau double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:) - integer, allocatable :: Lset(:), Dset(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) integer :: i,j,k,m,p,q, qj, dj integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f - allocate( D(ndim), Lset(ndim), Dset(ndim) ) + double precision, external :: get_ao_two_e_integral - L = 0.d0 + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + allocate( addr(2,ndim) ) ! 1. - do i=1,ndim - D(i) = A(i,i) + k=0 + do i=1,ao_num + do j=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + enddo enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + Dmax = maxval(D) -! print *, '# 1. ', D -! print *, '# 1. ', Dmax ! 2. np=0 @@ -190,25 +113,20 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Lset(np) = p endif enddo -! print *, '# 2. ', Lset(:np) ! 3. N = 0 -! print *, '# 3. ', N - ! 4. + ! 4. i = 0 -! print *, '# 4. ', i - ! 5. + ! 5. do while (Dmax > tau) ! a. i = i+1 -! print *, '# 5.a ', i ! b. Dmin = max(s*Dmax, tau) -! print *, '# 5.b ', Dmin ! c. nq=0 @@ -218,43 +136,42 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Dset(nq) = Lset(q) endif enddo -! print *, '# 5.c ', Dset(:nq) - ! d. + ! d., e. allocate(Delta(np,nq)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) do m=1,nq do k=1,np - Delta(k,m) = A(Lset(k), Dset(m)) + Delta(k,m) = get_ao_two_e_integral( & + addr(1,Lset(k)), & + addr(1,Dset(m)), & + addr(2,Lset(k)), & + addr(2,Dset(m)), & + ao_integrals_map) enddo - enddo -! print *, '# 5.d ', Delta - ! e. - do m=1,nq - do k=1,np - do p=1,N - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * L(Dset(m),p) + do p=1,N + f = L(Dset(m),p) + do k=1,np + Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f enddo enddo enddo -! print *, '# 5.e ', Delta + !$OMP END PARALLEL DO ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo -! print *, '# 5.f ', Qmax ! g. j = 0 -! print *, '# 5.g ', j do while ( (j <= nq).and.(Qmax > Dmin) ) ! i. j = j+1 rank = N+j -! print *, '# 5.h.i ', j, rank ! ii. do dj=1,nq @@ -263,22 +180,18 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) exit endif enddo -! print *, ' # 5.h.ii ', qj, dj ! iii. f = 1.d0/dsqrt(Qmax) do p=1,np L(Lset(p), rank) = Delta(p,dj) * f enddo -! print *, ' # 5.h.iii ' -! do k=1,20 -! print *, L(k,1:rank) -! enddo ! iv. do m=1, nq + f = L(Dset(m),rank) do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * L(Dset(m),rank) + Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f enddo enddo @@ -290,9 +203,6 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) do q=1,np Qmax = max(Qmax, D(Lset(q))) enddo -! print *, '# 5.h.iv ', Delta -! print *, '# 5.h.iv ', D -! print *, '# 5.h.iv ', Qmax enddo @@ -300,14 +210,12 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) ! i. N = N+j -! print *, '# 5.i ', N ! j. Dmax = D(Lset(1)) do p=1,np Dmax = max(Dmax, D(Lset(p))) enddo -! print *, '# 5.j ', Dmax np=0 do p=1,ndim @@ -316,7 +224,7 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Lset(np) = p endif enddo -! print *, '# k. ', Lset(:np) + enddo end From 3c7a10934f51aea2b97ea3196fae6442b1c0030a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 19:54:00 +0200 Subject: [PATCH 09/74] Accelerated Cholesky --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- src/ao_two_e_ints/cholesky.irp.f | 42 ++++++++++++++++++++------------ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index e0d0e02e..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6a78e9ff..dc5040be 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -73,7 +73,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:) integer :: i,j,k,m,p,q, qj, dj @@ -138,7 +138,16 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - allocate(Delta(np,nq)) + allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) do m=1,nq do k=1,np @@ -149,17 +158,13 @@ subroutine direct_cholesky(L, ndim, rank, tau) addr(2,Dset(m)), & ao_integrals_map) enddo - - do p=1,N - f = L(Dset(m),p) - do k=1,np - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f - enddo - enddo enddo !$OMP END PARALLEL DO - ! f. + call dgemm('N','T',np,nq,N,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) @@ -184,19 +189,26 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! iii. f = 1.d0/dsqrt(Qmax) do p=1,np - L(Lset(p), rank) = Delta(p,dj) * f + Ltmp_p(p,1) = Delta(p,dj) * f + L(Lset(p), rank) = Ltmp_p(p,1) + enddo + + do q=1,nq + Ltmp_q(q,1) = L(Dset(q), rank) enddo ! iv. +! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) + !$OMP PARALLEL DO PRIVATE(f,m,k) do m=1, nq - f = L(Dset(m),rank) do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f + Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo + !$OMP END PARALLEL DO do k=1, np - D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) enddo Qmax = D(Dset(1)) @@ -206,7 +218,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo - deallocate(Delta) + deallocate(Delta, Ltmp_p, Ltmp_q) ! i. N = N+j From 837ec89f1baf8cef63a045a63394edce15f2883d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 21:04:50 +0200 Subject: [PATCH 10/74] Accelerate Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index dc5040be..27aa1aa6 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -81,6 +81,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + print *, 'Entering Cholesky' allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(2,ndim) ) @@ -139,6 +142,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! d., e. allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) + + !$OMP DO do k=1,N do p=1,np Ltmp_p(p,k) = L(Lset(p),k) @@ -147,10 +153,24 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,k) = L(Dset(q),k) enddo enddo + !$OMP END DO NOWAIT - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) + !$OMP DO do m=1,nq do k=1,np + Delta(k,m) = 0.d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do m=1,nq + do k=1,np + if (ao_two_e_integral_zero( & + addr(1,Lset(k)), & + addr(1,Dset(m)), & + addr(2,Lset(k)), & + addr(2,Dset(m)) ) ) cycle Delta(k,m) = get_ao_two_e_integral( & addr(1,Lset(k)), & addr(1,Dset(m)), & @@ -159,7 +179,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) ao_integrals_map) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + + !$OMP END PARALLEL call dgemm('N','T',np,nq,N,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -188,28 +210,36 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! iii. f = 1.d0/dsqrt(Qmax) + !$OMP PARALLEL PRIVATE(m,k) + !$OMP DO do p=1,np Ltmp_p(p,1) = Delta(p,dj) * f L(Lset(p), rank) = Ltmp_p(p,1) enddo + !$OMP END DO + !$OMP DO do q=1,nq Ltmp_q(q,1) = L(Dset(q), rank) enddo + !$OMP END DO ! iv. -! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) - !$OMP PARALLEL DO PRIVATE(f,m,k) + !$OMP DO do m=1, nq do k=1, np Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT +! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) + !$OMP DO do k=1, np D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) enddo + !$OMP END DO + !$OMP END PARALLEL Qmax = D(Dset(1)) do q=1,np From 06720f3f210bc548346f9194a8d89761aa228f35 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:22:12 +0200 Subject: [PATCH 11/74] integer8 in cholesky --- external/qp2-dependencies | 2 +- src/ao_two_e_ints/cholesky.irp.f | 111 +++++++++++++++++++------------ src/utils/fast_mkl.c | 5 ++ 3 files changed, 73 insertions(+), 45 deletions(-) create mode 100644 src/utils/fast_mkl.c diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 27aa1aa6..01c79d12 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -74,27 +74,31 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + integer*8, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer*8, allocatable :: Lset_rev(:), Dset_rev(:) - integer :: i,j,k,m,p,q, qj, dj - integer :: N, np, nq + integer*8 :: i,j,k,m,p,q, qj, dj, p2, q2 + integer*8 :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero print *, 'Entering Cholesky' + rank = 0 - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(2,ndim) ) + allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim) ) + allocate( addr(3,ndim) ) ! 1. k=0 - do i=1,ao_num - do j=1,ao_num + do j=1,ao_num + do i=1,ao_num k = k+1 addr(1,k) = i addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j enddo enddo @@ -110,10 +114,12 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! 2. np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo @@ -133,15 +139,21 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! c. nq=0 - do q=1,np - if ( D(Lset(q)) > Dmin ) then + LDmap = 0 + DLmap = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then nq = nq+1 - Dset(nq) = Lset(q) + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p endif enddo ! d., e. allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) !$OMP DO @@ -153,38 +165,47 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,k) = L(Dset(q),k) enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO - !$OMP DO + !$OMP DO SCHEDULE(dynamic,8) do m=1,nq - do k=1,np - Delta(k,m) = 0.d0 + + do k=1, nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + Delta(p,m) = 0.d0 + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + Delta(q,m) = Delta(p,m) enddo - enddo - !$OMP END DO - !$OMP DO - do m=1,nq do k=1,np - if (ao_two_e_integral_zero( & - addr(1,Lset(k)), & - addr(1,Dset(m)), & - addr(2,Lset(k)), & - addr(2,Dset(m)) ) ) cycle - Delta(k,m) = get_ao_two_e_integral( & - addr(1,Lset(k)), & - addr(1,Dset(m)), & - addr(2,Lset(k)), & - addr(2,Dset(m)), & - ao_integrals_map) + ! Apply only to (k,m) pairs where k is not in Dset + if (LDmap(k) /= 0) cycle + q = Lset_rev(addr(3,Lset(k))) + if ((0 < q).and.(q < k)) cycle + if (ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + Delta(k,m) = 0.d0 + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + Delta(q,m) = Delta(k,m) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm('N','T',np,nq,N,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + call dgemm('N','T', int(np,4), int(nq,4), int(N,4), -1.d0, & + Ltmp_p, int(np,4), Ltmp_q, int(nq,4), 1.d0, Delta, int(np,4)) ! f. Qmax = D(Dset(1)) @@ -193,11 +214,11 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! g. - j = 0 - do while ( (j <= nq).and.(Qmax > Dmin) ) + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. - j = j+1 rank = N+j ! ii. @@ -208,13 +229,17 @@ subroutine direct_cholesky(L, ndim, rank, tau) endif enddo + L(:, rank) = 0.d0 + ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,k) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,1) = Delta(p,dj) * f L(Lset(p), rank) = Ltmp_p(p,1) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,1) * Ltmp_p(p,1) enddo !$OMP END DO @@ -223,22 +248,17 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,1) = L(Dset(q), rank) enddo !$OMP END DO - + ! iv. - !$OMP DO + + !$OMP DO SCHEDULE(static) do m=1, nq do k=1, np Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo - !$OMP END DO NOWAIT -! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) - - !$OMP DO - do k=1, np - D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) - enddo !$OMP END DO + !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -247,6 +267,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo enddo + print *, Qmax deallocate(Delta, Ltmp_p, Ltmp_q) @@ -260,10 +281,12 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo diff --git a/src/utils/fast_mkl.c b/src/utils/fast_mkl.c new file mode 100644 index 00000000..aa1f82f1 --- /dev/null +++ b/src/utils/fast_mkl.c @@ -0,0 +1,5 @@ +int mkl_serv_intel_cpu_true() { + return 1; +} + + From 6a53e44e9bed0bf6aa40f24c1fc13a25889ef727 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:43:44 +0200 Subject: [PATCH 12/74] Fast MKL on AMD --- src/ezfio_files/NEED | 1 + src/ezfio_files/ezfio.irp.f | 7 ++++++- src/utils/c_functions.f90 | 7 ++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ezfio_files/NEED b/src/ezfio_files/NEED index d06d604c..1766924f 100644 --- a/src/ezfio_files/NEED +++ b/src/ezfio_files/NEED @@ -1,2 +1,3 @@ mpi zmq +utils diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 4f53b173..e18b2378 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -7,6 +7,8 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] PROVIDE mpi_initialized + integer :: i + ! Get the QPACKAGE_INPUT environment variable call getenv('QPACKAGE_INPUT',ezfio_filename) if (ezfio_filename == '') then @@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] END_PROVIDER BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] + use c_functions implicit none BEGIN_DOC ! EZFIO/work/ END_DOC - call ezfio_set_work_empty(.False.) + logical :: b + b = mkl_serv_intel_cpu_true() /= 1 + call ezfio_set_work_empty(b) ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 65d4ad62..a9c8900b 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -57,6 +57,12 @@ module c_functions end subroutine sscanf_sd_c end interface + interface + integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C) + use iso_c_binding + end function + end interface + contains integer function atoi(a) @@ -131,4 +137,3 @@ subroutine usleep(us) call usleep_c(u) end subroutine usleep - From faf43331edb20391a10ec6cb85a354d471f1612c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:46:49 +0200 Subject: [PATCH 13/74] Fix segfault in CC --- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 31fe67ce..770d629a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -332,7 +332,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' +----------------------+--------------+----------+' print '(A)', '' - deallocate(X_vovv,X_ooov,T_voov,T_oovv) + deallocate(X_vovv) + deallocate(X_ooov) + deallocate(T_voov) + deallocate(T_oovv) end From 9b0c270662c35f856ae98f4832a13d39dca59c8c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 10:46:05 +0200 Subject: [PATCH 14/74] Block cholesky --- src/ao_two_e_ints/cholesky.irp.f | 46 +++++++++++++++++++-------- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 4 ++- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 01c79d12..f26a2729 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -22,7 +22,8 @@ END_PROVIDER ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num !sum(mini_basis_size(int(nucl_charge(:)))) + cholesky_ao_num_guess = ao_num*ao_num + cholesky_ao_num_guess = 2* ao_num * sum(mini_basis_size(int(nucl_charge(:)))) END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -84,6 +85,8 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero + integer :: block_size, iblock + print *, 'Entering Cholesky' rank = 0 @@ -152,7 +155,10 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + block_size = max(N,32) + allocate(Delta(np,nq), & + Ltmp_p(max(np,1),block_size), & + Ltmp_q(max(nq,1),block_size) ) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) @@ -215,12 +221,19 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! g. + iblock = 0 do j=1,nq if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. rank = N+j + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + ! ii. do dj=1,nq qj = Dset(dj) @@ -231,33 +244,40 @@ subroutine direct_cholesky(L, ndim, rank, tau) L(:, rank) = 0.d0 + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & + Ltmp_p(1,iblock), 1) + ! iii. f = 1.d0/dsqrt(Qmax) !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np - Ltmp_p(p,1) = Delta(p,dj) * f - L(Lset(p), rank) = Ltmp_p(p,1) - D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,1) * Ltmp_p(p,1) + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) enddo !$OMP END DO !$OMP DO do q=1,nq - Ltmp_q(q,1) = L(Dset(q), rank) + Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO ! iv. - !$OMP DO SCHEDULE(static) - do m=1, nq - do k=1, np - Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) - enddo - enddo - !$OMP END DO +! !$OMP DO SCHEDULE(static) +! do m=1, nq +! do k=1, np +! Delta(k,m) = Delta(k,m) - Ltmp_p(k,iblock) * Ltmp_q(m,iblock) +! enddo +! enddo +! !$OMP END DO !$OMP END PARALLEL diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 770d629a..dbbed19e 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo !$OMP END DO nowait + !$OMP BARRIER !$OMP END PARALLEL double precision, external :: ccsd_t_task_aba @@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call wall_time(t01) if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then - t00 = t01 !$OMP TASKWAIT + call wall_time(t01) + t00 = t01 double precision :: ET, ET2 double precision :: energy_stoch, energy_det From 0242e9c37634ec593c3dcd3dd37d5c4a18ec3b69 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 22:17:31 +0200 Subject: [PATCH 15/74] Changed formats E to ES --- config/ifort_2021_debug.cfg | 66 ++++++ src/ao_two_e_ints/cholesky.irp.f | 192 ++++++++++-------- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 12 +- src/cipsi/pt2_stoch_routines.irp.f | 2 +- .../dav_diag_dressed_ext_rout.irp.f | 2 +- .../dav_double_dress_ext_rout.irp.f | 2 +- .../dav_dressed_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_general.irp.f | 2 +- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 2 +- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/determinants/dipole_moments.irp.f | 6 +- src/ezfio_files/ezfio.irp.f | 2 +- src/mo_optimization/first_gradient_opt.irp.f | 2 +- src/tc_bi_ortho/print_tc_dump.irp.f | 10 +- src/tc_scf/molden_lr_mos.irp.f | 14 +- src/tools/molden.irp.f | 4 +- src/tools/print_ci_vectors.irp.f | 2 +- src/utils/format_w_error.irp.f | 2 +- .../rotation_matrix_iterative.irp.f | 4 +- .../trust_region_optimal_lambda.irp.f | 6 +- 25 files changed, 214 insertions(+), 136 deletions(-) create mode 100644 config/ifort_2021_debug.cfg diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg new file mode 100644 index 00000000..d70b1465 --- /dev/null +++ b/config/ifort_2021_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f26a2729..18180efb 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,47 +1,3 @@ -BEGIN_PROVIDER [ integer, mini_basis_size, (128) ] - implicit none - BEGIN_DOC - ! Size of the minimal basis set per element - END_DOC - - mini_basis_size(1:2) = 1 - mini_basis_size(3:4) = 2 - mini_basis_size(5:10) = 5 - mini_basis_size(11:12) = 6 - mini_basis_size(13:18) = 9 - mini_basis_size(19:20) = 13 - mini_basis_size(21:36) = 18 - mini_basis_size(37:38) = 22 - mini_basis_size(39:54) = 27 - mini_basis_size(55:) = 36 -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] - implicit none - BEGIN_DOC - ! Number of Cholesky vectors in AO basis - END_DOC - - cholesky_ao_num_guess = ao_num*ao_num - cholesky_ao_num_guess = 2* ao_num * sum(mini_basis_size(int(nucl_charge(:)))) -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num ] -&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] - use mmap_module - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - END_DOC - - cholesky_ao_num = cholesky_ao_num_guess - - call direct_cholesky(cholesky_ao, ao_num*ao_num, cholesky_ao_num, ao_cholesky_threshold) - print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - -END_PROVIDER - BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -58,36 +14,55 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -subroutine direct_cholesky(L, ndim, rank, tau) +BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] implicit none BEGIN_DOC -! Cholesky-decomposed AOs. -! -! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf : -! Page 32, section 13.5 + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num END_DOC - integer :: ndim - integer, intent(out) :: rank - double precision, intent(out) :: L(ndim, ndim) - double precision, intent(in) :: tau + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer*8, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer*8, allocatable :: Lset_rev(:), Dset_rev(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer, allocatable :: Lset_rev(:), Dset_rev(:) - integer*8 :: i,j,k,m,p,q, qj, dj, p2, q2 - integer*8 :: N, np, nq + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero - integer :: block_size, iblock + integer :: block_size, iblock, ierr + + PROVIDE ao_two_e_integrals_in_map + deallocate(cholesky_ao) + + ndim = ao_num*ao_num + tau = ao_cholesky_threshold + + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + - print *, 'Entering Cholesky' rank = 0 allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) @@ -155,10 +130,40 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - block_size = max(N,32) - allocate(Delta(np,nq), & - Ltmp_p(max(np,1),block_size), & - Ltmp_q(max(nq,1),block_size) ) + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + L(:,k) = L_old(:,k) + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) @@ -176,19 +181,18 @@ subroutine direct_cholesky(L, ndim, rank, tau) !$OMP DO SCHEDULE(dynamic,8) do m=1,nq + Delta(:,m) = 0.d0 do k=1, nq ! Apply only to (k,m) pairs both in Dset p = DLmap(k) q = Lset_rev(addr(3,Dset(k))) if ((0 < q).and.(q < p)) cycle - if (ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = 0.d0 - else Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + Delta(q,m) = Delta(p,m) endif - Delta(q,m) = Delta(p,m) enddo do k=1,np @@ -196,22 +200,22 @@ subroutine direct_cholesky(L, ndim, rank, tau) if (LDmap(k) /= 0) cycle q = Lset_rev(addr(3,Lset(k))) if ((0 < q).and.(q < k)) cycle - if (ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - Delta(k,m) = 0.d0 - else Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + Delta(q,m) = Delta(k,m) endif - Delta(q,m) = Delta(k,m) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm('N','T', int(np,4), int(nq,4), int(N,4), -1.d0, & - Ltmp_p, int(np,4), Ltmp_q, int(nq,4), 1.d0, Delta, int(np,4)) + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif ! f. Qmax = D(Dset(1)) @@ -242,14 +246,18 @@ subroutine direct_cholesky(L, ndim, rank, tau) endif enddo - L(:, rank) = 0.d0 + L(1:ndim, rank) = 0.d0 iblock = iblock+1 do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo - call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & Ltmp_p(1,iblock), 1) + endif ! iii. f = 1.d0/dsqrt(Qmax) @@ -269,27 +277,20 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo !$OMP END DO - ! iv. - -! !$OMP DO SCHEDULE(static) -! do m=1, nq -! do k=1, np -! Delta(k,m) = Delta(k,m) - Ltmp_p(k,iblock) * Ltmp_q(m,iblock) -! enddo -! enddo -! !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) - do q=1,np - Qmax = max(Qmax, D(Lset(q))) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) enddo enddo - print *, Qmax - deallocate(Delta, Ltmp_p, Ltmp_q) + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(Delta, stat=ierr) + deallocate(Ltmp_p, stat=ierr) + deallocate(Ltmp_q, stat=ierr) ! i. N = N+j @@ -312,4 +313,15 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo -end + allocate(cholesky_ao(ao_num,ao_num,rank)) + call dcopy(ndim*rank, L, 1, cholesky_ao, 1) + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + +END_PROVIDER + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 40c57188..d23073b8 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -112,7 +112,7 @@ subroutine run_ccsd_space_orb ! Energy call ccsd_energy_space(nO,nV,tau,t1,energy) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then @@ -132,7 +132,7 @@ subroutine run_ccsd_space_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index a267cc45..09d6a0fe 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) call wall_time(tfi) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then print*,'Total:',tfi-tbi,'s' @@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index dbbed19e..13fa4f1a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -210,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) print '(A)', '' - print '(A)', ' +----------------------+--------------+----------+' - print '(A)', ' | E(CCSD(T)) | Error | % |' - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ==========' + print '(A)', ' E(CCSD(T)) Error % ' + print '(A)', ' ======================= ============== ==========' call wall_time(t00) @@ -257,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (imin >= bounds(2,isample)) then cycle endif - ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1 if (sampled(ieta) == -1_8) then sampled(ieta) = 0_8 @@ -324,14 +324,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ========== ' print '(A)', '' deallocate(X_vovv) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7909007a..3b048c14 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ time-time0 ! Old print - !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, & + !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, & ! pt2_data % pt2(pt2_stoch_istate) +E, & ! pt2_data_err % pt2(pt2_stoch_istate), & ! pt2_data % variance(pt2_stoch_istate), & diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 73608720..0dc939cb 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index e59d21d1..24f4fa10 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c045aa1a..cedaaf0a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 2621e3a9..deb7e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index cd9124e6..9940bf1e 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index 26853df9..b7179c18 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..fa8aff80 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 45258c1c..7b559925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 3ff060a6..96ca84ab 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, !don't print continue else - write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index 06fca0cd..e445c56b 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -66,9 +66,9 @@ END_PROVIDER write(*,'(i16)',advance='no') i end do write(*,*) '' - write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment - write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment - write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment + write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment + write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment + write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment !print*, 'x_dipole_moment = ',x_dipole_moment !print*, 'y_dipole_moment = ',y_dipole_moment !print*, 'z_dipole_moment = ',z_dipole_moment diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index e18b2378..7e414a04 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -5,7 +5,7 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] ! variable if it is set, or as the 1st argument of the command line. END_DOC - PROVIDE mpi_initialized + PROVIDE mpi_initialized output_wall_time_0 integer :: i diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization/first_gradient_opt.irp.f index d6918a00..f08b9d1f 100644 --- a/src/mo_optimization/first_gradient_opt.irp.f +++ b/src/mo_optimization/first_gradient_opt.irp.f @@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad) if (debug) then print*,'Matrix containing the gradient :' do i = 1, mo_num - write(*,'(100(E12.5))') A(i,1:mo_num) + write(*,'(100(ES12.5))') A(i,1:mo_num) enddo endif diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 868de444..37dfe051 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -62,7 +62,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo @@ -71,7 +71,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 enddo enddo @@ -128,7 +128,7 @@ subroutine ERI_dump() do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l) enddo enddo enddo @@ -167,8 +167,8 @@ subroutine LMat_tilde_dump() !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral ! TCHint convention if(dabs(integral).gt.1d-10) then - write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n - !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k endif enddo enddo diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index b86009ee..98c7b230 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -72,7 +72,7 @@ subroutine molden_lr write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -170,7 +170,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo write (i_unit_output,*) 'Sym= 1' @@ -178,7 +178,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -235,7 +235,7 @@ subroutine molden_l() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -333,7 +333,7 @@ subroutine molden_l() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -390,7 +390,7 @@ subroutine molden_r() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -488,7 +488,7 @@ subroutine molden_r() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index 830a141e..e5902a6f 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -44,7 +44,7 @@ program molden write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -142,7 +142,7 @@ program molden write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/print_ci_vectors.irp.f b/src/tools/print_ci_vectors.irp.f index 97dfdc0b..d5f86213 100644 --- a/src/tools/print_ci_vectors.irp.f +++ b/src/tools/print_ci_vectors.irp.f @@ -28,7 +28,7 @@ subroutine routine do i = 1, N_det print *, 'Determinant ', i call debug_det(psi_det(1,1,i),N_int) - print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states) + print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states) print *, '' print *, '' enddo diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 7f7458b6..c253456e 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err write(str_size,'(I3)') size_nb ! Error - write(str_exp,'(1pE20.0)') error + write(str_exp,'(ES20.0)') error str_error = trim(adjustl(str_exp)) ! Number of digit: Y (FX.Y) from the exponent diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f index f268df04..db3d5c99 100644 --- a/src/utils_trust_region/rotation_matrix_iterative.irp.f +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'R' !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) + ! write(*,'(10(ES12.5))') R(i,:) !enddo do i = 1, m @@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'RRT' !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) + ! write(*,'(10(ES12.5))') RRT(i,:) !enddo max_elem = 0d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index b7dcf875..e98bbfb7 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - !write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,ES12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then From 119779595aba655ce1effe2f7cb93ea26701c226 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 23:43:26 +0200 Subject: [PATCH 16/74] Accelerate Cholesky CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 13 +---- src/utils_cc/mo_integrals_cc.irp.f | 91 ++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index d23073b8..3c9a2cfc 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1549,19 +1549,12 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, cc_list_vir,cc_list_vir,cc_list_vir,(/ gam /), B1) + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1562,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..2e7ecdd4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,86 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) !$OMP DO - do i4 = 1, n4 + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i4=1,n4 idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 + do i2=1,n2 idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do k=1,cholesky_ao_num + v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + v1, cholesky_ao_num, & + v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO + +! !$OMP PARALLEL & +! !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,cholesky_mo_transp,cholesky_ao_num,v1) & +! !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer,v2)& +! !$OMP DEFAULT(NONE) +! allocate(buffer(n1,n3,n2), v2(cholesky_ao_num,n2)) +! !$OMP DO +! do i4 = 1, n4 +! idx4 = list4(i4) +! do i2=1,n2 +! idx2 = list2(i2) +! do k=1,cholesky_ao_num +! v2(k,i2) = cholesky_mo_transp(k,idx2,idx4) +! enddo +! enddo +! call dgemm('T','N', n1*n3, n2, cholesky_ao_num, 1.d0, & +! v1, cholesky_ao_num, & +! v2, cholesky_ao_num, 0.d0, buffer, n1*n3) +! do i3 = 1, n3 +! do i2 = 1, n2 +! do i1 = 1, n1 +! v(i1,i2,i3,i4) = buffer(i1,i3,i2) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! deallocate(buffer, v2) +! !$OMP END PARALLEL +! deallocate(v1) else double precision :: get_two_e_integral @@ -112,6 +166,7 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& From 94b1ae138b999517a62aa5ae0bcb9ab7fb00db77 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 00:07:46 +0200 Subject: [PATCH 17/74] Cleaning --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- src/ccsd/ccsd_space_orb_sub.irp.f | 16 +++++++++++++++- src/utils_cc/mo_integrals_cc.irp.f | 30 ------------------------------ 3 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 18180efb..98652d8f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 1.d-2 + double precision, parameter :: s = 3.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -135,7 +135,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Delta(np,nq))' + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 3c9a2cfc..1d77180e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1549,12 +1549,26 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) - call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, cc_list_vir,cc_list_vir,cc_list_vir,(/ gam /), B1) + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & + cc_list_vir,cc_list_vir,cc_list_vir,(/ cc_list_vir(gam) /), B1) + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) + +! !$omp do +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! enddo +! enddo +! enddo +! !$omp end do nowait + do i = 1, nO !$omp do do b = 1, nV diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 2e7ecdd4..2db614b4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -99,36 +99,6 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) enddo !$OMP END PARALLEL DO -! !$OMP PARALLEL & -! !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,cholesky_mo_transp,cholesky_ao_num,v1) & -! !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer,v2)& -! !$OMP DEFAULT(NONE) -! allocate(buffer(n1,n3,n2), v2(cholesky_ao_num,n2)) -! !$OMP DO -! do i4 = 1, n4 -! idx4 = list4(i4) -! do i2=1,n2 -! idx2 = list2(i2) -! do k=1,cholesky_ao_num -! v2(k,i2) = cholesky_mo_transp(k,idx2,idx4) -! enddo -! enddo -! call dgemm('T','N', n1*n3, n2, cholesky_ao_num, 1.d0, & -! v1, cholesky_ao_num, & -! v2, cholesky_ao_num, 0.d0, buffer, n1*n3) -! do i3 = 1, n3 -! do i2 = 1, n2 -! do i1 = 1, n1 -! v(i1,i2,i3,i4) = buffer(i1,i3,i2) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! deallocate(buffer, v2) -! !$OMP END PARALLEL -! deallocate(v1) - else double precision :: get_two_e_integral From 0132eb87fe786f39ee4e9326844829229716c19d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 02:40:59 +0200 Subject: [PATCH 18/74] Symmetry in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 64 ++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 98652d8f..f4746144 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 3.d-2 + double precision, parameter :: s = 1.d-1 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -45,6 +45,8 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] integer :: block_size, iblock, ierr + integer(omp_lock_kind), allocatable :: lock(:) + PROVIDE ao_two_e_integrals_in_map deallocate(cholesky_ao) @@ -66,8 +68,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] rank = 0 allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) allocate( addr(3,ndim) ) + do k=1,ndim + call omp_init_lock(lock(k)) + enddo ! 1. k=0 @@ -113,7 +118,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] i = i+1 ! b. - Dmin = max(s*Dmax, tau) + Dmin = max(s*Dmax,tau) ! c. nq=0 @@ -165,7 +170,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] stop -1 endif - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) + Delta(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) !$OMP DO do k=1,N @@ -181,20 +188,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] !$OMP DO SCHEDULE(dynamic,8) do m=1,nq - Delta(:,m) = 0.d0 - do k=1, nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - Delta(q,m) = Delta(p,m) - endif - enddo - + call omp_set_lock(lock(m)) do k=1,np ! Apply only to (k,m) pairs where k is not in Dset if (LDmap(k) /= 0) cycle @@ -204,9 +198,37 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] addr(2,Lset(k)), addr(2,Dset(m)) ) ) then Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - Delta(q,m) = Delta(k,m) + if (q /= 0) Delta(q,m) = Delta(k,m) endif enddo + + j = Dset_rev(addr(3,Dset(m))) + if ((0 < j).and.(j < m)) then + call omp_unset_lock(lock(m)) + cycle + endif + + if ((j /= m).and.(j /= 0)) then + call omp_set_lock(lock(j)) + endif + do k=1,nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + if (q /= 0) Delta(q,m) = Delta(p,m) + if (j /= 0) Delta(p,j) = Delta(p,m) + if (q*j /= 0) Delta(q,j) = Delta(p,m) + endif + enddo + call omp_unset_lock(lock(m)) + if ((j /= m).and.(j /= 0)) then + call omp_unset_lock(lock(j)) + endif enddo !$OMP END DO @@ -313,6 +335,10 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo + do k=1,ndim + call omp_destroy_lock(lock(k)) + enddo + allocate(cholesky_ao(ao_num,ao_num,rank)) call dcopy(ndim*rank, L, 1, cholesky_ao, 1) deallocate(L) From 9293f360d51d31248d2edcd9cffeed16d90924f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 09:09:12 +0200 Subject: [PATCH 19/74] RELEASE_NOTES.org --- RELEASE_NOTES.org | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 3bd02898..a0e6d104 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -10,7 +10,8 @@ - Added many types of integrals - Accelerated four-index transformation - Added transcorrelated SCF - - Added transcorrelated CIPSI + - Added bi-orthonormal transcorrelated CIPSI + - Added Cholesky decomposition of AO integrals - Added CCSD and CCSD(T) - Added MO localization - Changed coupling parameters for ROHF @@ -20,7 +21,7 @@ - Removed cryptokit dependency in OCaml - Using now standard convention in RDM - Added molecular properties - - [ ] Added GTOs with complex exponent + - Added GTOs with complex exponent *** TODO: take from dev - Updated version of f77-zmq From 41a369dd687fd498917c675930f169e736d766a0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 17:43:31 +0200 Subject: [PATCH 20/74] Optimized 4idx with Cholesky --- src/mo_two_e_ints/mo_bi_integrals.irp.f | 47 ++++++++++++++++--------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index a461504e..b15d9745 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -465,31 +465,34 @@ subroutine add_integrals_to_map_cholesky integer :: size_buffer, n_integrals size_buffer = min(mo_num*mo_num*mo_num,16000000) - double precision, allocatable :: Vtmp(:,:,:,:) + double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) if (.True.) then ! In-memory transformation - allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + call set_multiple_levels_omp(.False.) - call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & - cholesky_mo, mo_num*mo_num, & - cholesky_mo, mo_num*mo_num, 0.d0, & - Vtmp, mo_num*mo_num) - - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) + !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) allocate (buffer_i(size_buffer), buffer_value(size_buffer)) n_integrals = 0 + + allocate (Vtmp(mo_num,mo_num,mo_num)) + !$OMP DO do l=1,mo_num + + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + Vtmp, mo_num*mo_num) do k=1,l do j=1,mo_num do i=1,j - if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then + if (abs(Vtmp(i,j,k)) > mo_integrals_threshold) then n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k,l) + buffer_value(n_integrals) = Vtmp(i,j,k) !DIR$ FORCEINLINE call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then @@ -503,10 +506,9 @@ subroutine add_integrals_to_map_cholesky enddo !$OMP END DO call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value) + deallocate(buffer_i, buffer_value, Vtmp) !$OMP END PARALLEL - deallocate(Vtmp) call map_unique(mo_integrals_map) endif @@ -1350,16 +1352,29 @@ END_PROVIDER ! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij END_DOC - integer :: i,j + integer :: i,j,k double precision :: get_two_e_integral if (do_ao_cholesky) then + double precision, allocatable :: buffer(:,:) + allocate (buffer(cholesky_ao_num,mo_num)) + do k=1,cholesky_ao_num + do i=1,mo_num + buffer(k,i) = cholesky_mo_transp(k,i,i) + enddo + enddo + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, & + buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num) + deallocate(buffer) + do j=1,mo_num do i=1,mo_num - !TODO: use dgemm - mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) - mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + mo_two_e_integrals_jj_exchange(i,j) = 0.d0 + do k=1,cholesky_ao_num + mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & + cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) + enddo enddo enddo From 5a0c8de5a39390a63b29d2748fa6a92cb00107ea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 19:12:03 +0200 Subject: [PATCH 21/74] Accelerated cholesky AO-MO transformation --- src/mo_two_e_ints/cholesky.irp.f | 38 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 32c0dccd..7cfbaa58 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,16 +4,18 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num ! Cholesky vectors in MO basis END_DOC - integer :: k + integer :: k, i, j call set_multiple_levels_omp(.False.) - print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) + enddo + enddo enddo !$OMP END PARALLEL DO - print *, '' END_PROVIDER @@ -23,27 +25,15 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, ! Cholesky vectors in MO basis END_DOC - integer :: i,j,k - double precision, allocatable :: buffer(:,:) + double precision, allocatable :: X(:,:,:) + print *, 'AO->MO Transformation of Cholesky vectors' - print *, 'AO->MO Transformation of Cholesky vectors .' - - call set_multiple_levels_omp(.False.) - !$OMP PARALLEL PRIVATE(i,j,k,buffer) - allocate(buffer(mo_num,mo_num)) - !$OMP DO SCHEDULE(static) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) - do j=1,mo_num - do i=1,mo_num - cholesky_mo_transp(k,i,j) = buffer(i,j) - enddo - enddo - enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL - print *, '' + allocate(X(mo_num,cholesky_ao_num,ao_num)) + call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) + call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num) + deallocate(X) END_PROVIDER From e82220a6a414bd20eac08d5bca584ad0fb315495 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 02:12:42 +0200 Subject: [PATCH 22/74] Working on Cholesky CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 26 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 1395 +++++++++++++++++++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 81 +- src/tools/four_idx_transform.irp.f | 7 + src/utils/fortran_mmap.c | 14 +- src/utils/mmap.f90 | 25 +- src/utils_cc/mo_integrals_cc.irp.f | 85 +- 7 files changed, 1542 insertions(+), 91 deletions(-) create mode 100644 src/ccsd/ccsd_space_orb_sub_chol.irp.f diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 1d77180e..76c9351e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -85,13 +85,23 @@ subroutine run_ccsd_space_orb do while (not_converged) - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) - ! Residue - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) +! if (do_ao_cholesky) then + if (.False.) then + call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo_chol(nO,nV,t1,t2,H_vo) + + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + else + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + endif max_r = max(max_r1,max_r2) ! Update @@ -839,6 +849,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! allocate(B1(nV,nV,nV,nV)) ! call compute_B1(nO,nV,t1,t2,B1) +! call dgemm('N','N',nO*nO,nV*nV,nV*nV, & +! 1d0, tau, size(tau,1) * size(tau,2), & +! B1 , size(B1_gam,1) * size(B1_gam,2), & +! 1d0, r2, size(r2,1) * size(r2,2)) allocate(B1_gam(nV,nV,nV)) do gam=1,nV call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f new file mode 100644 index 00000000..190c163b --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -0,0 +1,1395 @@ +subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space_chol(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + do u = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + max_r1 = max(dabs(r1(i,a)), max_r1) + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end + +! H_vv + +subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end + +! H_vo + +subroutine compute_H_vo_chol(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end + +! R2 + +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + double precision, dimension(:,:,:,:), allocatable :: r2_chem, tmp, tau_chem + double precision, dimension(:,:,:,:), allocatable :: B1 + + allocate(B1(nV,nV,nV,nV)) + call compute_B1_chol(nO,nV,t1,B1,cholesky_ao_num) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1 ,1) * size(B1 ,2), & + 1d0, r2, size(r2 ,1) * size(r2 ,2)) + + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + do i = 1, nO + !$omp do + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + max_r2 = max(r2(i,j,a,b), max_r2) + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end + +! A1 + +subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! B1 +subroutine compute_B1_chol(nO,nV,t1,B1,ldb) + + implicit none + + integer, intent(in) :: nO,nV,ldb + double precision, intent(in) :: t1(nO, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + do i = 1, nO + B1(a,b,beta,gam) = B1(a,b,beta,gam) & + - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + enddo + + enddo + enddo + enddo + enddo + +end + +! g_occ + +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! g_vir + +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! J1 + +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + do j = 1, nO + !$omp do + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + do i = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + !$omp end do + enddo + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b15d9745..af40e571 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -166,11 +166,9 @@ subroutine four_idx_dgemm deallocate (a1) + call map_sort(mo_integrals_map) call map_unique(mo_integrals_map) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - end subroutine subroutine add_integrals_to_map(mask_ijkl) @@ -250,7 +248,7 @@ subroutine add_integrals_to_map(mask_ijkl) call wall_time(wall_1) - size_buffer = min(ao_num*ao_num*ao_num,8000000) + size_buffer = min(ao_num*ao_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' @@ -443,11 +441,6 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END PARALLEL call map_merge(mo_integrals_map) - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - deallocate(list_ijkl) @@ -463,55 +456,55 @@ subroutine add_integrals_to_map_cholesky integer :: i,j,k,l,m integer :: size_buffer, n_integrals - size_buffer = min(mo_num*mo_num*mo_num,16000000) + size_buffer = min(mo_num*mo_num,16000000) double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) - if (.True.) then - ! In-memory transformation + call set_multiple_levels_omp(.False.) - call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + allocate (Vtmp(mo_num,mo_num,mo_num)) + n_integrals = 0 - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) - allocate (buffer_i(size_buffer), buffer_value(size_buffer)) - n_integrals = 0 + !$OMP DO SCHEDULE(dynamic) + do l=1,mo_num + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + Vtmp, mo_num*mo_num) - allocate (Vtmp(mo_num,mo_num,mo_num)) - - !$OMP DO - do l=1,mo_num - - call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & - Vtmp, mo_num*mo_num) - do k=1,l - do j=1,mo_num - do i=1,j - if (abs(Vtmp(i,j,k)) > mo_integrals_threshold) then - n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - n_integrals = 0 - endif + do k=1,l + do j=1,mo_num + do i=1,j + if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then + n_integrals = n_integrals + 1 + buffer_value(n_integrals) = Vtmp(i,j,k) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 endif - enddo + endif enddo enddo enddo - !$OMP END DO + enddo + !$OMP END DO NOWAIT + + if (n_integrals > 0) then call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value, Vtmp) - !$OMP END PARALLEL - - call map_unique(mo_integrals_map) - endif + deallocate(buffer_i, buffer_value, Vtmp) + !$OMP BARRIER + !$OMP END PARALLEL + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) end diff --git a/src/tools/four_idx_transform.irp.f b/src/tools/four_idx_transform.irp.f index 92e87cad..f7520e71 100644 --- a/src/tools/four_idx_transform.irp.f +++ b/src/tools/four_idx_transform.irp.f @@ -14,6 +14,13 @@ program four_idx_transform io_mo_two_e_integrals = 'Write' SOFT_TOUCH io_mo_two_e_integrals + if (.true.) then + PROVIDE ao_two_e_integrals_in_map + endif + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + endif if (.true.) then PROVIDE mo_two_e_integrals_in_map endif diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 52df2476..71426002 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -22,11 +22,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); } else { @@ -53,16 +49,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } if (map == MAP_FAILED) { close(fd); - printf("%s:\n", filename); + printf("%s: %lu\n", filename, bytes); perror("Error mmapping the file"); exit(EXIT_FAILURE); } diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 49147283..caabc6f1 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,7 +46,14 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length + if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) else @@ -66,7 +73,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -82,7 +95,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 2db614b4..62237229 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -53,33 +53,8 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) allocate(buffer(n1,n3,n2,n4)) - !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) - !$OMP DO - do i3=1,n3 - idx3 = list3(i3) - do i1=1,n1 - idx1 = list1(i1) - do k=1,cholesky_ao_num - v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i4=1,n4 - idx4 = list4(i4) - do i2=1,n2 - idx2 = list2(i2) - do k=1,cholesky_ao_num - v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP BARRIER - !$OMP END PARALLEL + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num) call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & v1, cholesky_ao_num, & @@ -129,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) end +subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) + + implicit none + + integer, intent(in) :: n1,n3,ldv + integer, intent(in) :: list1(n1),list3(n3) + double precision, intent(out) :: v(ldv,n1,n3) + + integer :: i1,i3,idx1,idx3,k + + !$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k) + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] @@ -345,6 +344,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] From a2c4a74d926e0017b701d1f6f510b2bf9a751f74 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 16:21:37 +0200 Subject: [PATCH 23/74] Fixed writing MOs for large sizes' --- src/ao_two_e_ints/cholesky.irp.f | 12 ++++++++++-- src/mo_two_e_ints/cholesky.irp.f | 6 +++++- src/mo_two_e_ints/mo_bi_integrals.irp.f | 13 ++++++++++++- src/utils/map_functions.irp.f | 4 ++++ src/utils/mmap.f90 | 3 --- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f4746144..ce05de5b 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -339,8 +339,16 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] call omp_destroy_lock(lock(k)) enddo - allocate(cholesky_ao(ao_num,ao_num,rank)) - call dcopy(ndim*rank, L, 1, cholesky_ao, 1) + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO deallocate(L) cholesky_ao_num = rank diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 7cfbaa58..3a868cbe 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -26,9 +26,13 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, END_DOC double precision, allocatable :: X(:,:,:) + integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' - allocate(X(mo_num,cholesky_ao_num,ao_num)) + allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index af40e571..0ed6f373 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -90,6 +90,10 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif + allocate (a1(ao_num,ao_num,ao_num,ao_num)) print *, 'Getting AOs' @@ -103,6 +107,7 @@ subroutine four_idx_dgemm enddo !$OMP END PARALLEL DO + print *, '1st transformation' ! 1st transformation allocate (a2(ao_num,ao_num,ao_num,mo_num)) @@ -456,7 +461,7 @@ subroutine add_integrals_to_map_cholesky integer :: i,j,k,l,m integer :: size_buffer, n_integrals - size_buffer = min(mo_num*mo_num,16000000) + size_buffer = min(mo_num*mo_num*mo_num,16000000) double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) @@ -575,6 +580,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -850,6 +858,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index cd3b28a8..97d0e8bf 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map) integer*8 :: n_elements n_elements = int(map % n_elements,8) + if (n_elements <= 0) then + print *, 'Unable to write map to disk: n_elements = ', n_elements + stop -1 + endif if (map % consolidated) then diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index caabc6f1..41e60224 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -52,7 +52,6 @@ module mmap_module do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) @@ -79,7 +78,6 @@ print *, 'map_length: ', length do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -101,7 +99,6 @@ print *, 'map_length: ', length do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine From e35f847341cf6094886cd675d5bf37b8e752c652 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 17:51:59 +0200 Subject: [PATCH 24/74] Enabled direct integrals in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 43 +++++++++++++++++++------ src/ao_two_e_ints/two_e_integrals.irp.f | 3 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 4 ++- src/tools/four_idx_transform.irp.f | 7 ---- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index ce05de5b..f7eae638 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -43,11 +43,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero + double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr integer(omp_lock_kind), allocatable :: lock(:) + PROVIDE nucl_coord - PROVIDE ao_two_e_integrals_in_map + if (.not.do_direct_integrals) then + PROVIDE ao_two_e_integrals_in_map + endif deallocate(cholesky_ao) ndim = ao_num*ao_num @@ -85,13 +89,22 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo enddo - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) - enddo - !$OMP END PARALLEL DO + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif Dmax = maxval(D) @@ -196,8 +209,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] if ((0 < q).and.(q < k)) cycle if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & + if (do_direct_integrals) then + Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), & + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif if (q /= 0) Delta(q,m) = Delta(k,m) endif enddo @@ -218,8 +236,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] if ((0 < q).and.(q < p)) cycle if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + if (do_direct_integrals) then + Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), & + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif if (q /= 0) Delta(q,m) = Delta(p,m) if (j /= 0) Delta(p,j) = Delta(p,m) if (q*j /= 0) Delta(q,j) = Delta(p,m) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 85ff5bcf..0c70aae5 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -1232,7 +1232,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) logical, external :: ao_two_e_integral_zero integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + double precision, external :: ao_two_e_integral + double precision :: cpu_1,cpu_2, wall_1, wall_2 double precision :: integral, wall_0 double precision :: thr integer :: kk, m, j1, i1 diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0ed6f373..0d3fe176 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) print*, 'MO integrals provided' return - else + endif + + if (.not. do_direct_integrals) then PROVIDE ao_two_e_integrals_in_map endif diff --git a/src/tools/four_idx_transform.irp.f b/src/tools/four_idx_transform.irp.f index f7520e71..92e87cad 100644 --- a/src/tools/four_idx_transform.irp.f +++ b/src/tools/four_idx_transform.irp.f @@ -14,13 +14,6 @@ program four_idx_transform io_mo_two_e_integrals = 'Write' SOFT_TOUCH io_mo_two_e_integrals - if (.true.) then - PROVIDE ao_two_e_integrals_in_map - endif - if (do_ao_cholesky) then - PROVIDE cholesky_mo_transp - FREE cholesky_ao - endif if (.true.) then PROVIDE mo_two_e_integrals_in_map endif From 905d88529f1b3a2711951cd59585dbea2b398fea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 17:42:20 +0200 Subject: [PATCH 25/74] Reduced memory in cholesky SCF --- src/ao_two_e_ints/cholesky.irp.f | 68 ++++++++++++++----- src/ao_two_e_ints/two_e_integrals.irp.f | 90 +++++++++++++++++++++---- src/ccsd/ccsd_t_space_orb_abc.irp.f | 2 +- src/determinants/density_matrix.irp.f | 4 +- src/determinants/h_apply.irp.f | 2 +- src/determinants/s2.irp.f | 2 +- src/hartree_fock/fock_matrix_hf.irp.f | 90 ++++++++++++++++--------- src/utils/memory.irp.f | 2 +- 8 files changed, 193 insertions(+), 67 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f7eae638..d0fa735d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 1.d-1 + double precision :: s double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -47,6 +47,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] integer :: block_size, iblock, ierr integer(omp_lock_kind), allocatable :: lock(:) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + PROVIDE nucl_coord if (.not.do_direct_integrals) then @@ -57,6 +62,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] ndim = ao_num*ao_num tau = ao_cholesky_threshold + rss = 6.d0 * memory_of_double(ndim) + & + 6.d0 * memory_of_int(ndim) + call check_mem(rss, irp_here) allocate(L(ndim,1)) @@ -97,7 +105,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & addr(2,i), addr(2,i), & @@ -130,21 +138,49 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] ! a. i = i+1 - ! b. - Dmin = max(s*Dmax,tau) + logical :: memory_ok + memory_ok = .False. - ! c. - nq=0 - LDmap = 0 - DLmap = 0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p + s = 1.d-2 + + ! Inrease s until the arrays fit in memory + do + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + LDmap = 0 + DLmap = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p + endif + enddo + + call resident_memory(rss) + rss = rss & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + ! Ltmp_q(nq,block_size) + + if (rss > qp_max_mem) then + s = s*2.d0 + else + exit endif + + if (nq == 0) then + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + enddo ! d., e. @@ -198,7 +234,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic,8) + !$OMP DO SCHEDULE(guided) do m=1,nq call omp_set_lock(lock(m)) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 0c70aae5..f86fb269 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(dynamic) + !$OMP SCHEDULE(guided) do i=1,ao_num do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) @@ -975,7 +975,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -998,7 +999,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) endif ny=0 @@ -1017,7 +1019,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1057,7 +1060,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) ny=0 @@ -1069,7 +1073,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end @@ -1098,7 +1103,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1118,7 +1124,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1130,7 +1137,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1177,9 +1185,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) -! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) return @@ -1199,7 +1207,8 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_01,2,d,nd) - call multiply_poly_c2(X,nx,B_01,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1208,9 +1217,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) -! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) end select end @@ -1300,3 +1309,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd) end +!DIR$ FORCEINLINE +subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 1aab6bd7..12a71045 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do a = 1, nV do b = a+1, nV do c = b+1, nV diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f index 1a1d92b5..ce4d96c2 100644 --- a/src/determinants/density_matrix.irp.f +++ b/src/determinants/density_matrix.irp.f @@ -117,7 +117,7 @@ END_PROVIDER !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) ) tmp_a = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_a=1,N_det krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) @@ -173,7 +173,7 @@ END_PROVIDER deallocate(tmp_a) tmp_b = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_b=1,N_det krow = psi_bilinear_matrix_transp_rows(k_b) ASSERT (krow <= N_det_alpha_unique) diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index 078c2104..65f1a832 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates) enddo !$OMP END DO - !$OMP DO schedule(dynamic,1024) + !$OMP DO schedule(guided,64) do i=1,N_det-1 if (duplicate(i)) then cycle diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index 2c1a8757..6dc49526 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)& !$OMP REDUCTION(+:accu) allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do i = n,1,-1 ! Better OMP scheduling call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj) diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 8c6658c5..a5ab6a60 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -190,47 +190,75 @@ END_PROVIDER deallocate(X) - ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + if (elec_alpha_num > elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif - allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + double precision :: rss + double precision :: memory_of_double + integer :: iblock + integer, parameter :: block_size = 32 + + rss = memory_of_double(ao_num*ao_num) + call check_mem(2.d0*block_size*rss, irp_here) + allocate(X2(ao_num,ao_num,block_size,2)) + allocate(X3(ao_num,block_size,ao_num,2)) + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_alpha, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,1), ao_num) + do iblock=1,cholesky_ao_num,block_size - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_beta, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,2), ao_num) + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,1), ao_num) - allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + else + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + enddo + enddo + enddo + endif + + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,1), ao_num*block_size, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,2), ao_num*block_size, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + endif - do s=1,ao_num - do j=1,cholesky_ao_num - do m=1,ao_num - X3(m,j,s,1) = X2(m,s,j,1) - X3(m,j,s,2) = X2(m,s,j,2) - enddo - enddo enddo - deallocate(X2) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_alpha_chol, ao_num) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_beta_chol, ao_num) - - deallocate(X3) + if (elec_alpha_num == elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif + deallocate(X2,X3) END_PROVIDER diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 115b2cbe..0cd2133e 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -5,7 +5,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] END_DOC character*(128) :: env - qp_max_mem = 2000 + qp_max_mem = 500 call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() From 4237fa888f0f537f113825557a6f0c38c2efeaff Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 19:05:46 +0200 Subject: [PATCH 26/74] Get total memory --- src/ao_two_e_ints/cholesky.irp.f | 15 +++++++++---- src/utils/memory.irp.f | 36 +++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d0fa735d..4702c850 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -141,7 +141,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] logical :: memory_ok memory_ok = .False. - s = 1.d-2 + s = 0.1d0 ! Inrease s until the arrays fit in memory do @@ -176,7 +176,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] exit endif - if (nq == 0) then + if ((s > 1.d0).or.(nq == 0)) then print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif @@ -219,10 +219,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] stop -1 endif - Delta(:,:) = 0.d0 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + !$OMP DO + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP ENDDO NOWAIT + !$OMP DO do k=1,N do p=1,np @@ -232,7 +237,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] Ltmp_q(q,k) = L(Dset(q),k) enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT + + !$OMP BARRIER !$OMP DO SCHEDULE(guided) do m=1,nq diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 0cd2133e..7da283ec 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] ! Maximum memory in Gb END_DOC character*(128) :: env + integer, external :: get_total_available_memory - qp_max_mem = 500 + qp_max_mem = get_total_available_memory() + call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() @@ -122,3 +124,35 @@ subroutine print_memory_usage() '.. >>>>> [ RES MEM : ', rss , & ' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..' end + +integer function get_total_available_memory() result(res) + implicit none + BEGIN_DOC +! Returns the total available memory on the current machine + END_DOC + + character(len=128) :: line + integer :: status + integer :: iunit + integer*8, parameter :: KB = 1024 + integer*8, parameter :: GiB = 1024**3 + integer, external :: getUnitAndOpen + + iunit = getUnitAndOpen('/proc/meminfo','r') + + res = 512 + do + read(iunit, '(A)', END=10) line + if (line(1:10) == "MemTotal: ") then + read(line(11:), *, ERR=20) res + res = int((res*KB) / GiB,4) + exit + 20 continue + end if + end do + 10 continue + close(iunit) + +end function get_total_available_memory + + From 073aef70b8891d1027f14c8a3ca21d9261a81abe Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 21:54:06 +0200 Subject: [PATCH 27/74] Inlined function in integrals --- src/ao_two_e_ints/two_e_integrals.irp.f | 466 ++++++++++++++++++++++-- 1 file changed, 429 insertions(+), 37 deletions(-) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index f86fb269..148ebb62 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -951,7 +951,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib ASSERT (a>2) !DIR$ LOOP COUNT(8) @@ -974,9 +974,43 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if (nx >= 0) then + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -997,10 +1031,47 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif + ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + endif ny=0 @@ -1018,9 +1089,45 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1037,7 +1144,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib if( (c<0).or.(nd<0) )then nd = -1 @@ -1059,9 +1166,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 @@ -1072,9 +1214,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end @@ -1092,7 +1269,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib !DIR$ LOOP COUNT(8) do ix=0,n_pt_in @@ -1102,9 +1279,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1123,9 +1335,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1136,9 +1383,45 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1155,7 +1438,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) integer :: nx, ix,ny double precision :: X(0:max_dim),Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y - integer :: i + integer :: i, ib select case (c) case (0) @@ -1185,9 +1468,46 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) -! call multiply_poly(Y,ny,D_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + return @@ -1206,9 +1526,44 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_01,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(2) * X(0) + + case (1) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(2) * X(1) + + case (2) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(0) * X(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(1) * X(2) + B_01(2) * X(1) + d(4) = d(4) + B_01(2) * X(2) + + case default + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_01(0) * X(ib) + B_01(1) * X(ib-1) + B_01(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_01(1) * X(nx) + B_01(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_01(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1217,9 +1572,46 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) -! call multiply_poly(Y,ny,D_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end select end From 8c4a7226cdb7b594528db269cdb39c1d556b8bc6 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 11:32:06 +0200 Subject: [PATCH 28/74] minor changes --- src/casscf_cipsi/casscf.irp.f | 10 +++++----- src/casscf_cipsi/save_energy.irp.f | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index a2f3c5a7..02954ebf 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -17,14 +17,14 @@ end subroutine run implicit none double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E - logical :: converged,state_following_casscf_save + logical :: converged,state_following_casscf_cipsi_save integer :: iteration converged = .False. energy = 0.d0 mo_label = "MCSCF" iteration = 1 - state_following_casscf_save = state_following_casscf + state_following_casscf_cipsi_save = state_following_casscf state_following_casscf = .True. touch state_following_casscf ept2_before = 0.d0 @@ -44,8 +44,8 @@ subroutine run call write_double(6,energy,'CAS-SCF energy = ') if(n_states == 1)then double precision :: E_PT2, PT2 - call ezfio_get_casscf_energy_pt2(E_PT2) - call ezfio_get_casscf_energy(PT2) + call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) + call ezfio_get_casscf_cipsi_energy(PT2) PT2 -= E_PT2 call write_double(6,E_PT2,'E + PT2 energy = ') call write_double(6,PT2,' PT2 = ') @@ -98,7 +98,7 @@ subroutine run SOFT_TOUCH pt2_max endif if(iteration .gt. 3)then - state_following_casscf = state_following_casscf_save + state_following_casscf = state_following_casscf_cipsi_save soft_touch state_following_casscf endif endif diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f index 8729c5af..18750c3d 100644 --- a/src/casscf_cipsi/save_energy.irp.f +++ b/src/casscf_cipsi/save_energy.irp.f @@ -4,6 +4,6 @@ subroutine save_energy(E,pt2) ! Saves the energy in |EZFIO|. END_DOC double precision, intent(in) :: E(N_states), pt2(N_states) - call ezfio_set_casscf_energy(E(1:N_states)) - call ezfio_set_casscf_energy_pt2(E(1:N_states)+pt2(1:N_states)) + call ezfio_set_casscf_cipsi_energy(E(1:N_states)) + call ezfio_set_casscf_cipsi_energy_pt2(E(1:N_states)+pt2(1:N_states)) end From 9ce6eb78c84802739286092b1f3860d981bac361 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 12:40:56 +0200 Subject: [PATCH 29/74] Cholesky in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 99 ++++++++++++-- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 173 +++++++++++++------------ 2 files changed, 178 insertions(+), 94 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 76c9351e..04b7e955 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -9,7 +9,7 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) @@ -18,8 +18,6 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa -! PROVIDE mo_two_e_integrals_in_map - det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) @@ -46,6 +44,7 @@ subroutine run_ccsd_space_orb allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) + allocate(tau_x(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) @@ -67,10 +66,11 @@ subroutine run_ccsd_space_orb call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -86,11 +86,11 @@ subroutine run_ccsd_space_orb do while (not_converged) ! Residue -! if (do_ao_cholesky) then - if (.False.) then - call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo_chol(nO,nV,t1,t2,H_vo) + if (do_ao_cholesky) then +! if (.False.) then + call compute_H_oo_chol(nO,nV,tau_x,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,H_vv) + call compute_H_vo_chol(nO,nV,t1,H_vo) call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) @@ -119,9 +119,10 @@ subroutine run_ccsd_space_orb endif call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 @@ -249,6 +250,51 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) end +subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau_x(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau_x,t1,& + !$omp cc_space_f_vo,cc_space_v_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + ! Tau subroutine update_tau_space(nO,nV,t1,t2,tau) @@ -284,6 +330,39 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) end +subroutine update_tau_x_space(nO,nV,tau,tau_x) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau_x(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,tau_x) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + ! R1 subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 190c163b..0b9e123e 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -276,64 +276,88 @@ end ! H_oo -subroutine compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) +subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(in) :: tau_x(nO, nO, nV, nV) double precision, intent(out) :: H_oo(nO, nO) - integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + integer :: a,b,i,j,u,k - ! H_oo(u,i) = cc_space_f_oo(u,i) + double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + + allocate(tau_kau(cholesky_ao_num,nV,nO)) !$omp parallel & - !$omp shared(nO,H_oo,cc_space_f_oo) & - !$omp private(i,u) & - !$omp default(none) + !$omp default(shared) & + !$omp private(i,u,j,k,a,b,tmp_vov) + allocate(tmp_vov(nV,nO,nV) ) + !$omp do + do u = 1, nO + do b=1,nV + do j=1,nO + do a=1,nV + tmp_vov(a,j,b) = tau_x(u,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_ao_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_ao_num) + enddo + !$omp end do nowait + deallocate(tmp_vov) !$omp do do i = 1, nO do u = 1, nO H_oo(u,i) = cc_space_f_oo(u,i) enddo enddo - !$omp end do - !$omp end parallel - - ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) - ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) - call dgemm('N','T', nO, nO, nO*nV*nV, & - 1d0, tau , size(tau,1), & - cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nO, nO, cholesky_ao_num*nV, 1.d0, & + tau_kau, cholesky_ao_num*nV, cc_space_v_vo_chol, cholesky_ao_num*nV, & + 1.d0, H_oo, nO) end ! H_vv -subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) +subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(in) :: tau_x(nO, nO, nV, nV) double precision, intent(out) :: H_vv(nV, nV) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + integer :: a,b,i,j,u,k, beta - double precision, allocatable :: tmp_tau(:,:,:,:) + double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) - allocate(tmp_tau(nV,nO,nO,nV)) - - ! H_vv(a,beta) = cc_space_f_vv(a,beta) + allocate(tau_kia(cholesky_ao_num,nO,nV)) !$omp parallel & - !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & - !$omp private(a,beta,i,j,b) & - !$omp default(none) + !$omp default(shared) & + !$omp private(i,beta,j,k,a,b,tmp_oov) + allocate(tmp_oov(nO,nO,nV) ) + !$omp do + do a = 1, nV + do b=1,nV + do j=1,nO + do i=1,nO + tmp_oov(i,j,b) = tau_x(i,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_ao_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_ao_num) + enddo + !$omp end do nowait + deallocate(tmp_oov) + !$omp do do beta = 1, nV do a = 1, nV @@ -341,83 +365,64 @@ subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) enddo enddo !$omp end do nowait - - !$omp do - do beta = 1, nV - do j = 1, nO - do i = 1, nO - do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - - call dgemm('N','N',nV,nV,nO*nO*nV, & - -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & - tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & - 1d0, H_vv , size(H_vv,1)) - - deallocate(tmp_tau) + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nV, nV, cholesky_ao_num*nO, -1.d0, & + tau_kia, cholesky_ao_num*nO, cc_space_v_ov_chol, cholesky_ao_num*nO, & + 1.d0, H_vv, nV) end ! H_vo - -subroutine compute_H_vo_chol(nO,nV,t1,t2,H_vo) +subroutine compute_H_vo_chol(nO,nV,t1,H_vo) implicit none integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: H_vo(nV, nO) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + integer :: a,b,i,j,u,k - double precision, allocatable :: w(:,:,:,:) - - allocate(w(nV,nO,nO,nV)) - - !$omp parallel & - !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & - !$omp private(a,beta,i,j,b) & - !$omp default(none) - !$omp do - do i = 1, nO - do a = 1, nV + double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) + do i=1,nO + do a=1,nV H_vo(a,i) = cc_space_f_vo(a,i) enddo enddo - !$omp end do nowait - ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) - ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + allocate(tmp_k(cholesky_ao_num)) + call dgemm('N', 'N', cholesky_ao_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_ao_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) - !$omp do - do b = 1, nV - do j = 1, nO - do i = 1, nO - do a = 1, nV - w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) - enddo + call dgemm('T','N',nV*nO,1,cholesky_ao_num,1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + H_vo, nV*nO) + deallocate(tmp_k) + + allocate(tmp(cholesky_ao_num,nO,nO)) + allocate(tmp2(cholesky_ao_num,nO,nO)) + + call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, tmp, cholesky_ao_num*nO) + + do i=1,nO + do j=1,nO + do k=1,cholesky_ao_num + tmp2(k,j,i) = tmp(k,i,j) enddo enddo enddo - !$omp end do - !$omp end parallel + deallocate(tmp) - call dgemv('N',nV*nO, nO*nV, & - 1d0, w , size(w,1) * size(w,2), & - t1 , 1, & - 1d0, H_vo, 1) - - deallocate(w) + call dgemm('T','N', nV, nO, cholesky_ao_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, tmp2, cholesky_ao_num*nO, & + 1.d0, H_vo, nV) end + ! R2 subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) @@ -1015,7 +1020,7 @@ subroutine compute_B1_chol(nO,nV,t1,B1,ldb) - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) enddo - + enddo enddo enddo From 2d383d09c6381f4a421d39a45e27abe36fe99133 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 12:10:53 +0200 Subject: [PATCH 30/74] routine htilde_mu_mat_opt_bi_ortho works --- src/bi_ort_ints/three_body_ijmk.irp.f | 7 ++ src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 93 +++++++++++++++++++++++++ src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 2 +- src/tc_bi_ortho/test_tc_fock.irp.f | 9 +-- src/tc_scf/fock_tc.irp.f | 24 +++---- 5 files changed, 114 insertions(+), 21 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 0c02e4c5..669861b7 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -86,6 +86,13 @@ tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) tmp_loc_2 = tmp_aux_2(ipoint,n) + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + enddo enddo !$OMP END DO diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 1d1b26cc..8524253a 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,3 +90,96 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_chi_array,i_H_phi_array) + use bitmasks + implicit none + BEGIN_DOC +! Computes $\langle i|H|Phi \rangle = \sum_J c^R_J \langle i | H | J \rangle$. +! +! AND $\langle Chi|H| i \rangle = \sum_J c^L_J \langle J | H | i \rangle$. +! +! CONVENTION: i_H_phi_array(0) = total matrix element, +! +! i_H_phi_array(1) = one-electron matrix element, +! +! i_H_phi_array(2) = two-electron matrix element, +! +! i_H_phi_array(3) = three-electron matrix element, +! +! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$ +! is connected. +! +! The i_H_psi_minilist is much faster but requires to build the +! minilists. + END_DOC + integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate + integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) + integer(bit_kind), intent(in) :: key(Nint,2) + double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) + double precision, intent(out) :: i_H_chi_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + + integer :: i, ii,j + double precision :: phase + integer :: exc(0:2,2,2) + double precision :: hmono, htwoe, hthree, htot + integer, allocatable :: idx(:) + + ASSERT (Nint > 0) + ASSERT (N_int == Nint) + ASSERT (Nstate > 0) + ASSERT (Ndet > 0) + ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) + + i_H_chi_array = 0.d0 + i_H_phi_array = 0.d0 + + call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) + if (Nstate == 1) then + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + i_H_chi_array(0,1) = i_H_chi_array(0,1) + coef_l(i,1)*htot + i_H_chi_array(1,1) = i_H_chi_array(1,1) + coef_l(i,1)*hmono + i_H_chi_array(2,1) = i_H_chi_array(2,1) + coef_l(i,1)*htwoe + i_H_chi_array(3,1) = i_H_chi_array(3,1) + coef_l(i,1)*hthree + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + i_H_phi_array(0,1) = i_H_phi_array(0,1) + coef_r(i,1)*htot + i_H_phi_array(1,1) = i_H_phi_array(1,1) + coef_r(i,1)*hmono + i_H_phi_array(2,1) = i_H_phi_array(2,1) + coef_r(i,1)*htwoe + i_H_phi_array(3,1) = i_H_phi_array(3,1) + coef_r(i,1)*hthree + enddo + + else + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_chi_array(0,j) = i_H_chi_array(0,j) + coef_l(i,j)*htot + i_H_chi_array(1,j) = i_H_chi_array(1,j) + coef_l(i,j)*hmono + i_H_chi_array(2,j) = i_H_chi_array(2,j) + coef_l(i,j)*htwoe + i_H_chi_array(3,j) = i_H_chi_array(3,j) + coef_l(i,j)*hthree + enddo + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_phi_array(0,j) = i_H_phi_array(0,j) + coef_r(i,j)*htot + i_H_phi_array(1,j) = i_H_phi_array(1,j) + coef_r(i,j)*hmono + i_H_phi_array(2,j) = i_H_phi_array(2,j) + coef_r(i,j)*htwoe + i_H_phi_array(3,j) = i_H_phi_array(3,j) + coef_r(i,j)*hthree + enddo + enddo + + endif + +end + diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 35abbbc4..cb33d343 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -184,7 +184,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) +! !ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index 182c03d7..f1a7cc0a 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -152,9 +152,7 @@ subroutine routine_tot() ! do i = 1, elec_num_tab(s1) ! do a = elec_num_tab(s1)+1, mo_num ! virtual do i = 1, elec_beta_num - do a = elec_beta_num+1, elec_alpha_num! virtual -! do i = elec_beta_num+1, elec_alpha_num -! do a = elec_alpha_num+1, mo_num! virtual + do a = elec_beta_num+1, mo_num! virtual print*,i,a det_i = ref_bitmask @@ -167,7 +165,7 @@ subroutine routine_tot() call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij - if(dabs(htilde_ij).lt.1.d-10)cycle +! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' call debug_det(det_i, N_int) @@ -184,9 +182,12 @@ subroutine routine_tot() ! endif err_ai = dabs(dabs(ref) - dabs(new)) if(err_ai .gt. 1d-7) then + print*,'---------' print*,'s1 = ',s1 print*, ' warning on', i, a print*, ref,new,err_ai + print*,hmono, htwoe, hthree + print*,'---------' endif print*, ref,new,err_ai err_tot += err_ai diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 0ae515bb..f4553f3e 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -208,10 +208,10 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] if(three_body_h_tc) then !call wall_time(tt0) - !PROVIDE fock_a_tot_3e_bi_orth - !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + PROVIDE fock_a_tot_3e_bi_orth + Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth +! PROVIDE fock_3e_uhf_mo_a +! Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a !call wall_time(tt1) !print*, ' 3-e term:', tt1-tt0 endif @@ -241,21 +241,13 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_beta - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_b - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1)) - !deallocate(tmp) - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - !PROVIDE fock_b_tot_3e_bi_orth - !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_b - Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b + PROVIDE fock_b_tot_3e_bi_orth + Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth +! PROVIDE fock_3e_uhf_mo_b +! Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif else From 44956060e7321b8bb76a0b13e83ae254039a8fa1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 17:06:34 +0200 Subject: [PATCH 31/74] Removed vvv arrays --- src/ccsd/ccsd_space_orb_sub.irp.f | 52 +++-- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 310 ++++++++++++++----------- 2 files changed, 209 insertions(+), 153 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 04b7e955..35e14313 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -92,7 +92,7 @@ subroutine run_ccsd_space_orb call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1,H_vo) - call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) else call compute_H_oo(nO,nV,t1,t2,tau,H_oo) @@ -538,25 +538,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! enddo ! enddo !enddo + + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -570,13 +561,35 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV @@ -1640,11 +1653,12 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) ! enddo double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & - cc_list_vir,cc_list_vir,cc_list_vir,(/ cc_list_vir(gam) /), B1) + cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir(gam), B1) !$omp parallel & diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0b9e123e..50f5f603 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -185,25 +185,15 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(X_ovov) + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -217,10 +207,30 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) @@ -450,7 +460,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) call compute_A1_chol(nO,nV,t1,t2,tau,A1) call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvvo,cc_space_v_vvoo,J1) + cc_space_v_vvoo,J1) call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & cc_space_v_ovov,cc_space_v_vvov,K1) @@ -479,15 +489,54 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) tau, size(tau,1) * size(tau,2), & 1d0, r2, size(r2,1) * size(r2,2)) - double precision, dimension(:,:,:,:), allocatable :: r2_chem, tmp, tau_chem - double precision, dimension(:,:,:,:), allocatable :: B1 + integer :: block_size, iblock, k + block_size = 16 + double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 - allocate(B1(nV,nV,nV,nV)) - call compute_B1_chol(nO,nV,t1,B1,cholesky_ao_num) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + allocate(tmp_cc(cholesky_ao_num,nV,nV)) + call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV)) + !$OMP DO + do gam = 1, nV + do iblock = 1, nV, block_size + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & + -1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + tmp_cc(1,1,gam), cholesky_ao_num, 0.d0, tmpB1, nV*block_size) + + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, tmpB1, nV*block_size) + + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, & + tmpB1, nV*block_size) + + do beta = iblock, min(nV, iblock+block_size-1) + do b = 1, nV + do a = 1, nV + B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) + enddo + enddo + enddo + + call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & 1d0, tau, size(tau,1) * size(tau,2), & B1 , size(B1 ,1) * size(B1 ,2), & - 1d0, r2, size(r2 ,1) * size(r2 ,2)) + 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + enddo + + enddo + !$OMP ENDDO + + deallocate(B1, tmpB1) + !$OMP END PARALLEL + + deallocate(tmp_cc) + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) @@ -556,29 +605,21 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_oovv) double precision, allocatable :: X_vovv(:,:,:,:) - allocate(X_vovv(nV,nO,nV,nV)) - !$omp parallel & - !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & - !$omp private(u,a,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do u = 1, nO - do a = 1, nV - X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) - enddo - enddo + allocate(X_vovv(nV,nO,nV,block_size)) + do iblock = 1, nV, block_size + do gam = iblock, min(nV, iblock+block_size-1) + call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, cc_space_v_ov_chol, & + cholesky_ao_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + enddo - enddo - !$omp end do - !$omp end parallel - - call dgemm('N','N',nO,nO*nV*nV,nV, & + call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & 1d0, t1 , size(t1,1), & X_vovv, size(X_vovv,1), & - 0d0, Y_oovv, size(Y_oovv,1)) + 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) + + enddo !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -597,38 +638,27 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + double precision, allocatable :: X_ovvo(:,:,:,:) + double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) + allocate(tcc2(cholesky_ao_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_ao_num,nO,nV)) + + call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_ao_num*nV) + + call dgemm('N','N', cholesky_ao_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_ao_num*nO, t1, nO, & + 0.d0, tcc, cholesky_ao_num*nO) + + call dgemm('T','N', nO*nV, nV*nO, cholesky_ao_num, 1.d0, & + tcc, cholesky_ao_num, tcc2, cholesky_ao_num, 0.d0, & + X_ovvo, nO*nV) + + deallocate(tcc, tcc2) !$omp parallel & - !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & - !$omp private(u,v,gam,i) & - !$omp default(none) - do i = 1, nO - !$omp do - do gam = 1, nV - do u = 1, nO - do a = 1, nV - X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel - - call dgemm('N','N',nV*nO*nV,nV,nO, & - 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & - t1 , size(t1,1), & - 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) - - call dgemm('N','N',nO,nO*nV*nV,nV, & - 1d0, t1, size(t1,1), & - Y_vovv, size(Y_vovv,1), & - 0d0, X_oovv, size(X_oovv,1)) - - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & + !$omp shared(nO,nV,r2,X_ovvo) & !$omp private(u,v,gam,beta) & !$omp default(none) !$omp do @@ -636,7 +666,18 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp do + do beta = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) enddo enddo enddo @@ -644,7 +685,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_vovo,Y_vovv) + deallocate(X_ovvo) + !----- + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV,nV,nO, & 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & @@ -668,7 +711,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - double precision, allocatable :: Y_oovo(:,:,:,:) + double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) !$omp parallel & @@ -717,7 +760,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_vovo,Y_oovo) - double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + double precision, allocatable :: Y_voov(:,:,:,:), Z_ovov(:,:,:,:) allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) !$omp parallel & !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & @@ -772,8 +815,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_ovvo,Y_voov) - double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & @@ -998,36 +1042,6 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) end -! B1 -subroutine compute_B1_chol(nO,nV,t1,B1,ldb) - - implicit none - - integer, intent(in) :: nO,nV,ldb - double precision, intent(in) :: t1(nO, nV) - double precision, intent(out) :: B1(nV, nV, nV, nV) - - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - - do gam = 1, nV - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) - - do i = 1, nO - B1(a,b,beta,gam) = B1(a,b,beta,gam) & - - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & - - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) - enddo - - enddo - enddo - enddo - enddo - -end - ! g_occ subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) @@ -1091,44 +1105,52 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - !$omp parallel & - !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & - !$omp private(i,b,a,beta) & - !$omp default(none) - !$omp do + double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) + allocate(tmp_k(cholesky_ao_num)) + call dgemm('N','N', cholesky_ao_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + + call dgemm('T','N', nV*nV, 1, cholesky_ao_num, 2.d0, & + cc_space_v_vv_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + g_vir, nV*nV) + deallocate(tmp_k) + + allocate(tmp_vo(cholesky_ao_num,nV,nO)) + call dgemm('N','T',cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_ao_num*nV) + + allocate(tmp_vo2(cholesky_ao_num,nO,nV)) + do beta=1,nV + do i=1,nO + do k=1,cholesky_ao_num + tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) + enddo + enddo + enddo + deallocate(tmp_vo) + do beta = 1, nV do a = 1, nV g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) enddo enddo - !$omp end do - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call dgemm('T','N', nV, nV, nO*cholesky_ao_num, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, & + tmp_vo2, cholesky_ao_num*nO, 1.d0, g_vir, nV) end ! J1 -subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) - +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) implicit none integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) - double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) double precision, intent(out) :: J1(nO, nV, nV, nO) integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam @@ -1188,11 +1210,31 @@ subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp end parallel deallocate(X_ovoo) - ! v_vvvo(b,a,beta,i) * t1(u,b) - call dgemm('N','N',nO,nV*nV*nO,nV, & - 1d0, t1 , size(t1,1), & - v_vvvo, size(v_vvvo,1), & - 1d0, J1 , size(J1,1)) + double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) + allocate(tmp_cc(cholesky_ao_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + + call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, & + t1, nO, & + 0.d0, tmp_cc, cholesky_ao_num*nV) + + call dgemm('T','N', nV*nO, nV*nO, cholesky_ao_num, 1.d0, & + tmp_cc, cholesky_ao_num, cc_space_v_vo_chol, cholesky_ao_num, & + 0.d0, J1_tmp, nV*nO) + + deallocate(tmp_cc) + + do i=1,nO + do b=1,nV + do a=1,nV + do u=1,nO + J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) + enddo + enddo + enddo + enddo + + deallocate(J1_tmp) !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) From 8729a7ca451bde897be57d61358643bb3cd84229 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 23:24:12 +0200 Subject: [PATCH 32/74] inactive --> virtual one-e term gradient ok --- src/casscf_tc_bi/grad_dm.irp.f | 78 +++++++++++++++++++++++ src/casscf_tc_bi/grad_old.irp.f | 109 ++++++++++++++++++++++++++++++++ src/casscf_tc_bi/gradient.irp.f | 94 +++++++++++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 src/casscf_tc_bi/grad_dm.irp.f create mode 100644 src/casscf_tc_bi/grad_old.irp.f create mode 100644 src/casscf_tc_bi/gradient.irp.f diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f new file mode 100644 index 00000000..0fc2e4eb --- /dev/null +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -0,0 +1,78 @@ + BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + implicit none + integer :: ii,tt,aa,indx + integer :: i,t,a,fff + double precision :: res_l(0:3), res_r(0:3) + gradvec_tc_l = 0.d0 + gradvec_tc_r = 0.d0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx = mat_idx_c_a(i,t) + call gradvec_tc_it(ii,tt,res_l) + call gradvec_tc_it(tt,ii,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + indx = mat_idx_c_v(i,a) + aa=list_virt(a) + call gradvec_tc_ia(ii,aa,res_l) + call gradvec_tc_ia(aa,ii,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx = mat_idx_a_v(i,a) +! gradvec_tc_l(indx)=gradvec_ta(t,a) + end do + end do +END_PROVIDER + +subroutine gradvec_tc_ia(i,a,res) + implicit none + BEGIN_DOC +! doubly occupied --> virtual TC gradient +! +! Corresponds to + END_DOC + integer, intent(in) :: i,a + double precision, intent(out) :: res(0:3) + res = 0.d0 + res(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + +end + +subroutine gradvec_tc_it(i,t,res) + implicit none + BEGIN_DOC +! doubly occupied --> active TC gradient +! +! Corresponds to + END_DOC + integer, intent(in) :: i,t + double precision, intent(out) :: res(0:3) + integer :: rr,r,ss,s + double precision :: dm + res = 0.d0 + res(1) = -2 * mo_bi_ortho_tc_one_e(i,t) + do rr = 1, n_act_orb + r = list_act(rr) + dm = tc_transition_matrix_mo(t,r,1,1) + res(1) += mo_bi_ortho_tc_one_e(i,r) * dm + enddo + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f new file mode 100644 index 00000000..6610dee3 --- /dev/null +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -0,0 +1,109 @@ + + BEGIN_PROVIDER [real*8, gradvec_detail_right_old, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_detail_left_old, (0:3,nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate,ll + real*8 :: res_l(0:3), res_r(0:3) + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_c_v(ii,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo +! do indx=1,nMonoEx +! ihole=excit(1,indx) +! ipart=excit(2,indx) +! call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) +! do ll = 0, 3 +! gradvec_detail_left_old (ll,indx)=res_l(ll) +! gradvec_detail_right_old(ll,indx)=res_r(ll) +! enddo +! end do + + real*8 :: norm_grad_left, norm_grad_right + norm_grad_left=0.d0 + norm_grad_right=0.d0 + do indx=1,nMonoEx + norm_grad_left+=gradvec_detail_left_old(0,indx)*gradvec_detail_left_old(0,indx) + norm_grad_right+=gradvec_detail_right_old(0,indx)*gradvec_detail_right_old(0,indx) + end do + norm_grad_left=sqrt(norm_grad_left) + norm_grad_right=sqrt(norm_grad_right) +! if (bavard) then + write(6,*) + write(6,*) ' Norm of the LEFT orbital gradient (via <0|EH|0>) : ', norm_grad_left + write(6,*) ' Norm of the RIGHT orbital gradient (via <0|HE|0>) : ', norm_grad_right + write(6,*) +! endif + + +END_PROVIDER + +subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate res_l = , and res_r = + ! q=hole, p=particle + ! res_l(0) = total matrix element + ! res_l(1) = one-electron part + ! res_l(2) = two-electron part + ! res_l(3) = three-electron part + END_DOC + implicit none + integer, intent(in) :: ihole,ipart + double precision, intent(out) :: res_l(0:3), res_r(0:3) + integer :: mu,iii,ispin,ierr,nu,istate,ll + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res_l=0.D0 + res_r=0.D0 + +! print*,'in i_h_psi' +! print*,ihole,ipart + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + + call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & + ,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) +! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) + do istate=1,N_states + do ll = 0,3 + res_l(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + res_r(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + enddo + end do + end if + end do + end do + + ! state-averaged gradient + res_l*=1.d0/dble(N_states) + res_r*=1.d0/dble(N_states) + +end + diff --git a/src/casscf_tc_bi/gradient.irp.f b/src/casscf_tc_bi/gradient.irp.f new file mode 100644 index 00000000..630bd891 --- /dev/null +++ b/src/casscf_tc_bi/gradient.irp.f @@ -0,0 +1,94 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + +! if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do +! end if + +END_PROVIDER From b4a2e9bd7648bb11f3dc379cd33673fe74a65102 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 23:32:05 +0200 Subject: [PATCH 33/74] Fixed cholesky for tiny thresholds --- src/ao_two_e_ints/cholesky.irp.f | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 4702c850..128aa483 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -134,17 +134,14 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] i = 0 ! 5. - do while (Dmax > tau) + do while ( (Dmax > tau).and.(rank < ndim) ) ! a. i = i+1 - logical :: memory_ok - memory_ok = .False. - s = 0.1d0 ! Inrease s until the arrays fit in memory - do + do ! b. Dmin = max(s*Dmax,tau) @@ -153,6 +150,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] nq=0 LDmap = 0 DLmap = 0 + Dset_rev = 0 do p=1,np if ( D(Lset(p)) > Dmin ) then nq = nq+1 @@ -180,7 +178,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif - + enddo ! d., e. @@ -197,11 +195,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] do k=1,rank L(:,k) = L_old(:,k) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO deallocate(L_old) - allocate(Delta(np,nq), stat=ierr) + allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 @@ -228,7 +226,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP ENDDO NOWAIT - !$OMP DO + !$OMP DO do k=1,N do p=1,np Ltmp_p(p,k) = L(Lset(p),k) @@ -364,7 +362,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - + !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -381,7 +379,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] deallocate(Ltmp_q, stat=ierr) ! i. - N = N+j + N = rank ! j. Dmax = D(Lset(1)) From 326dbe77408eecd626e57608aeb43d9f5d597114 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 23:32:43 +0200 Subject: [PATCH 34/74] Removed vvov --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 50f5f603..b804792f 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1409,11 +1409,23 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) t1 , size(t1,1), & 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) - call dgemm('N','N',nO,nV*nO*nV,nV, & - 1d0, t1 , size(t1,1), & - v_vvov, size(v_vvov,1), & - 1d0, K1 , size(K1,1)) + double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) +! call dgemm('N','N',nO,nV*nO*nV,nV, & +! 1d0, t1 , size(t1,1), & +! v_vvov, size(v_vvov,1), & +! 1d0, K1 , size(K1,1)) + + call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & + t1v, cholesky_ao_num*nO) + + call dgemm('T','N', nO*nO, nV*nV, cholesky_ao_num, 1.d0, & + t1v, cholesky_ao_num, cc_space_v_vv_chol, cholesky_ao_num, 0.d0, & + K1tmp, nO*nO) + + deallocate(t1v) ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) call dgemm('N','N',nV*nO,nO*nV,nV*nO, & 1d0, Y, size(Y,1) * size(Y,2), & @@ -1421,7 +1433,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) 0d0, Z, size(Z,1) * size(Z,2)) !$omp parallel & - !$omp shared(nO,nV,K1,Z) & + !$omp shared(nO,nV,K1,Z,K1tmp) & !$omp private(i,beta,a,u) & !$omp default(none) !$omp do @@ -1429,7 +1441,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) do i = 1, nO do a = 1, nV do u = 1, nO - K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) enddo enddo enddo @@ -1437,6 +1449,6 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end do !$omp end parallel - deallocate(X,Y,Z) + deallocate(K1tmp,X,Y,Z) end From b3b080929b38c152ac1bd868bad4dd311108e7e9 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 11 Jul 2023 01:56:28 +0200 Subject: [PATCH 35/74] fixed stupid bug in TC 1-RDM and one-e gradient: o-v, o-a are ok --- src/casscf_tc_bi/grad_dm.irp.f | 28 ++++++++++++++++------------ src/casscf_tc_bi/grad_old.irp.f | 21 +++++++++++++++++---- src/tc_bi_ortho/tc_prop.irp.f | 8 +++++--- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 0fc2e4eb..7f6155ab 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -11,8 +11,7 @@ do t=1,n_act_orb tt=list_act(t) indx = mat_idx_c_a(i,t) - call gradvec_tc_it(ii,tt,res_l) - call gradvec_tc_it(tt,ii,res_r) + call gradvec_tc_it(ii,tt,res_l,res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) gradvec_tc_r(fff,indx)=res_r(fff) @@ -56,23 +55,28 @@ subroutine gradvec_tc_ia(i,a,res) end -subroutine gradvec_tc_it(i,t,res) +subroutine gradvec_tc_it(i,t,res_l, res_r) implicit none BEGIN_DOC ! doubly occupied --> active TC gradient ! -! Corresponds to +! Corresponds to res_r = +! +! res_l = END_DOC integer, intent(in) :: i,t - double precision, intent(out) :: res(0:3) - integer :: rr,r,ss,s + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,ss,s,m double precision :: dm - res = 0.d0 - res(1) = -2 * mo_bi_ortho_tc_one_e(i,t) - do rr = 1, n_act_orb - r = list_act(rr) - dm = tc_transition_matrix_mo(t,r,1,1) - res(1) += mo_bi_ortho_tc_one_e(i,r) * dm + res_r = 0.d0 + do m = 1, mo_num + res_r(1) += mo_bi_ortho_tc_one_e(i,m) * tc_transition_matrix_mo(t,m,1,1) & + -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,i,1,1) + enddo + res_l = 0.d0 + do m = 1, mo_num + res_l(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(i,m,1,1) & + -mo_bi_ortho_tc_one_e(m,i) * tc_transition_matrix_mo(m,t,1,1) enddo end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index 6610dee3..ea6747b1 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -25,6 +25,19 @@ enddo enddo enddo + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do tt = 1, n_act_orb + ipart = list_act(tt) + indx = mat_idx_c_a(ii,tt) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo ! do indx=1,nMonoEx ! ihole=excit(1,indx) ! ipart=excit(2,indx) @@ -57,7 +70,7 @@ END_PROVIDER subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) BEGIN_DOC ! eq 18 of Siegbahn et al, Physica Scripta 1980 - ! we calculate res_l = , and res_r = + ! we calculate res_r = , and res_r = ! q=hole, p=particle ! res_l(0) = total matrix element ! res_l(1) = one-electron part @@ -89,12 +102,12 @@ subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) if (ierr.eq.1) then call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & - ,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + ,N_det,psi_det_size,N_states,i_H_chi_array,i_H_phi_array) ! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) do istate=1,N_states do ll = 0,3 - res_l(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase - res_r(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + res_l(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + res_r(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase enddo end do end if diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f index 5bb0e2c0..a13dc9a2 100644 --- a/src/tc_bi_ortho/tc_prop.irp.f +++ b/src/tc_bi_ortho/tc_prop.irp.f @@ -29,7 +29,7 @@ tc_transition_matrix_mo_alpha(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo do p = 1, n_occ_ab(2) ! browsing the beta electrons - m = occ(p,1) + m = occ(p,2) tc_transition_matrix_mo_beta(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo else @@ -38,12 +38,14 @@ ! Single alpha h = exc(1,1,1) ! hole in psi_det(1,1,j) p = exc(1,2,1) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) else ! Single beta h = exc(1,1,2) ! hole in psi_det(1,1,j) p = exc(1,2,2) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_beta(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_beta(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) endif endif enddo From 64ee4eab75165e4fa283cdf03393c7e93d29f66c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 15:13:01 +0200 Subject: [PATCH 36/74] Removed all vvv in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 43 ++-- src/utils_cc/mo_integrals_cc.irp.f | 323 ++++++++++++++++++++++++- 3 files changed, 335 insertions(+), 35 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 35e14313..e7b115bb 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -92,7 +92,7 @@ subroutine run_ccsd_space_orb call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1,H_vo) - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) else call compute_H_oo(nO,nV,t1,t2,tau,H_oo) @@ -588,8 +588,6 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(W_vvov,T_vvoo) - - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index b804792f..99a4e426 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -186,14 +186,13 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(X_ovov) integer :: iblock, block_size, nVmax - double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - block_size = 8 - allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) + double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) + block_size = 16 + allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & - !$omp private(b,beta,i,a) & - !$omp default(none) + !$omp private(u,i,b,a) & + !$omp default(shared) !$omp do do u = 1, nO do i = 1, nO @@ -204,26 +203,32 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo - !$omp end do nowait + !$omp end do !$omp end parallel do iblock = 1, nV, block_size nVmax = min(block_size,nV-iblock+1) + + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol , cholesky_ao_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + 0.d0, W_vvov_tmp, nV*nO) + !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & !$omp private(b,i,a,beta) & - !$omp default(none) - !$omp do collapse(2) - do beta = iblock, iblock + nVmax - 1 + !$omp default(shared) + do beta = 1, nVmax do i = 1, nO + !$omp do do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) enddo enddo + !$omp end do nowait enddo enddo - !$omp end do nowait + !$omp barrier !$omp end parallel call dgemm('T','N',nO,nVmax,nO*nV*nV, & @@ -234,6 +239,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(W_vvov,T_vvoo) + double precision, allocatable :: W_oovo(:,:,:,:) allocate(W_oovo(nO,nO,nV,nO)) @@ -462,7 +468,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & cc_space_v_vvoo,J1) call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,cc_space_v_vvov,K1) + cc_space_v_ovov,K1) ! Residual !r2 = 0d0 @@ -1346,7 +1352,7 @@ end ! K1 -subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) implicit none @@ -1354,7 +1360,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) - double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) double precision, intent(out) :: K1(nO, nV, nO, nV) double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) @@ -1412,11 +1418,6 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) -! call dgemm('N','N',nO,nV*nO*nV,nV, & -! 1d0, t1 , size(t1,1), & -! v_vvov, size(v_vvov,1), & -! 1d0, K1 , size(K1,1)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & t1v, cholesky_ao_num*nO) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 62237229..a68ab8de 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -190,7 +190,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oooo,1) + n2 = size(cc_space_v_oooo,2) + n3 = size(cc_space_v_oooo,3) + n4 = size(cc_space_v_oooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_oo_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + endif END_PROVIDER @@ -200,7 +233,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vooo,1) + n2 = size(cc_space_v_vooo,2) + n3 = size(cc_space_v_vooo,3) + n4 = size(cc_space_v_vooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + endif END_PROVIDER @@ -210,7 +276,32 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovoo,1) + n2 = size(cc_space_v_ovoo,2) + n3 = size(cc_space_v_ovoo,3) + n4 = size(cc_space_v_ovoo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovoo(i1,i2,i3,i4) = cc_space_v_vooo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + endif END_PROVIDER @@ -220,7 +311,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovo(i1,i2,i3,i4) = cc_space_v_vooo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + endif END_PROVIDER @@ -230,7 +345,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ooov(i1,i2,i3,i4) = cc_space_v_ovoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + endif END_PROVIDER @@ -240,7 +379,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vvoo,1) + n2 = size(cc_space_v_vvoo,2) + n3 = size(cc_space_v_vvoo,3) + n4 = size(cc_space_v_vvoo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, & + cc_space_v_vo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vvoo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + endif END_PROVIDER @@ -250,7 +422,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vovo,1) + n2 = size(cc_space_v_vovo,2) + n3 = size(cc_space_v_vovo,3) + n4 = size(cc_space_v_vovo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vovo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + endif END_PROVIDER @@ -260,7 +465,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_voov,1) + n2 = size(cc_space_v_voov,2) + n3 = size(cc_space_v_voov,3) + n4 = size(cc_space_v_voov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_voov(i1,i2,i3,i4) = cc_space_v_vvoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + endif END_PROVIDER @@ -270,7 +499,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovvo,1) + n2 = size(cc_space_v_ovvo,2) + n3 = size(cc_space_v_ovvo,3) + n4 = size(cc_space_v_ovvo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovvo(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + endif END_PROVIDER @@ -280,7 +533,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovov,1) + n2 = size(cc_space_v_ovov,2) + n3 = size(cc_space_v_ovov,3) + n4 = size(cc_space_v_ovov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovov(i1,i2,i3,i4) = cc_space_v_vovo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + endif END_PROVIDER @@ -290,7 +567,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovv,1) + n2 = size(cc_space_v_oovv,2) + n3 = size(cc_space_v_oovv,3) + n4 = size(cc_space_v_oovv,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovv(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i4,i1,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + endif END_PROVIDER From 8c65e01eedebcf164e16ea78097d35ee42ca0b7e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 17:31:58 +0200 Subject: [PATCH 37/74] I/O in Cholesky --- src/ao_two_e_ints/EZFIO.cfg | 6 + src/ao_two_e_ints/cholesky.irp.f | 836 +++++++++++++------------ src/ccsd/ccsd_space_orb_sub.irp.f | 35 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 154 +++-- 4 files changed, 568 insertions(+), 463 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 9f523fca..9c017813 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,12 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[io_ao_cholesky] +type: Disk_access +doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [ao_integrals_threshold] type: Threshold doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 128aa483..8b969174 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -14,412 +14,438 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -BEGIN_PROVIDER [ integer, cholesky_ao_num ] + BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - ! - ! Last dimension of cholesky_ao is cholesky_ao_num - END_DOC - - integer :: rank, ndim - double precision :: tau - double precision, pointer :: L(:,:), L_old(:,:) - - - double precision :: s - double precision, parameter :: dscale = 1.d0 - - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer, allocatable :: Lset_rev(:), Dset_rev(:) - - integer :: i,j,k,m,p,q, qj, dj, p2, q2 - integer :: N, np, nq - - double precision :: Dmax, Dmin, Qmax, f - double precision, external :: get_ao_two_e_integral - logical, external :: ao_two_e_integral_zero - - double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr - - integer(omp_lock_kind), allocatable :: lock(:) - - double precision :: rss - double precision, external :: memory_of_double, memory_of_int - - - PROVIDE nucl_coord - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map - endif - deallocate(cholesky_ao) - - ndim = ao_num*ao_num - tau = ao_cholesky_threshold - - rss = 6.d0 * memory_of_double(ndim) + & - 6.d0 * memory_of_int(ndim) - call check_mem(rss, irp_here) - - allocate(L(ndim,1)) - - print *, '' - print *, 'Cholesky decomposition of AO integrals' - print *, '======================================' - print *, '' - print *, '============ =============' - print *, ' Rank Threshold' - print *, '============ =============' - - - rank = 0 - - allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) - allocate( addr(3,ndim) ) - do k=1,ndim - call omp_init_lock(lock(k)) - enddo - - ! 1. - k=0 - do j=1,ao_num - do i=1,ao_num - k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j - enddo - enddo - - if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) - enddo - !$OMP END PARALLEL DO - else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) - enddo - !$OMP END PARALLEL DO - endif - - Dmax = maxval(D) - - ! 2. - np=0 - Lset_rev = 0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - Lset_rev(p) = np - endif - enddo - - ! 3. - N = 0 - - ! 4. - i = 0 - - ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) - ! a. - i = i+1 - - s = 0.1d0 - - ! Inrease s until the arrays fit in memory - do - - ! b. - Dmin = max(s*Dmax,tau) - - ! c. - nq=0 - LDmap = 0 - DLmap = 0 - Dset_rev = 0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p - endif - enddo - - call resident_memory(rss) - rss = rss & - + np*memory_of_double(nq) & ! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) - ! Ltmp_q(nq,block_size) - - if (rss > qp_max_mem) then - s = s*2.d0 - else - exit - endif - - if ((s > 1.d0).or.(nq == 0)) then - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif - - enddo - - ! d., e. - block_size = max(N,24) - - L_old => L - allocate(L(ndim,rank+nq), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' - stop -1 - endif - - !$OMP PARALLEL DO PRIVATE(k) - do k=1,rank - L(:,k) = L_old(:,k) - enddo - !$OMP END PARALLEL DO - - deallocate(L_old) - - allocate(Delta(np,nq), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 - endif - - allocate(Ltmp_p(np,block_size), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' - stop -1 - endif - - allocate(Ltmp_q(nq,block_size), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' - stop -1 - endif - - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - - !$OMP DO - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP ENDDO NOWAIT - - !$OMP DO - do k=1,N - do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) - enddo - do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP BARRIER - - !$OMP DO SCHEDULE(guided) - do m=1,nq - - call omp_set_lock(lock(m)) - do k=1,np - ! Apply only to (k,m) pairs where k is not in Dset - if (LDmap(k) /= 0) cycle - q = Lset_rev(addr(3,Lset(k))) - if ((0 < q).and.(q < k)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), & - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num + END_DOC + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + + + double precision :: s + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer, allocatable :: Lset_rev(:), Dset_rev(:) + + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + double precision, external :: ao_two_e_integral + integer :: block_size, iblock, ierr + + integer(omp_lock_kind), allocatable :: lock(:) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + integer, external :: getUnitAndOpen + integer :: iunit + + ndim = ao_num*ao_num + deallocate(cholesky_ao) + + if (read_ao_cholesky) then + print *, 'Reading Cholesky vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') + read(iunit) rank + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + read(iunit) cholesky_ao + close(iunit) + cholesky_ao_num = rank + + else + + PROVIDE nucl_coord + + if (.not.do_direct_integrals) then + PROVIDE ao_two_e_integrals_in_map + endif + + tau = ao_cholesky_threshold + + rss = 6.d0 * memory_of_double(ndim) + & + 6.d0 * memory_of_int(ndim) + call check_mem(rss, irp_here) + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + + + rank = 0 + + allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) + allocate( addr(3,ndim) ) + do k=1,ndim + call omp_init_lock(lock(k)) + enddo + + ! 1. + k=0 + do j=1,ao_num + do i=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j + enddo + enddo + + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif + + Dmax = maxval(D) + + ! 2. + np=0 + Lset_rev = 0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + Lset_rev(p) = np + endif + enddo + + ! 3. + N = 0 + + ! 4. + i = 0 + + ! 5. + do while ( (Dmax > tau).and.(rank < ndim) ) + ! a. + i = i+1 + + s = 0.1d0 + + ! Inrease s until the arrays fit in memory + do while (.True.) + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + LDmap = 0 + DLmap = 0 + Dset_rev = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p endif - if (q /= 0) Delta(q,m) = Delta(k,m) - endif - enddo - - j = Dset_rev(addr(3,Dset(m))) - if ((0 < j).and.(j < m)) then - call omp_unset_lock(lock(m)) - cycle - endif - - if ((j /= m).and.(j /= 0)) then - call omp_set_lock(lock(j)) - endif - do k=1,nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), & - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(p,m) - if (j /= 0) Delta(p,j) = Delta(p,m) - if (q*j /= 0) Delta(q,j) = Delta(p,m) - endif - enddo - call omp_unset_lock(lock(m)) - if ((j /= m).and.(j /= 0)) then - call omp_unset_lock(lock(j)) - endif - enddo - !$OMP END DO - - !$OMP END PARALLEL - - if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - endif - - ! f. - Qmax = D(Dset(1)) - do q=1,nq - Qmax = max(Qmax, D(Dset(q))) - enddo - - ! g. - - iblock = 0 - do j=1,nq - - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit - ! i. - rank = N+j - - if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - iblock = 0 - endif - - ! ii. - do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then - exit - endif - enddo - - L(1:ndim, rank) = 0.d0 - - iblock = iblock+1 - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - - ! iv. - if (iblock > 1) then - call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & - Ltmp_p(1,iblock), 1) - endif - - ! iii. - f = 1.d0/dsqrt(Qmax) - - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) - !$OMP DO - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f - L(Lset(p), rank) = Ltmp_p(p,iblock) - D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) - enddo - !$OMP END DO - - !$OMP DO - do q=1,nq - Ltmp_q(q,iblock) = L(Dset(q), rank) - enddo - !$OMP END DO - - !$OMP END PARALLEL - - Qmax = D(Dset(1)) - do q=1,nq - Qmax = max(Qmax, D(Dset(q))) - enddo - - enddo - - print '(I10, 4X, ES12.3)', rank, Qmax - - deallocate(Delta, stat=ierr) - deallocate(Ltmp_p, stat=ierr) - deallocate(Ltmp_q, stat=ierr) - - ! i. - N = rank - - ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo - - np=0 - Lset_rev = 0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - Lset_rev(p) = np - endif - enddo - - enddo - - do k=1,ndim - call omp_destroy_lock(lock(k)) - enddo - - allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': Allocation failed' - stop -1 - endif - !$OMP PARALLEL DO PRIVATE(k) - do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) - enddo - !$OMP END PARALLEL DO - deallocate(L) - cholesky_ao_num = rank - - print *, '============ =============' - print *, '' - print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - print *, '' + enddo + + call resident_memory(rss) + rss = rss & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + ! Ltmp_q(nq,block_size) + + if (rss > qp_max_mem) then + s = s*2.d0 + else + exit + endif + + if ((s > 1.d0).or.(nq == 0)) then + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + + enddo + + ! d., e. + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + L(:,k) = L_old(:,k) + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif + + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + + !$OMP DO + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP ENDDO NOWAIT + + !$OMP DO + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO SCHEDULE(guided) + do m=1,nq + + call omp_set_lock(lock(m)) + do k=1,np + ! Apply only to (k,m) pairs where k is not in Dset + if (LDmap(k) /= 0) cycle + q = Lset_rev(addr(3,Lset(k))) + if ((0 < q).and.(q < k)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + if (q /= 0) Delta(q,m) = Delta(k,m) + endif + enddo + + j = Dset_rev(addr(3,Dset(m))) + if ((0 < j).and.(j < m)) then + call omp_unset_lock(lock(m)) + cycle + endif + + if ((j /= m).and.(j /= 0)) then + call omp_set_lock(lock(j)) + endif + do k=1,nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)),& + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)),& + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + if (q /= 0) Delta(q,m) = Delta(p,m) + if (j /= 0) Delta(p,j) = Delta(p,m) + if (q*j /= 0) Delta(q,j) = Delta(p,m) + endif + enddo + call omp_unset_lock(lock(m)) + if ((j /= m).and.(j /= 0)) then + call omp_unset_lock(lock(j)) + endif + enddo + !$OMP END DO + + !$OMP END PARALLEL + + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + ! g. + + iblock = 0 + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + ! i. + rank = N+j + + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo + + L(1:ndim, rank) = 0.d0 + + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& + Ltmp_p(1,iblock), 1) + endif + + ! iii. + f = 1.d0/dsqrt(Qmax) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP DO + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) + enddo + !$OMP END DO + + !$OMP DO + do q=1,nq + Ltmp_q(q,iblock) = L(Dset(q), rank) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + enddo + + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(Delta, stat=ierr) + deallocate(Ltmp_p, stat=ierr) + deallocate(Ltmp_q, stat=ierr) + + ! i. + N = rank + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo + + np=0 + Lset_rev = 0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + Lset_rev(p) = np + endif + enddo + + enddo + + do k=1,ndim + call omp_destroy_lock(lock(k)) + enddo + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + + if (write_ao_cholesky) then + print *, 'Writing Cholesky vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') + write(iunit) rank + write(iunit) cholesky_ao + close(iunit) + call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read') + endif + endif + + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + END_PROVIDER - + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index e7b115bb..f97514cd 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -49,9 +49,34 @@ subroutine run_ccsd_space_orb allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) - all_err = 0d0 - all_t = 0d0 + double precision :: rss, diis_mem, extra_mem + double precision, external :: memory_of_double + diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV) + call resident_memory(rss) + do while (cc_diis_depth > 1) + if (rss + diis_mem * cc_diis_depth > qp_max_mem) then + cc_diis_depth = cc_diis_depth - 1 + else + exit + endif + end do + if (cc_diis_depth <= 1) then + print *, 'Not enough memory for DIIS' + stop -1 + endif + print *, 'DIIS size ', cc_diis_depth + + allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth)) + !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED) + do j=1,cc_diis_depth + !$OMP DO + do i=1, size(all_err,1) + all_err(i,j) = 0d0 + all_t(i,j) = 0d0 + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL endif if (elec_alpha_num /= elec_beta_num) then @@ -1427,7 +1452,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -1447,7 +1472,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 99a4e426..1c56996e 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -454,21 +454,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 ! internal - double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:) integer :: u,v,i,j,beta,gam,a,b - - allocate(g_occ(nO,nO), g_vir(nV,nV)) - allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) - allocate(A1(nO,nO,nO,nO)) - - call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) - call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) - call compute_A1_chol(nO,nV,t1,t2,tau,A1) - call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvoo,J1) - call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,K1) + double precision :: max_r2_local ! Residual !r2 = 0d0 @@ -490,36 +477,47 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: A1(:,:,:,:) + allocate(A1(nO,nO,nO,nO)) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) call dgemm('N','N',nO*nO,nV*nV,nO*nO, & 1d0, A1, size(A1,1) * size(A1,2), & tau, size(tau,1) * size(tau,2), & 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(A1) integer :: block_size, iblock, k block_size = 16 double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 + double precision, dimension(:,:), allocatable :: tmp_cc2 allocate(tmp_cc(cholesky_ao_num,nV,nV)) call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) - !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV)) + call set_multiple_levels_omp(.False.) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_ao_num,nV)) !$OMP DO do gam = 1, nV do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & - -1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - tmp_cc(1,1,gam), cholesky_ao_num, 0.d0, tmpB1, nV*block_size) call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, tmpB1, nV*block_size) + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, & + 0.d0, tmpB1, nV*block_size) + + do a=1,nV + do k=1,cholesky_ao_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, & - tmpB1, nV*block_size) + tmp_cc2, cholesky_ao_num, & + 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) do b = 1, nV @@ -538,15 +536,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$OMP ENDDO - deallocate(B1, tmpB1) + deallocate(B1, tmpB1, tmp_cc2) !$OMP END PARALLEL deallocate(tmp_cc) - double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) - allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) - + double precision, allocatable :: X_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV)) !$omp parallel & !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & @@ -564,10 +561,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: g_vir(:,:) + allocate(g_vir(nV,nV)) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + double precision, allocatable :: Y_oovv(:,:,:,:) + allocate(Y_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + deallocate(g_vir) + deallocate(X_oovv) !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -585,11 +591,18 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Y_oovv) + double precision, allocatable :: g_occ(:,:) + allocate(g_occ(nO,nO)) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO,nO*nV*nV,nO, & 1d0, g_occ , size(g_occ,1), & t2 , size(t2,1), & 0d0, X_oovv, size(X_oovv,1)) + deallocate(g_occ) !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -613,6 +626,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: X_vovv(:,:,:,:) allocate(X_vovv(nV,nO,nV,block_size)) + allocate(Y_oovv(nO,nO,nV,nV)) + do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & @@ -626,6 +641,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) enddo + deallocate(X_vovv) !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -643,6 +659,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Y_oovv) double precision, allocatable :: X_ovvo(:,:,:,:) double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) @@ -693,6 +710,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_ovvo) !----- + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV,nV,nO, & @@ -716,9 +734,10 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(X_oovv) double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + allocate(X_vovo(nV,nO,nV,nO)) !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & @@ -737,15 +756,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end parallel + allocate(Y_oovo(nO,nO,nV,nO)) call dgemm('N','N',nO,nO*nV*nO,nV, & 1d0, t1, size(t1,1), & X_vovo, size(X_vovo,1), & 0d0, Y_oovo, size(Y_oovo,1)) + deallocate(X_vovo) + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV, nV, nO, & 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + deallocate(Y_oovo) !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -763,15 +786,23 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(X_oovv) - deallocate(X_vovo,Y_oovo) - double precision, allocatable :: Y_voov(:,:,:,:), Z_ovov(:,:,:,:) - allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + double precision, allocatable :: J1(:,:,:,:) + allocate(J1(nO,nV,nV,nO)) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvoo,J1) + + double precision, allocatable :: K1(:,:,:,:) + allocate(K1(nO,nV,nO,nV)) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,K1) + + allocate(X_ovvo(nO,nV,nV,nO)) !$omp parallel & - !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & - !$omp default(none) + !$omp default(shared) do i = 1, nO !$omp do do a = 1, nV @@ -783,7 +814,15 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait enddo + !$omp end parallel + deallocate(J1) + double precision, allocatable :: Y_voov(:,:,:,:) + allocate(Y_voov(nV,nO,nO,nV)) + + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) !$omp do do gam = 1, nV do v = 1, nO @@ -797,11 +836,16 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: Z_ovov(:,:,:,:) + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + deallocate(X_ovvo,Y_voov) + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -819,10 +863,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_ovvo,Y_voov) + deallocate(Z_ovov) double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) - allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + allocate(X_ovov(nO,nV,nO,nV)) + allocate(Y_ovov(nO,nV,nO,nV)) !$omp parallel & !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & @@ -853,10 +898,12 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + allocate(Z_ovov(nO,nV,nO,nV)) call dgemm('T','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov, Y_ovov) !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & @@ -874,9 +921,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Z_ovov) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -896,7 +945,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo @@ -904,11 +953,16 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + deallocate(K1) + + allocate(Z_ovov(nO,nV,nO,nV)) call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov,Y_ovov) + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -926,39 +980,33 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_ovov,Y_ovov,Z_ovov) + deallocate(Z_ovov) ! Change the sign for consistency with the code in spin orbitals + + max_r2 = 0d0 !$omp parallel & - !$omp shared(nO,nV,r2) & - !$omp private(i,j,a,b) & + !$omp shared(nO,nV,r2,max_r2) & + !$omp private(i,j,a,b,max_r2_local) & !$omp default(none) + max_r2_local = 0.d0 !$omp do do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO r2(i,j,a,b) = -r2(i,j,a,b) + max_r2_local = max(r2(i,j,a,b), max_r2_local) enddo enddo enddo enddo - !$omp end do + !$omp end do nowait + !$omp critical + max_r2 = max(max_r2, max_r2_local) + !$omp end critical !$omp end parallel - max_r2 = 0d0 - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - max_r2 = max(r2(i,j,a,b), max_r2) - enddo - enddo - enddo - enddo - - deallocate(g_occ,g_vir,J1,K1,A1) - end ! A1 From 9e833cc47627e819e60e05faace5fabb3540f760 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 22:17:40 +0200 Subject: [PATCH 38/74] Improve memory control --- src/ao_two_e_ints/cholesky.irp.f | 35 +++++++++++++++++++++----------- src/utils/memory.irp.f | 13 ++++++------ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 8b969174..4bf60847 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -48,7 +48,7 @@ END_PROVIDER integer(omp_lock_kind), allocatable :: lock(:) - double precision :: rss + double precision :: mem double precision, external :: memory_of_double, memory_of_int integer, external :: getUnitAndOpen @@ -70,16 +70,22 @@ END_PROVIDER PROVIDE nucl_coord - if (.not.do_direct_integrals) then + if (do_direct_integrals) then + if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then + ! Trigger providers inside ao_two_e_integral + continue + endif + else PROVIDE ao_two_e_integrals_in_map endif tau = ao_cholesky_threshold - rss = 6.d0 * memory_of_double(ndim) + & - 6.d0 * memory_of_int(ndim) - call check_mem(rss, irp_here) + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + call check_mem(mem, irp_here) + call print_memory_usage() + allocate(L(ndim,1)) print *, '' @@ -112,7 +118,7 @@ END_PROVIDER enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & addr(1,i), addr(2,i)) @@ -175,20 +181,20 @@ END_PROVIDER endif enddo - call resident_memory(rss) - rss = rss & + call total_memory(mem) + mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) - ! Ltmp_q(nq,block_size) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - if (rss > qp_max_mem) then + if (mem > qp_max_mem) then s = s*2.d0 else exit endif if ((s > 1.d0).or.(nq == 0)) then + call print_memory_usage() print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif @@ -201,6 +207,7 @@ END_PROVIDER L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif @@ -215,18 +222,21 @@ END_PROVIDER allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 endif allocate(Ltmp_p(np,block_size), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' stop -1 endif allocate(Ltmp_q(nq,block_size), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' stop -1 endif @@ -253,7 +263,7 @@ END_PROVIDER !$OMP BARRIER - !$OMP DO SCHEDULE(guided) + !$OMP DO SCHEDULE(dynamic) do m=1,nq call omp_set_lock(lock(m)) @@ -419,6 +429,7 @@ END_PROVIDER allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': Allocation failed' stop -1 endif diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 7da283ec..41ec0428 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -99,16 +99,15 @@ subroutine check_mem(rss_in,routine) END_DOC double precision, intent(in) :: rss_in character*(*) :: routine - double precision :: rss - !$OMP CRITICAL - call resident_memory(rss) - rss += rss_in - if (int(rss)+1 > qp_max_mem) then + double precision :: mem + call total_memory(mem) + mem += rss_in + if (mem > qp_max_mem) then + call print_memory_usage() print *, 'Not enough memory: aborting in ', routine - print *, int(rss)+1, ' GB required' + print *, mem, ' GB required' stop -1 endif - !$OMP END CRITICAL end subroutine print_memory_usage() From 349f956e1cd8c6af519718b3043d4f8fd26b7f4f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 22:31:51 +0200 Subject: [PATCH 39/74] Super fast cholesky --- src/ao_two_e_ints/cholesky.irp.f | 92 +++++++++++--------------------- 1 file changed, 30 insertions(+), 62 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 4bf60847..7d02d27f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -35,6 +35,7 @@ END_PROVIDER double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) integer, allocatable :: Lset_rev(:), Dset_rev(:) + logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, qj, dj, p2, q2 integer :: N, np, nq @@ -158,7 +159,7 @@ END_PROVIDER ! a. i = i+1 - s = 0.1d0 + s = 0.01d0 ! Inrease s until the arrays fit in memory do while (.True.) @@ -242,11 +243,14 @@ END_PROVIDER endif + allocate(computed(nq)) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) !$OMP DO do q=1,nq Delta(:,q) = 0.d0 + computed(q) = .False. enddo !$OMP ENDDO NOWAIT @@ -262,64 +266,6 @@ END_PROVIDER !$OMP END DO NOWAIT !$OMP BARRIER - - !$OMP DO SCHEDULE(dynamic) - do m=1,nq - - call omp_set_lock(lock(m)) - do k=1,np - ! Apply only to (k,m) pairs where k is not in Dset - if (LDmap(k) /= 0) cycle - q = Lset_rev(addr(3,Lset(k))) - if ((0 < q).and.(q < k)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(k,m) - endif - enddo - - j = Dset_rev(addr(3,Dset(m))) - if ((0 < j).and.(j < m)) then - call omp_unset_lock(lock(m)) - cycle - endif - - if ((j /= m).and.(j /= 0)) then - call omp_set_lock(lock(j)) - endif - do k=1,nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)),& - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)),& - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(p,m) - if (j /= 0) Delta(p,j) = Delta(p,m) - if (q*j /= 0) Delta(q,j) = Delta(p,m) - endif - enddo - call omp_unset_lock(lock(m)) - if ((j /= m).and.(j /= 0)) then - call omp_unset_lock(lock(j)) - endif - enddo - !$OMP END DO - !$OMP END PARALLEL if (N>0) then @@ -358,6 +304,27 @@ END_PROVIDER L(1:ndim, rank) = 0.d0 + if (.not.computed(dj)) then + m = dj + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + endif + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. + endif + iblock = iblock+1 do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) @@ -398,9 +365,10 @@ END_PROVIDER print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(Delta, stat=ierr) - deallocate(Ltmp_p, stat=ierr) - deallocate(Ltmp_q, stat=ierr) + deallocate(computed) + deallocate(Delta) + deallocate(Ltmp_p) + deallocate(Ltmp_q) ! i. N = rank From 1e390d83574392887c6e3890b9f860c98cd66904 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 23:50:31 +0200 Subject: [PATCH 40/74] Reduce memory --- src/ao_two_e_ints/cholesky.irp.f | 25 ++----------------------- src/ccsd/ccsd_space_orb_sub.irp.f | 7 +++++++ 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 7d02d27f..175ccf6e 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -33,8 +33,7 @@ END_PROVIDER double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer, allocatable :: Lset_rev(:), Dset_rev(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, qj, dj, p2, q2 @@ -47,8 +46,6 @@ END_PROVIDER double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr - integer(omp_lock_kind), allocatable :: lock(:) - double precision :: mem double precision, external :: memory_of_double, memory_of_int @@ -100,12 +97,8 @@ END_PROVIDER rank = 0 - allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) + allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) - do k=1,ndim - call omp_init_lock(lock(k)) - enddo ! 1. k=0 @@ -139,12 +132,10 @@ END_PROVIDER ! 2. np=0 - Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p - Lset_rev(p) = np endif enddo @@ -169,16 +160,10 @@ END_PROVIDER ! c. nq=0 - LDmap = 0 - DLmap = 0 - Dset_rev = 0 do p=1,np if ( D(Lset(p)) > Dmin ) then nq = nq+1 Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p endif enddo @@ -380,21 +365,15 @@ END_PROVIDER enddo np=0 - Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p - Lset_rev(p) = np endif enddo enddo - do k=1,ndim - call omp_destroy_lock(lock(k)) - enddo - allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then call print_memory_usage() diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index f97514cd..b48ca7da 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,6 +18,13 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE mo_two_e_integrals_in_map + endif + det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) From 3c89e9d88d21d6ea171889989008172b53262e67 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 11:50:34 +0200 Subject: [PATCH 41/74] Fixed qp set_file --- etc/qp.rc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/etc/qp.rc b/etc/qp.rc index 9eec4570..c485abea 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,19 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + # Array to store directory names + dirs=() + + # Find directories containing "ezfio/.version" file recursively + for i in $(find . -name ezfio | sed 's/ezfio$/.version/') + do + dir_name=${i%/.version} # Remove the ".version" suffix + dir_name=${dir_name#./} # Remove the leading "./" if present + dirs+=("$dir_name") + done + + # Output the directory names for completion + COMPREPLY=("${dirs[@]/#/.\/}") # Prefix each directory name with "./" return 0 ;; plugins) From 0aed20f53a68c0225d05b7917da351926c2234e0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 12:04:42 +0200 Subject: [PATCH 42/74] Fixed previous commit --- etc/qp.rc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/etc/qp.rc b/etc/qp.rc index c485abea..d316faf5 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -189,18 +189,17 @@ _qp_Complete() esac;; set_file) # Array to store directory names - dirs=() + dirs="" # Find directories containing "ezfio/.version" file recursively for i in $(find . -name ezfio | sed 's/ezfio$/.version/') do dir_name=${i%/.version} # Remove the ".version" suffix - dir_name=${dir_name#./} # Remove the leading "./" if present - dirs+=("$dir_name") + dir_name=${dir_name#./} # Remove the leading "./" + dirs+="./$dir_name " done - # Output the directory names for completion - COMPREPLY=("${dirs[@]/#/.\/}") # Prefix each directory name with "./" + COMPREPLY=( $(compgen -W "$dirs" -- ${cur} ) ) return 0 ;; plugins) From d4574f24d981d793a6038039a4b19af6733fe7a3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 12:34:48 +0200 Subject: [PATCH 43/74] Reduced memory in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 154 +++++++++++----------- src/mo_two_e_ints/cholesky.irp.f | 24 ++-- src/mo_two_e_ints/integrals_3_index.irp.f | 14 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 16 +-- src/utils_cc/mo_integrals_cc.irp.f | 62 ++++----- 5 files changed, 139 insertions(+), 131 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 1c56996e..5969928a 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -209,9 +209,9 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do iblock = 1, nV, block_size nVmax = min(block_size,nV-iblock+1) - call dgemm('T','N', nV*nO, nV*nVmax, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol , cholesky_ao_num, & - cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol , cholesky_mo_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & 0.d0, W_vvov_tmp, nV*nO) !$omp parallel & @@ -304,7 +304,7 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) - allocate(tau_kau(cholesky_ao_num,nV,nO)) + allocate(tau_kau(cholesky_mo_num,nV,nO)) !$omp parallel & !$omp default(shared) & !$omp private(i,u,j,k,a,b,tmp_vov) @@ -318,9 +318,9 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) enddo enddo enddo - call dgemm('N','T',cholesky_ao_num,nV,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, tmp_vov, nV, & - 0.d0, tau_kau(1,1,u), cholesky_ao_num) + call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_mo_num) enddo !$omp end do nowait deallocate(tmp_vov) @@ -333,8 +333,8 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) !$omp end do nowait !$omp barrier !$omp end parallel - call dgemm('T', 'N', nO, nO, cholesky_ao_num*nV, 1.d0, & - tau_kau, cholesky_ao_num*nV, cc_space_v_vo_chol, cholesky_ao_num*nV, & + call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & 1.d0, H_oo, nO) end @@ -353,7 +353,7 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) - allocate(tau_kia(cholesky_ao_num,nO,nV)) + allocate(tau_kia(cholesky_mo_num,nO,nV)) !$omp parallel & !$omp default(shared) & !$omp private(i,beta,j,k,a,b,tmp_oov) @@ -367,9 +367,9 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) enddo enddo enddo - call dgemm('N','T',cholesky_ao_num,nO,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, tmp_oov, nO, & - 0.d0, tau_kia(1,1,a), cholesky_ao_num) + call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_mo_num) enddo !$omp end do nowait deallocate(tmp_oov) @@ -383,8 +383,8 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) !$omp end do nowait !$omp barrier !$omp end parallel - call dgemm('T', 'N', nV, nV, cholesky_ao_num*nO, -1.d0, & - tau_kia, cholesky_ao_num*nO, cc_space_v_ov_chol, cholesky_ao_num*nO, & + call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & 1.d0, H_vv, nV) end @@ -407,33 +407,33 @@ subroutine compute_H_vo_chol(nO,nV,t1,H_vo) enddo enddo - allocate(tmp_k(cholesky_ao_num)) - call dgemm('N', 'N', cholesky_ao_num, 1, nO*nV, 2.d0, & - cc_space_v_ov_chol, cholesky_ao_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - call dgemm('T','N',nV*nO,1,cholesky_ao_num,1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & H_vo, nV*nO) deallocate(tmp_k) - allocate(tmp(cholesky_ao_num,nO,nO)) - allocate(tmp2(cholesky_ao_num,nO,nO)) + allocate(tmp(cholesky_mo_num,nO,nO)) + allocate(tmp2(cholesky_mo_num,nO,nO)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, tmp, cholesky_ao_num*nO) + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) do i=1,nO do j=1,nO - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp2(k,j,i) = tmp(k,i,j) enddo enddo enddo deallocate(tmp) - call dgemm('T','N', nV, nO, cholesky_ao_num*nO, -1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, tmp2, cholesky_ao_num*nO, & + call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & 1.d0, H_vo, nV) end @@ -491,32 +491,32 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 double precision, dimension(:,:), allocatable :: tmp_cc2 - allocate(tmp_cc(cholesky_ao_num,nV,nV)) - call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) + allocate(tmp_cc(cholesky_mo_num,nV,nV)) + call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) call set_multiple_levels_omp(.False.) !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_ao_num,nV)) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & - -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, & + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) do a=1,nV - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) enddo enddo - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - tmp_cc2, cholesky_ao_num, & + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) @@ -630,9 +630,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) - call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, cc_space_v_ov_chol, & - cholesky_ao_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & + cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) enddo call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & @@ -663,19 +663,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: X_ovvo(:,:,:,:) double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) - allocate(tcc2(cholesky_ao_num,nV,nO), X_ovvo(nO,nV,nV,nO)) - allocate(tcc(cholesky_ao_num,nO,nV)) + allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_mo_num,nO,nV)) - call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, & - 0.d0, tcc2, cholesky_ao_num*nV) + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_mo_num*nV) - call dgemm('N','N', cholesky_ao_num*nO, nV, nO, 1.d0, & - cc_space_v_oo_chol, cholesky_ao_num*nO, t1, nO, & - 0.d0, tcc, cholesky_ao_num*nO) + call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & + 0.d0, tcc, cholesky_mo_num*nO) - call dgemm('T','N', nO*nV, nV*nO, cholesky_ao_num, 1.d0, & - tcc, cholesky_ao_num, tcc2, cholesky_ao_num, 0.d0, & + call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & X_ovvo, nO*nV) deallocate(tcc, tcc2) @@ -1160,23 +1160,23 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) 0d0, g_vir, size(g_vir,1)) double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) - allocate(tmp_k(cholesky_ao_num)) - call dgemm('N','N', cholesky_ao_num, 1, nO*nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - call dgemm('T','N', nV*nV, 1, cholesky_ao_num, 2.d0, & - cc_space_v_vv_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & g_vir, nV*nV) deallocate(tmp_k) - allocate(tmp_vo(cholesky_ao_num,nV,nO)) - call dgemm('N','T',cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_ao_num*nV) + allocate(tmp_vo(cholesky_mo_num,nV,nO)) + call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) - allocate(tmp_vo2(cholesky_ao_num,nO,nV)) + allocate(tmp_vo2(cholesky_mo_num,nO,nV)) do beta=1,nV do i=1,nO - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) enddo enddo @@ -1189,9 +1189,9 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) enddo enddo - call dgemm('T','N', nV, nV, nO*cholesky_ao_num, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, & - tmp_vo2, cholesky_ao_num*nO, 1.d0, g_vir, nV) + call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, & + tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) end @@ -1265,15 +1265,15 @@ subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) deallocate(X_ovoo) double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) - allocate(tmp_cc(cholesky_ao_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) - call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, & + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, & t1, nO, & - 0.d0, tmp_cc, cholesky_ao_num*nV) + 0.d0, tmp_cc, cholesky_mo_num*nV) - call dgemm('T','N', nV*nO, nV*nO, cholesky_ao_num, 1.d0, & - tmp_cc, cholesky_ao_num, cc_space_v_vo_chol, cholesky_ao_num, & + call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & 0.d0, J1_tmp, nV*nO) deallocate(tmp_cc) @@ -1464,14 +1464,14 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) - allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & - t1v, cholesky_ao_num*nO) + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & + t1v, cholesky_mo_num*nO) - call dgemm('T','N', nO*nO, nV*nV, cholesky_ao_num, 1.d0, & - t1v, cholesky_ao_num, cc_space_v_vv_chol, cholesky_ao_num, 0.d0, & + call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & K1tmp, nO*nO) deallocate(t1v) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 3a868cbe..349f13b9 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,4 +1,12 @@ -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] +BEGIN_PROVIDER [ integer, cholesky_mo_num ] + implicit none + BEGIN_DOC + ! Number of Cholesky vectors in MO basis + END_DOC + cholesky_mo_num = cholesky_ao_num +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis @@ -8,7 +16,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num do j=1,mo_num do i=1,mo_num cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) @@ -19,7 +27,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num END_PROVIDER -BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis @@ -29,14 +37,14 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' - allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr) + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) if (ierr /= 0) then print *, irp_here, ': Allocation failed' endif - call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) - call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num) + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) deallocate(X) END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index d807f619..eb05da84 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -13,14 +13,14 @@ if (do_ao_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) - allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) do j=1,mo_num buffer_jj(:,j) = cholesky_mo_transp(:,j,j) enddo - call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - buffer_jj, cholesky_ao_num, 0.d0, & + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + buffer_jj, cholesky_mo_num, 0.d0, & buffer, mo_num*mo_num) do k = 1, mo_num @@ -36,9 +36,9 @@ do j = 1, mo_num - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, 0.d0, & buffer_jj, mo_num) do k=1,mo_num diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0d3fe176..0e77b6a2 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -479,9 +479,9 @@ subroutine add_integrals_to_map_cholesky !$OMP DO SCHEDULE(dynamic) do l=1,mo_num - call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_mo_num,1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & Vtmp, mo_num*mo_num) do k=1,l @@ -1364,20 +1364,20 @@ END_PROVIDER if (do_ao_cholesky) then double precision, allocatable :: buffer(:,:) - allocate (buffer(cholesky_ao_num,mo_num)) - do k=1,cholesky_ao_num + allocate (buffer(cholesky_mo_num,mo_num)) + do k=1,cholesky_mo_num do i=1,mo_num buffer(k,i) = cholesky_mo_transp(k,i,i) enddo enddo - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, & - buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num) + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num,1.d0, & + buffer, cholesky_mo_num, buffer, cholesky_mo_num, 0.d0, mo_two_e_integrals_jj, mo_num) deallocate(buffer) do j=1,mo_num do i=1,mo_num mo_two_e_integrals_jj_exchange(i,j) = 0.d0 - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) enddo diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index a68ab8de..b2b68d05 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -50,15 +50,15 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) if (do_ao_cholesky) then double precision, allocatable :: buffer(:,:,:,:) double precision, allocatable :: v1(:,:,:), v2(:,:,:) - allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) allocate(buffer(n1,n3,n2,n4)) - call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num) - call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num) + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_mo_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_mo_num) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - v1, cholesky_ao_num, & - v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1*n3) deallocate(v1,v2) @@ -119,7 +119,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) idx3 = list3(i3) do i1=1,n1 idx1 = list1(i1) - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) enddo enddo @@ -137,15 +137,15 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] double precision, allocatable :: buffer(:,:,:) call set_multiple_levels_omp(.False.) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_mo_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) allocate(buffer(mo_num,mo_num,mo_num)) !$OMP DO do i4 = 1, mo_num - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,i4), cholesky_mo_num, 0.d0, buffer, mo_num*mo_num) do i2 = 1, mo_num do i3 = 1, mo_num do i1 = 1, mo_num @@ -203,9 +203,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_oo_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -246,9 +246,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -392,9 +392,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, & - cc_space_v_vo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_vo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -435,9 +435,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -645,35 +645,35 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)] +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_mo_num, cc_nVa, cc_nVa)] implicit none - call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)] +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_mo_num, cc_nVa, cc_nOa)] implicit none - call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)] +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_mo_num, cc_nOa, cc_nVa)] implicit none - call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)] +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, cc_nOa)] implicit none - call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_mo_num) END_PROVIDER From fba2fefb1943cff26ba7c18d1ee92448b8482b3a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 18:33:18 +0200 Subject: [PATCH 44/74] Moved loop --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 5969928a..fc5da8c0 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -501,6 +501,13 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV + + do a=1,nV + do k=1,cholesky_mo_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo + do iblock = 1, nV, block_size call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & @@ -508,12 +515,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) - do a=1,nV - do k=1,cholesky_mo_num - tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) - enddo - enddo - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & tmp_cc2, cholesky_mo_num, & From 4ad77651276305fdfbed5648a57bb9965dab636b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 18:56:13 +0200 Subject: [PATCH 45/74] Minor changes --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index fc5da8c0..0ba46e56 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -457,8 +457,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) integer :: u,v,i,j,beta,gam,a,b double precision :: max_r2_local - ! Residual - !r2 = 0d0 + call set_multiple_levels_omp(.False.) !$omp parallel & !$omp shared(nO,nV,r2,cc_space_v_oovv) & From b102cc816aa709da2f105ab989816b4031daf1bd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 15 Jul 2023 18:44:47 +0200 Subject: [PATCH 46/74] ARM perf library in parallel --- config/gfortran_armpl.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg index fb5ee1cc..370e396e 100644 --- a/config/gfortran_armpl.cfg +++ b/config/gfortran_armpl.cfg @@ -14,7 +14,7 @@ # [COMMON] FC : gfortran -g -ffree-line-length-none -I . -fPIC -march=native -LAPACK_LIB : -larmpl_lp64 +LAPACK_LIB : -larmpl_lp64_mp IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED From 467f7563797c4c32fc9597542b9d761309e3565c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 16 Jul 2023 20:04:17 +0200 Subject: [PATCH 47/74] Optimized A1 in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 48 ++++---------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0ba46e56..ec6c2afb 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1023,56 +1023,26 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta - double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) - allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + double precision, allocatable :: Y_oooo(:,:,:,:) + allocate(Y_oooo(nO,nO,nO,nO)) ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) - !$omp parallel & - !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & - !$omp private(u,v,i,j) & - !$omp default(none) - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do v = 1, nO - do u = 1, nO - A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) - enddo - enddo - enddo - enddo - !$omp end do nowait - ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do u = 1, nO - do a = 1, nV - X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - call dgemm('N','N', nO, nO*nO*nO, nV, & 1d0, t1 , size(t1,1), & - X_vooo, size(X_vooo,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & 0d0, Y_oooo, size(Y_oooo,1)) !$omp parallel & - !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & - !$omp default(none) + !$omp default(shared) !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO do u = 1, nO - A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) enddo enddo enddo @@ -1080,13 +1050,7 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) !$omp end do !$omp end parallel - deallocate(X_vooo,Y_oooo) - - ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) - call dgemm('N','N', nO, nO*nO*nO, nV, & - 1d0, t1 , size(t1,1), & - cc_space_v_vooo, size(cc_space_v_vooo,1), & - 1d0, A1 , size(A1,1)) + deallocate(Y_oooo) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) call dgemm('N','N', nO*nO, nO*nO, nV*nV, & From bd570b19c1d4ae0479b8ff4c4611ae1127605441 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 17 Jul 2023 17:05:48 +0200 Subject: [PATCH 48/74] fix bug restore_symmetry --- src/utils/linear_algebra.irp.f | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 65c57a76..314ad4f6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1661,7 +1661,15 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! Update i i = i + 1 enddo - copy(i:) = 0.d0 + + ! To nullify the remaining elements that are below the threshold + if (i == sze) then + if (-copy(i) <= thresh) then + copy(i) = 0d0 + endif + else + copy(i:) = 0.d0 + endif !$OMP PARALLEL if (sze>10000) & !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & From cc7b97c09b5f8a970319a3e247551c34401e731c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 5 Aug 2023 01:47:48 +0200 Subject: [PATCH 49/74] Cleaning in C --- external/ezfio | 2 +- external/irpf90 | 2 +- src/utils/fortran_mmap.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 71426002..e8d85a2f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -9,7 +9,6 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) { - int i; int fd; int result; void* map; From c6b50d5f500faed66fdc13a2b2c4d2dd874fbca1 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 7 Aug 2023 16:39:53 +0200 Subject: [PATCH 50/74] found a bug in left-right coefficients in perturbation --- src/cipsi_tc_bi_ortho/selection.irp.f | 35 +++++++++++++++++++++++---- src/fci_tc_bi/selectors.irp.f | 2 ++ src/tc_bi_ortho/tc_h_eigvectors.irp.f | 5 ++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 77377554..06cf848b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -893,20 +893,45 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function enddo else if(debug_tc_pt2 == 2)then !! debugging the new version +! psi_h_alpha_tmp = 0.d0 +! alpha_h_psi_tmp = 0.d0 +! do iii = 1, N_det_selectors ! old version +! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) +! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) +! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function +! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function +! enddo psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function + do iii = 1, N_det ! old version + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) + psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function + alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function enddo if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) if(error.gt.1.d-2)then + call debug_det(det, N_int) print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E + print*,psi_h_alpha , alpha_h_psi + print*,psi_h_alpha_tmp , alpha_h_psi_tmp + print*,'selectors ' + do iii = 1, N_det_selectors ! old version + print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + print*,i_h_alpha,alpha_h_i + call debug_det(psi_selectors(1,1,iii),N_int) + enddo +! print*,'psi_det ' +! do iii = 1, N_det! old version +! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) +! call debug_det(psi_det(1,1,iii),N_int) +! enddo + stop endif endif else diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 4d3de7d0..7f93ae55 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -27,6 +27,8 @@ END_PROVIDER implicit none BEGIN_DOC ! Determinants on which we apply for perturbation. + ! psi_selectors_coef_tc(iii,1,istate) = left coefficient of the iii determinant + ! psi_selectors_coef_tc(iii,2,istate) = right coefficient of the iii determinant END_DOC integer :: i,k diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f027c38f..48257943 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -328,6 +328,11 @@ end TOUCH psi_r_coef_bi_ortho call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) deallocate(buffer) +! print*,'After diag' +! do i = 1, N_det! old version +! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) +! call debug_det(psi_det(1,1,i),N_int) +! enddo END_PROVIDER From c945e027c03c283682f6bffdb933c588069450bf Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 7 Aug 2023 16:56:10 +0200 Subject: [PATCH 51/74] fixed a bug in psi_selectors_coef --- src/cipsi_tc_bi_ortho/get_d0_good.irp.f | 8 +++--- src/cipsi_tc_bi_ortho/get_d1_good.irp.f | 36 ++++++++++++------------- src/cipsi_tc_bi_ortho/get_d2_good.irp.f | 28 +++++++++---------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f index 4270e7b8..9bba162e 100644 --- a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f @@ -53,7 +53,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij ! HOTSPOT + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT enddo end do !!!!!!!!!! @@ -72,7 +72,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do end do @@ -109,7 +109,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end do @@ -128,7 +128,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f index bc19e7e4..b2a38e02 100644 --- a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f @@ -76,7 +76,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -88,7 +88,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -114,7 +114,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -126,7 +126,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -169,7 +169,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -180,7 +180,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -211,7 +211,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -222,7 +222,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -265,7 +265,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,1) - hij_cache(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do do putj=hfix+1,mo_num @@ -274,7 +274,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,2) - hij_cache(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do @@ -293,7 +293,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,1) - hji_cache(putj,2) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do do putj=hfix+1,mo_num @@ -302,7 +302,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,2) - hji_cache(putj,1) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do @@ -342,7 +342,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -353,7 +353,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -385,7 +385,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -396,7 +396,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -445,8 +445,8 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, do k=1,N_states ! take conjugate to get contribution to instead of ! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f index 0a08c808..d01ed433 100644 --- a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -79,12 +79,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if end do @@ -103,12 +103,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end do @@ -135,7 +135,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo endif end do @@ -154,7 +154,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo endif end do @@ -189,7 +189,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij enddo end do end do @@ -210,7 +210,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji enddo end do end do @@ -239,12 +239,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo endif end do @@ -262,12 +262,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo endif end do @@ -290,7 +290,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if !! @@ -299,7 +299,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end if From 0440def3637b4fc528ffd488f117247662e12c4d Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 9 Aug 2023 16:23:09 +0200 Subject: [PATCH 52/74] added lccsd --- src/cisd/EZFIO.cfg | 8 ++++ src/cisd/NEED | 1 + src/cisd/lccsd.irp.f | 95 +++++++++++++++++++++++++++++++++++++++ src/cisd/lccsd_prov.irp.f | 44 ++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 src/cisd/lccsd.irp.f create mode 100644 src/cisd/lccsd_prov.irp.f diff --git a/src/cisd/EZFIO.cfg b/src/cisd/EZFIO.cfg index 4565d2df..688f802a 100644 --- a/src/cisd/EZFIO.cfg +++ b/src/cisd/EZFIO.cfg @@ -5,3 +5,11 @@ interface: ezfio size: (determinants.n_states) + +[lcc_energy] +type: double precision +doc: lccsd energy +interface: ezfio +size: (determinants.n_states) + + diff --git a/src/cisd/NEED b/src/cisd/NEED index d9ad3efc..616d021e 100644 --- a/src/cisd/NEED +++ b/src/cisd/NEED @@ -1,3 +1,4 @@ selectors_full single_ref_method davidson_undressed +dav_general_mat diff --git a/src/cisd/lccsd.irp.f b/src/cisd/lccsd.irp.f new file mode 100644 index 00000000..919c5aaa --- /dev/null +++ b/src/cisd/lccsd.irp.f @@ -0,0 +1,95 @@ +program lccsd + implicit none + BEGIN_DOC +! Linerarized CCSD +! + ! This program takes a reference Slater determinant of ROHF-like occupancy, + ! + ! and performs all single and double excitations on top of it, disregarding + ! spatial symmetry and compute the "n_states" lowest eigenstates of that CI + ! matrix (see :option:`determinants n_states`). + ! + ! This program can be useful in many cases: + ! + ! * **Ground state calculation**: if even after a :c:func:`cis` calculation, natural + ! orbitals (see :c:func:`save_natorb`) and then :c:func:`scf` optimization, you are not sure to have the lowest scf + ! solution, + ! do the same strategy with the :c:func:`cisd` executable instead of the :c:func:`cis` exectuable to generate the natural + ! orbitals as a guess for the :c:func:`scf`. + ! + ! + ! + ! * **Excited states calculations**: the lowest excited states are much likely to + ! be dominanted by single- or double-excitations. + ! Therefore, running a :c:func:`cisd` will save the "n_states" lowest states within + ! the CISD space + ! in the |EZFIO| directory, which can afterward be used as guess wave functions + ! for a further multi-state fci calculation if you specify "read_wf" = True + ! before running the fci executable (see :option:`determinants read_wf`). + ! Also, if you specify "s2_eig" = True, the cisd will only retain states + ! having the good value :math:`S^2` value + ! (see :option:`determinants expected_s2` and :option:`determinants s2_eig`). + ! If "s2_eig" = False, it will take the lowest n_states, whatever + ! multiplicity they are. + ! + ! + ! + ! Note: if you would like to discard some orbitals, use + ! :ref:`qp_set_mo_class` to specify: + ! + ! * "core" orbitals which will be always doubly occupied + ! + ! * "act" orbitals where an electron can be either excited from or to + ! + ! * "del" orbitals which will be never occupied + ! + END_DOC + PROVIDE N_states + read_wf = .False. + TOUCH read_wf + call run +end + +subroutine run + implicit none + + if(pseudo_sym)then + call H_apply_cisd_sym + else + call H_apply_cisd + endif + call get_lccsd_2 +end + +subroutine get_lccsd_2 + implicit none + integer :: i,k + double precision :: cisdq(N_states), delta_e + double precision,external :: diag_h_mat_elem + psi_coef = lccsd_coef + SOFT_TOUCH psi_coef + call save_wavefunction_truncated(save_threshold) + call ezfio_set_cisd_lcc_energy(lccsd_energies) + + print *, 'N_det = ', N_det + print*,'' + print*,'******************************' + print *, 'LCCSD Energies' + do i = 1,N_states + print *, i, lccsd_energies(i) + enddo + if (N_states > 1) then + print*,'******************************' + print*,'Excitation energies (au) (LCCSD)' + do i = 2, N_states + print*, i ,lccsd_energies(i) - lccsd_energies(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (LCCSD)' + do i = 2, N_states + print*, i ,(lccsd_energies(i) - lccsd_energies(1)) * ha_to_ev + enddo + endif + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f new file mode 100644 index 00000000..b071a8f8 --- /dev/null +++ b/src/cisd/lccsd_prov.irp.f @@ -0,0 +1,44 @@ + BEGIN_PROVIDER [ double precision, lccsd_coef, (N_det, N_states)] +&BEGIN_PROVIDER [ double precision, lccsd_energies, (N_states)] + implicit none + double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) + double precision :: ebefore, eafter, ecorr, thresh + integer :: i,it + logical :: converged + external H_u_0_nstates_openmp + allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) + thresh = 1.d-6 + converged = .False. + Dress_jj = 0.d0 + u_in = 0.d0 + it = 0 + ! initial guess + do i = 1, N_states_diag + u_in(i,i) = 1.d0 + enddo + do i = 1,N_det + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,H_jj(i)) + enddo + ebefore = H_jj(1) + do while (.not.converged) + it += 1 + print*,'N_det = ',N_det + call davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,lccsd_energies,& + N_det,N_states,N_states_diag,converged,H_u_0_nstates_openmp) + ecorr = lccsd_energies(1) - H_jj(1) + print*,'---------------------' + print*,'it = ',it + print*,'ecorr = ',ecorr + Dress_jj(1) = 0.d0 + do i = 2, N_det + Dress_jj(i) = ecorr + enddo + eafter = lccsd_energies(1) + converged = (dabs(eafter - ebefore).lt.thresh) + ebefore = eafter + enddo + do i = 1, N_states + lccsd_coef(1:N_det,i) = u_in(1:N_det,i) + enddo + +END_PROVIDER From 1a2632c280bc74eca95912fc7eec4e4b9d1f8587 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 9 Aug 2023 16:59:47 +0200 Subject: [PATCH 53/74] added condition in lccsd --- src/cisd/lccsd_prov.irp.f | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f index b071a8f8..38149ac9 100644 --- a/src/cisd/lccsd_prov.irp.f +++ b/src/cisd/lccsd_prov.irp.f @@ -31,7 +31,9 @@ print*,'ecorr = ',ecorr Dress_jj(1) = 0.d0 do i = 2, N_det - Dress_jj(i) = ecorr + if(ecorr + H_jj(i) .gt. H_jj(1))then + Dress_jj(i) = ecorr + endif enddo eafter = lccsd_energies(1) converged = (dabs(eafter - ebefore).lt.thresh) From 2e458e93bafcd652e996cd3067d652f61522a915 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 10 Aug 2023 08:38:05 +0200 Subject: [PATCH 54/74] Fixing CI --- .github/workflows/configuration.yml | 2 +- external/ezfio | 2 +- external/irpf90 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index ba37f5dd..178b394e 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config libhdf5-dev - name: zlib run: | ./configure -i zlib || echo OK diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 From ee2c470054b76bffc680f54b960daae99ed9679d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 15:53:35 +0200 Subject: [PATCH 55/74] clarified the TC-CASSCF gradients --- src/casscf_tc_bi/grad_dm.irp.f | 17 +++++----- src/casscf_tc_bi/grad_old.irp.f | 42 ++++++++++++++----------- src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 22 ++++++------- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 7f6155ab..1618adc6 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -24,8 +24,7 @@ do a=1,n_virt_orb indx = mat_idx_c_v(i,a) aa=list_virt(a) - call gradvec_tc_ia(ii,aa,res_l) - call gradvec_tc_ia(aa,ii,res_r) + call gradvec_tc_ia(ii,aa,res_l,res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) gradvec_tc_r(fff,indx)=res_r(fff) @@ -41,17 +40,21 @@ end do END_PROVIDER -subroutine gradvec_tc_ia(i,a,res) +subroutine gradvec_tc_ia(i,a,res_l, res_r) implicit none BEGIN_DOC ! doubly occupied --> virtual TC gradient ! -! Corresponds to +! Corresponds to res_r = , +! +! res_l = END_DOC integer, intent(in) :: i,a - double precision, intent(out) :: res(0:3) - res = 0.d0 - res(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + double precision, intent(out) :: res_l(0:3), res_r(0:3) + res_l = 0.d0 + res_r = 0.d0 + res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) + res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index ea6747b1..6c976d66 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -69,45 +69,51 @@ END_PROVIDER subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) BEGIN_DOC - ! eq 18 of Siegbahn et al, Physica Scripta 1980 - ! we calculate res_r = , and res_r = - ! q=hole, p=particle - ! res_l(0) = total matrix element - ! res_l(1) = one-electron part - ! res_l(2) = two-electron part - ! res_l(3) = three-electron part + ! Computes the gradient with respect to orbital rotation BRUT FORCE + ! + ! res_l = + ! + ! res_r = + ! + ! q=hole, p=particle. NOTE that on res_l it is E_qp and on res_r it is E_pq + ! + ! res_l(0) = total matrix element, res_l(1) = one-electron part, + ! + ! res_l(2) = two-electron part, res_l(3) = three-electron part + ! END_DOC implicit none integer, intent(in) :: ihole,ipart double precision, intent(out) :: res_l(0:3), res_r(0:3) integer :: mu,iii,ispin,ierr,nu,istate,ll integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) - real*8 :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states),phase + real*8 :: chi_H_mu_ex_array(0:3,N_states),mu_ex_H_phi_array(0:3,N_states),phase allocate(det_mu(N_int,2)) allocate(det_mu_ex(N_int,2)) res_l=0.D0 res_r=0.D0 -! print*,'in i_h_psi' -! print*,ihole,ipart do mu=1,n_det - ! get the string of the determinant + ! get the string of the determinant |mu> call det_extract(det_mu,mu,N_int) do ispin=1,2 - ! do the monoexcitation on it + ! do the monoexcitation on it: |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> call det_copy(det_mu,det_mu_ex,N_int) call do_signed_mono_excitation(det_mu,det_mu_ex,nu & ,ihole,ipart,ispin,phase,ierr) + ! |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> if (ierr.eq.1) then - call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & - ,N_det,psi_det_size,N_states,i_H_chi_array,i_H_phi_array) -! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) + ,N_det,psi_det_size,N_states,chi_H_mu_ex_array,mu_ex_H_phi_array) + ! chi_H_mu_ex_array = + ! mu_ex_H_phi_array = do istate=1,N_states - do ll = 0,3 - res_l(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase - res_r(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + do ll = 0,3 ! loop over the body components (1e,2e,3e) + !res_l = \sum_mu c_mu^l = + res_l(ll)+= mu_ex_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + !res_r = \sum_mu c_mu^r = + res_r(ll)+= chi_H_mu_ex_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase enddo end do end if diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 8524253a..e96e738e 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,7 +90,7 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end -subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_chi_array,i_H_phi_array) +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,chi_H_i_array,i_H_phi_array) use bitmasks implicit none BEGIN_DOC @@ -116,7 +116,7 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) integer(bit_kind), intent(in) :: key(Nint,2) double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) - double precision, intent(out) :: i_H_chi_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + double precision, intent(out) :: chi_H_i_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) integer :: i, ii,j double precision :: phase @@ -131,7 +131,7 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c ASSERT (Ndet_max >= Ndet) allocate(idx(0:Ndet)) - i_H_chi_array = 0.d0 + chi_H_i_array = 0.d0 i_H_phi_array = 0.d0 call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) @@ -142,10 +142,10 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c ! computes !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) - i_H_chi_array(0,1) = i_H_chi_array(0,1) + coef_l(i,1)*htot - i_H_chi_array(1,1) = i_H_chi_array(1,1) + coef_l(i,1)*hmono - i_H_chi_array(2,1) = i_H_chi_array(2,1) + coef_l(i,1)*htwoe - i_H_chi_array(3,1) = i_H_chi_array(3,1) + coef_l(i,1)*hthree + chi_H_i_array(0,1) = chi_H_i_array(0,1) + coef_l(i,1)*htot + chi_H_i_array(1,1) = chi_H_i_array(1,1) + coef_l(i,1)*hmono + chi_H_i_array(2,1) = chi_H_i_array(2,1) + coef_l(i,1)*htwoe + chi_H_i_array(3,1) = chi_H_i_array(3,1) + coef_l(i,1)*hthree ! computes !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) @@ -163,10 +163,10 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) do j = 1, Nstate - i_H_chi_array(0,j) = i_H_chi_array(0,j) + coef_l(i,j)*htot - i_H_chi_array(1,j) = i_H_chi_array(1,j) + coef_l(i,j)*hmono - i_H_chi_array(2,j) = i_H_chi_array(2,j) + coef_l(i,j)*htwoe - i_H_chi_array(3,j) = i_H_chi_array(3,j) + coef_l(i,j)*hthree + chi_H_i_array(0,j) = chi_H_i_array(0,j) + coef_l(i,j)*htot + chi_H_i_array(1,j) = chi_H_i_array(1,j) + coef_l(i,j)*hmono + chi_H_i_array(2,j) = chi_H_i_array(2,j) + coef_l(i,j)*htwoe + chi_H_i_array(3,j) = chi_H_i_array(3,j) + coef_l(i,j)*hthree enddo ! computes !DIR$ FORCEINLINE From cc20c97eda6bdf4240c7d689022f37a867bce81a Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 17:07:40 +0200 Subject: [PATCH 56/74] all the one-body gradients are correct for TC-CASSCF --- src/casscf_tc_bi/grad_dm.irp.f | 59 +++++++++++++++++++++++++++++++-- src/casscf_tc_bi/grad_old.irp.f | 22 +++++++----- 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 1618adc6..00b20d41 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -1,11 +1,29 @@ BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] &BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + BEGIN_DOC +! gradvec_tc_r(0:3,i) = +! +! gradvec_tc_l(0:3,i) = +! +! where the indices "i" corresponds to E_q^p(i) +! +! i = mat_idx_c_a(q,p) +! +! and gradvec_tc_r/l(0) = full matrix element +! +! gradvec_tc_r/l(1) = one-body part + +! gradvec_tc_r/l(2) = two-body part + +! gradvec_tc_r/l(3) = three-body part + END_DOC implicit none integer :: ii,tt,aa,indx integer :: i,t,a,fff double precision :: res_l(0:3), res_r(0:3) gradvec_tc_l = 0.d0 gradvec_tc_r = 0.d0 + ! computing the core/inactive --> virtual orbitals gradients do i=1,n_core_inact_orb ii=list_core_inact(i) do t=1,n_act_orb @@ -33,9 +51,15 @@ end do do t=1,n_act_orb + tt=list_act(t) do a=1,n_virt_orb - indx = mat_idx_a_v(i,a) -! gradvec_tc_l(indx)=gradvec_ta(t,a) + aa=list_virt(a) + indx = mat_idx_a_v(t,a) + call gradvec_tc_ta(tt,aa,res_l, res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo end do end do END_PROVIDER @@ -65,7 +89,7 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) ! ! Corresponds to res_r = ! -! res_l = +! res_l = END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) @@ -83,3 +107,32 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) enddo end + +subroutine gradvec_tc_ta(t,a,res_l, res_r) + implicit none + BEGIN_DOC +! active --> virtual TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: t,a + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,m + double precision :: dm + res_r = 0.d0 + res_l = 0.d0 +! do rr = 1, n_act_orb +! r = list_act(rr) +! res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) +! res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) +! enddo + do m = 1, mo_num + res_r(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(a,m,1,1) & + -mo_bi_ortho_tc_one_e(m,a) * tc_transition_matrix_mo(m,t,1,1) + res_l(1) += mo_bi_ortho_tc_one_e(a,m) * tc_transition_matrix_mo(t,m,1,1) & + -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,a,1,1) + enddo + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index 6c976d66..e8440513 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -38,15 +38,19 @@ enddo enddo enddo -! do indx=1,nMonoEx -! ihole=excit(1,indx) -! ipart=excit(2,indx) -! call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) -! do ll = 0, 3 -! gradvec_detail_left_old (ll,indx)=res_l(ll) -! gradvec_detail_right_old(ll,indx)=res_r(ll) -! enddo -! end do + + do tt = 1, n_act_orb + ihole = list_act(tt) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_a_v(tt,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo real*8 :: norm_grad_left, norm_grad_right norm_grad_left=0.d0 From a15055e9648d65fd4c95513ccb9f7ed0b765199c Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 17:26:33 +0200 Subject: [PATCH 57/74] optimization of one-body TC-CASSCF gradients --- src/casscf_tc_bi/grad_dm.irp.f | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 00b20d41..6e06f6ce 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -93,17 +93,17 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,ss,s,m + integer :: rr,r,ss,s,m,mm double precision :: dm res_r = 0.d0 - do m = 1, mo_num - res_r(1) += mo_bi_ortho_tc_one_e(i,m) * tc_transition_matrix_mo(t,m,1,1) & - -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,i,1,1) - enddo res_l = 0.d0 - do m = 1, mo_num - res_l(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(i,m,1,1) & - -mo_bi_ortho_tc_one_e(m,i) * tc_transition_matrix_mo(m,t,1,1) + res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) + res_l(1) += 2.D0 * mo_bi_ortho_tc_one_e(t,i) + + do rr = 1, n_act_orb + r = list_act(rr) + res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) + res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo end @@ -123,16 +123,10 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) double precision :: dm res_r = 0.d0 res_l = 0.d0 -! do rr = 1, n_act_orb -! r = list_act(rr) -! res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) -! res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) -! enddo - do m = 1, mo_num - res_r(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(a,m,1,1) & - -mo_bi_ortho_tc_one_e(m,a) * tc_transition_matrix_mo(m,t,1,1) - res_l(1) += mo_bi_ortho_tc_one_e(a,m) * tc_transition_matrix_mo(t,m,1,1) & - -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,a,1,1) + do rr = 1, n_act_orb + r = list_act(rr) + res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) + res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) enddo end From 5dc4fb29284c496cf3b6921e9c9422e94b972a97 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 16 Aug 2023 14:06:29 +0200 Subject: [PATCH 58/74] naive two rdm in tc works for He in cisd and bi ortho orbitals --- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 60 ++++++++++++ src/tc_bi_ortho/two_rdm_naive.irp.f | 132 ++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 src/tc_bi_ortho/test_tc_two_rdm.irp.f create mode 100644 src/tc_bi_ortho/two_rdm_naive.irp.f diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f new file mode 100644 index 00000000..ecdeef43 --- /dev/null +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -0,0 +1,60 @@ +program test_tc_rdm + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call test() + +end + +subroutine test + implicit none + integer :: h1,p1,h2,p2,i,j,istate + double precision :: rdm, integral, accu,ref + double precision :: hmono, htwoe, hthree, htot + accu = 0.d0 + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + enddo + enddo + enddo + enddo + accu *= 0.5d0 + print*,'accu = ',accu +! print*,tc_two_rdm(1,1,1,1),mo_bi_ortho_tc_two_e(1,1,1,1) + ref = 0.d0 + do i = 1, N_det + do j = 1, N_det +! if(i.ne.j)cycle + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + do istate = 1,N_states +! print*,'i,j',i,j,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe + ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe + enddo + enddo + enddo + print*,' ref = ',ref + +end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f new file mode 100644 index 00000000..9694c653 --- /dev/null +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -0,0 +1,132 @@ +BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,s,q,r) = psi_det(i) + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo + if(degree == 2)then + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + else if(degree==1)then +! cycle + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + + ! run over the electrons of opposite spin than the excitation + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + ! run over the electrons of same spin than the excitation + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + endif + else if(degree == 0)then + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) +! print*,'contrib',contrib + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) + enddo + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + s1 = 1 ! alpha electrons + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of alpha-beta electrons + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + ! run over the couple of alpha-alpha electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + enddo + s1 = 2 + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of beta-beta electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + enddo + endif + enddo + enddo + +END_PROVIDER + +subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) + implicit none + integer, intent(in) :: h1,p1,h2,p2,s1,s2,sze + double precision, intent(in) :: contrib + double precision, intent(inout) :: array(sze, sze, sze, sze) + integer :: istate + if(s1.ne.s2)then + array(p1,h1,p2,h2) += contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + else ! same spin double excitation + array(p1,h1,p2,h2) += contrib + ! exchange + ! exchanging the holes + array(p2,h1,p1,h2) -= contrib + ! exchanging the particles + array(p1,h2,p2,h1) -= contrib + + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + ! exchange + ! exchanging the holes + array(p1,h2,p2,h1) -= contrib + ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + endif + +end From 0ffaf820a2bc156e021e0f68bddf119e04ce82b5 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 16 Aug 2023 18:07:50 +0200 Subject: [PATCH 59/74] tc two rdm works for O CISD in biorthonormal basis --- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 19 ++++++++++++++++--- src/tc_bi_ortho/two_rdm_naive.irp.f | 14 +++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index ecdeef43..3e556312 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -35,6 +35,10 @@ subroutine test do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) +! if(dabs(rdm).gt.1.d-10)then +! print*,h1,p1,h2,p2 +! print*,rdm,integral,rdm*integral +! endif accu += integral * rdm enddo enddo @@ -42,19 +46,28 @@ subroutine test enddo accu *= 0.5d0 print*,'accu = ',accu -! print*,tc_two_rdm(1,1,1,1),mo_bi_ortho_tc_two_e(1,1,1,1) +! print*,mo_bi_ortho_tc_two_e(2,15,2,1) +! print*,mo_bi_ortho_tc_two_e(15,2,2,1) +! print*,mo_bi_ortho_tc_two_e(2,1,2,15) +! print*,mo_bi_ortho_tc_two_e(2,1,15,2) ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.ne.j)cycle +! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe +! print*,'i,j',i,j +! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe +! if(i.ne.j)then ! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe +! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo print*,' ref = ',ref + print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 9694c653..8fd34975 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -30,7 +30,6 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) else if(degree==1)then -! cycle ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -48,13 +47,12 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) enddo endif else if(degree == 0)then +! cycle contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) -! print*,'contrib',contrib do istate = 2, N_states contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) enddo @@ -115,18 +113,12 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) else ! same spin double excitation array(p1,h1,p2,h2) += contrib ! exchange - ! exchanging the holes - array(p2,h1,p1,h2) -= contrib ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + ! exchanging the array(p1,h2,p2,h1) -= contrib - ! permutation for particle symmetry array(p2,h2,p1,h1) += contrib - ! exchange - ! exchanging the holes - array(p1,h2,p2,h1) -= contrib - ! exchanging the particles - array(p2,h1,p1,h2) -= contrib endif end From 6b56e213d89cd7ee07a33ec460ed7613fb9a28e6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 17 Aug 2023 18:17:46 +0200 Subject: [PATCH 60/74] right two-body inactive-virtual gradients implemented --- src/casscf_tc_bi/grad_dm.irp.f | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 6e06f6ce..77336c93 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -79,7 +79,22 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) res_r = 0.d0 res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) - + integer :: j,t,r,jj,tt,rr + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) + res_l(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + enddo + do tt = 1, n_act_orb + t = list_act(tt) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & + ) + enddo + enddo end subroutine gradvec_tc_it(i,t,res_l, res_r) From a4d7648bb011ef80dd210baf411f1a74a67671a0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Aug 2023 09:56:07 +0200 Subject: [PATCH 61/74] Fix segfault in cholesky due to array syntax --- src/ao_two_e_ints/cholesky.irp.f | 164 +++++++++++++------------ src/ccsd/ccsd_space_orb_sub_chol.irp.f | 51 ++++---- 2 files changed, 105 insertions(+), 110 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 175ccf6e..2977f0f4 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -23,38 +23,38 @@ END_PROVIDER ! ! Last dimension of cholesky_ao is cholesky_ao_num END_DOC - + integer :: rank, ndim double precision :: tau double precision, pointer :: L(:,:), L_old(:,:) - - + + double precision :: s double precision, parameter :: dscale = 1.d0 - + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:) logical, allocatable :: computed(:) - + integer :: i,j,k,m,p,q, qj, dj, p2, q2 integer :: N, np, nq - + double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero - + double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr - + double precision :: mem double precision, external :: memory_of_double, memory_of_int - + integer, external :: getUnitAndOpen integer :: iunit - + ndim = ao_num*ao_num deallocate(cholesky_ao) - + if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -63,11 +63,11 @@ END_PROVIDER read(iunit) cholesky_ao close(iunit) cholesky_ao_num = rank - + else - + PROVIDE nucl_coord - + if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -76,16 +76,16 @@ END_PROVIDER else PROVIDE ao_two_e_integrals_in_map endif - + tau = ao_cholesky_threshold - + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) call check_mem(mem, irp_here) - + call print_memory_usage() allocate(L(ndim,1)) - + print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' @@ -93,13 +93,13 @@ END_PROVIDER print *, '============ =============' print *, ' Rank Threshold' print *, '============ =============' - - + + rank = 0 - + allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) - + ! 1. k=0 do j=1,ao_num @@ -110,7 +110,7 @@ END_PROVIDER addr(3,k) = (i-1)*ao_num + j enddo enddo - + if (do_direct_integrals) then !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim @@ -127,9 +127,9 @@ END_PROVIDER enddo !$OMP END PARALLEL DO endif - + Dmax = maxval(D) - + ! 2. np=0 do p=1,ndim @@ -138,26 +138,26 @@ END_PROVIDER Lset(np) = p endif enddo - + ! 3. N = 0 - + ! 4. i = 0 - + ! 5. do while ( (Dmax > tau).and.(rank < ndim) ) ! a. i = i+1 - + s = 0.01d0 - + ! Inrease s until the arrays fit in memory do while (.True.) - + ! b. Dmin = max(s*Dmax,tau) - + ! c. nq=0 do p=1,np @@ -166,30 +166,30 @@ END_PROVIDER Dset(nq) = Lset(p) endif enddo - + call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - + if (mem > qp_max_mem) then s = s*2.d0 else exit endif - + if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif - + enddo - + ! d., e. block_size = max(N,24) - + L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then @@ -197,48 +197,52 @@ END_PROVIDER print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif - - !$OMP PARALLEL DO PRIVATE(k) + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - L(:,k) = L_old(:,k) + do j=1,ndim + L(j,k) = L_old(j,k) + enddo enddo !$OMP END PARALLEL DO - + deallocate(L_old) - + allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 endif - + allocate(Ltmp_p(np,block_size), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' stop -1 endif - + allocate(Ltmp_q(nq,block_size), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' stop -1 endif - - + + allocate(computed(nq)) - + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - + !$OMP DO do q=1,nq - Delta(:,q) = 0.d0 + do j=1,np + Delta(j,q) = 0.d0 + enddo computed(q) = .False. enddo !$OMP ENDDO NOWAIT - + !$OMP DO do k=1,N do p=1,np @@ -249,36 +253,36 @@ END_PROVIDER enddo enddo !$OMP END DO NOWAIT - + !$OMP BARRIER !$OMP END PARALLEL - + if (N>0) then call dgemm('N','T', np, nq, N, -1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) endif - + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo - + ! g. - + iblock = 0 do j=1,nq - + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. rank = N+j - + if (iblock == block_size) then call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) iblock = 0 endif - + ! ii. do dj=1,nq qj = Dset(dj) @@ -286,9 +290,9 @@ END_PROVIDER exit endif enddo - + L(1:ndim, rank) = 0.d0 - + if (.not.computed(dj)) then m = dj !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) @@ -314,16 +318,16 @@ END_PROVIDER do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo - + ! iv. if (iblock > 1) then call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif - + ! iii. f = 1.d0/dsqrt(Qmax) - + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np @@ -332,38 +336,38 @@ END_PROVIDER D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) enddo !$OMP END DO - + !$OMP DO do q=1,nq Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - + !$OMP END PARALLEL - + Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo - + enddo - + print '(I10, 4X, ES12.3)', rank, Qmax - + deallocate(computed) deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) - + ! i. N = rank - + ! j. Dmax = D(Lset(1)) do p=1,np Dmax = max(Dmax, D(Lset(p))) enddo - + np=0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then @@ -371,9 +375,9 @@ END_PROVIDER Lset(np) = p endif enddo - + enddo - + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then call print_memory_usage() @@ -387,10 +391,10 @@ END_PROVIDER !$OMP END PARALLEL DO deallocate(L) cholesky_ao_num = rank - + print *, '============ =============' print *, '' - + if (write_ao_cholesky) then print *, 'Writing Cholesky vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') @@ -401,9 +405,9 @@ END_PROVIDER endif endif - + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' - + END_PROVIDER - + diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0ba46e56..54ebff73 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -252,7 +252,8 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do a = 1, nV do j = 1, nO do i = 1, nO - W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) +! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) enddo enddo enddo @@ -514,10 +515,10 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & - cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - tmp_cc2, cholesky_mo_num, & - 1.d0, tmpB1, nV*block_size) + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & + 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) do b = 1, nV @@ -1107,37 +1108,27 @@ subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: g_occ(nO, nO) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + g_occ = H_oo call dgemm('N','N',nO,nO,nV, & 1d0, t1, size(t1,1), & cc_space_f_vo, size(cc_space_f_vo,1), & - 0d0, g_occ, size(g_occ,1)) + 1d0, g_occ, size(g_occ,1)) - !$omp parallel & - !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & - !$omp private(i,j,a,u) & - !$omp default(none) - !$omp do - do i = 1, nO - do u = 1, nO - g_occ(u,i) = g_occ(u,i) + H_oo(u,i) - enddo - enddo - !$omp end do + double precision, allocatable :: X(:) + allocate(X(cholesky_mo_num)) + call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, 1, 0.d0, X, 1) - !$omp do - do i = 1, nO - do j = 1, nO - do a = 1, nV - do u = 1, nO - g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + X, 1, 1.d0, g_occ, 1) + deallocate(X) + + call dgemv('T',nO*nV,nO*nO,-1.d0, & + cc_space_v_ovoo, nO*nV, & + t1, 1, 1.d0, g_occ, 1) end From e2416a2d680e84137d6a44d335ca09f39bda5fad Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Aug 2023 10:05:48 +0200 Subject: [PATCH 62/74] Fix openMP compilation in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 01068f4f..b59dc0bb 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -244,7 +244,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) allocate(W_oovo(nO,nO,nV,nO)) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) do u = 1, nO From 30c238656f03e5c4b45fc31cb4f67dc3231c2d2d Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 21 Aug 2023 10:06:58 +0200 Subject: [PATCH 63/74] Gradient for inactive-->virtual work --- src/casscf_tc_bi/grad_dm.irp.f | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 77336c93..d7d7046d 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -93,6 +93,10 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & ) + res_l(2) += -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + ) enddo enddo end From b2fa6b0b9c0da217d87ff7fc348fef87a76a0e3b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 21 Aug 2023 11:17:58 +0200 Subject: [PATCH 64/74] inactive --> active gradient are OK for real ! --- src/casscf_tc_bi/grad_dm.irp.f | 27 +++++++++++++++++++-- src/tc_bi_ortho/two_rdm_naive.irp.f | 37 ++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index d7d7046d..f62acbdd 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -112,8 +112,7 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,ss,s,m,mm - double precision :: dm + integer :: rr,r,j,jj,u,uu,v,vv res_r = 0.d0 res_l = 0.d0 res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) @@ -124,6 +123,30 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo + + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += 2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + enddo + enddo + do rr = 1, n_act_orb + r = list_act(rr) + do uu = 1, n_act_orb + u = list_act(uu) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & + ) + do vv = 1, n_act_orb + v = list_act(vv) + res_r(2) += 0.5d0 * ( & + mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + enddo + enddo + enddo end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 8fd34975..3963d09e 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,7 +1,7 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC - ! tc_two_rdm(p,s,q,r) = = CHEMIST NOTATION END_DOC integer :: i,j,istate,m,mm,nn integer :: exc(0:2,2,2) @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] other_spin(1) = 2 other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) - tc_two_rdm = 0.d0 + tc_two_rdm_chemist = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -28,7 +28,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) enddo if(degree == 2)then - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -39,7 +39,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -47,7 +47,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo endif else if(degree == 0)then @@ -68,7 +68,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -77,7 +77,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] h2 = m p2 = m if(h2.le.h1)cycle - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo enddo s1 = 2 @@ -91,7 +91,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] h2 = m p2 = m if(h2.le.h1)cycle - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo enddo endif @@ -122,3 +122,22 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) endif end + + +BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION + END_DOC + integer :: p,q,r,s + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm(p,q,s,r) = tc_two_rdm_chemist(p,s,q,r) + enddo + enddo + enddo + enddo + +END_PROVIDER From 7afc7e5fa4f645f36d8c2100364b792889f3434b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 22 Aug 2023 10:49:59 +0200 Subject: [PATCH 65/74] OpenMP in MO optimization --- external/ezfio | 2 +- external/irpf90 | 2 +- ...optimization.irp.f => cipsi_orb_opt.irp.f} | 2 +- .../state_average_energy.irp.f | 17 +- src/two_body_rdm/state_av_act_2rdm.irp.f | 10 +- .../state_av_full_orb_2_rdm.irp.f | 320 +++++++++++------- src/two_body_rdm/two_e_dm_mo.irp.f | 5 +- 7 files changed, 231 insertions(+), 127 deletions(-) rename src/mo_optimization/{optimization.irp.f => cipsi_orb_opt.irp.f} (96%) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f similarity index 96% rename from src/mo_optimization/optimization.irp.f rename to src/mo_optimization/cipsi_orb_opt.irp.f index 9892b3e3..ae3aa1bf 100644 --- a/src/mo_optimization/optimization.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -15,7 +15,7 @@ subroutine run_optimization logical :: not_converged character (len=100) :: filename - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals not_converged = .True. nb_iter = 0 diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f index 2cd063da..05aec18a 100644 --- a/src/mo_optimization/state_average_energy.irp.f +++ b/src/mo_optimization/state_average_energy.irp.f @@ -39,17 +39,24 @@ subroutine state_average_energy(energy) double precision :: get_two_e_integral double precision :: mono_e, bi_e integer :: i,j,k,l - + + energy = nuclear_repulsion ! mono electronic part + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i,j,k,l,mono_e, bi_e) & + !$OMP SHARED(mo_num, mo_integrals_map, two_e_dm_mo, one_e_dm_mo, energy, & + !$OMP mo_one_e_integrals) mono_e = 0d0 + !$OMP DO do j = 1, mo_num do i = 1, mo_num mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) enddo enddo + !$OMP END DO NOWAIT ! bi electronic part bi_e = 0d0 + !$OMP DO do l = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -59,13 +66,17 @@ subroutine state_average_energy(energy) enddo enddo enddo + !$OMP END DO ! State average energy - energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + !$OMP CRITICAL + energy = energy + mono_e + 0.5d0 * bi_e + !$OMP END CRITICAL + !$OMP END PARALLEL ! Check !call print_energy_components - + print*,'State average energy:', energy !print*,ci_energy diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f index cd417a9d..ea636212 100644 --- a/src/two_body_rdm/state_av_act_2rdm.irp.f +++ b/src/two_body_rdm/state_av_act_2rdm.irp.f @@ -17,12 +17,12 @@ state_weights = state_average_weight integer :: ispin ! condition for alpha/beta spin - print*,'' - print*,'' - print*,'' - print*,'providint state_av_act_2_rdm_ab_mo ' +! print*,'' +! print*,'' +! print*,'' +! print*,'Providing state_av_act_2_rdm_ab_mo ' ispin = 3 - print*,'ispin = ',ispin +! print*,'ispin = ',ispin state_av_act_2_rdm_ab_mo = 0.d0 call wall_time(wall_1) double precision :: wall_1, wall_2 diff --git a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f index 2e44665d..5fb9e475 100644 --- a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f +++ b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f @@ -4,7 +4,7 @@ state_av_full_occ_2_rdm_ab_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons +! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -12,11 +12,19 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2 ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" -! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core state_av_full_occ_2_rdm_ab_mo = 0.d0 + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_ab_mo, state_av_full_occ_2_rdm_ab_mo) + + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -25,15 +33,17 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - ! alph beta alph beta - state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & + ! alph beta alph beta + state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_ab_mo(l,k,j,i) enddo enddo enddo enddo - !! BETA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! BETA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -45,9 +55,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA ACTIVE - BETA inactive - !! + !! ALPHA ACTIVE - BETA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -59,9 +71,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - BETA INACTIVE - !! + !! ALPHA INACTIVE - BETA INACTIVE + !! + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -70,13 +84,15 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! BETA ACTIVE - ALPHA CORE - !! + !! BETA ACTIVE - ALPHA CORE + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -88,9 +104,11 @@ enddo enddo enddo - + !$OMP END DO + !! ALPHA ACTIVE - BETA CORE - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -102,9 +120,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA CORE - BETA CORE - !! + !! ALPHA CORE - BETA CORE + !! + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -113,9 +133,11 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_aa_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] @@ -123,7 +145,7 @@ state_av_full_occ_2_rdm_aa_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons +! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -131,13 +153,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_aa_mo, state_av_full_occ_2_rdm_aa_mo) !! PURE ACTIVE PART ALPHA-ALPHA - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -152,74 +181,84 @@ enddo enddo enddo - !! ALPHA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! ALPHA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - ALPHA INACTIVE + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!! -!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!! CAN BE USED +!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE - + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_bb_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_bb_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * ! @@ -227,13 +266,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_bb_mo, state_av_full_occ_2_rdm_bb_mo) !! PURE ACTIVE PART beta-beta - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -242,80 +288,90 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & + state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_bb_mo(l,k,j,i) enddo enddo enddo enddo - !! beta ACTIVE - beta inactive - !! + !$OMP END DO + !! beta ACTIVE - beta inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! beta INACTIVE - beta INACTIVE + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE - + !$OMP END DO + !! beta CORE - beta CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_spin_trace_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_spin_trace_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * \sum_{sigma,sigma'} ! @@ -324,14 +380,22 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + PROVIDE n_core_orb list_core + + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_spin_trace_mo, state_av_full_occ_2_rdm_spin_trace_mo) + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !! PURE ACTIVE PART SPIN-TRACE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -340,128 +404,146 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & + state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & state_av_act_2_rdm_spin_trace_mo(l,k,j,i) enddo enddo enddo enddo + !$OMP END DO - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! BETA-BETA !!!!! - !! beta ACTIVE - beta inactive + !! beta ACTIVE - beta inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta INACTIVE - beta INACTIVE + !$OMP END DO + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE + !$OMP END DO + !! beta CORE - beta CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-ALPHA !!!!! - !! ALPHA ACTIVE - ALPHA inactive + !! ALPHA ACTIVE - ALPHA inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-BETA + BETA-ALPHA !!!!! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -474,14 +556,16 @@ ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) ! BETA INACTIVE - ALPHA ACTIVE - ! beta alph beta alpha + ! beta alph beta alpha state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! alph beta alph beta + ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - BETA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - BETA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -491,31 +575,35 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - !! BETA ACTIVE - ALPHA CORE + !! BETA ACTIVE - ALPHA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - !! ALPHA ACTIVE - BETA CORE + !! ALPHA ACTIVE - BETA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - BETA CORE + !$OMP END DO + !! ALPHA CORE - BETA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -525,7 +613,9 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0D0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 99be1f54..04c44f61 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,6 +16,9 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate + !$OMP PARALLEL DO PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_core_inact_act_orb, list_core_inact_act, & + !$OMP two_e_dm_mo, state_av_full_occ_2_rdm_spin_trace_mo) do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) do k=1,n_core_inact_act_orb @@ -29,7 +32,7 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo enddo - two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) + !$OMP END PARALLEL DO END_PROVIDER From c0b221f64706ef9a52d71a5b1655a16e04a69ff1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 22 Aug 2023 11:27:36 +0200 Subject: [PATCH 66/74] Change defaults in mo_optimization --- src/mo_optimization/EZFIO.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index e6aa2d67..078da3a2 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -2,7 +2,7 @@ type: character*(32) doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian interface: ezfio,provider,ocaml -default: full +default: diag [n_det_max_opt] type: integer @@ -14,7 +14,7 @@ default: 200000 type: integer doc: Maximal number of iterations for the orbital optimization interface: ezfio,provider,ocaml -default: 20 +default: 10 [thresh_opt_max_elem_grad] type: double precision From 50f2fb0bfad74a0c6a1cfc2c71354e1690b74e6c Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 22 Aug 2023 13:48:01 +0200 Subject: [PATCH 67/74] all gradients are OK ! --- src/casscf_tc_bi/grad_dm.irp.f | 111 ++++++++++++++++++++++++++++---- src/casscf_tc_bi/grad_old.irp.f | 2 + 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index f62acbdd..be19e6f0 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -50,11 +50,13 @@ end do end do +! print*,'DM grad' do t=1,n_act_orb tt=list_act(t) do a=1,n_virt_orb aa=list_virt(a) indx = mat_idx_a_v(t,a) +! print*,indx,t,a call gradvec_tc_ta(tt,aa,res_l, res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) @@ -83,7 +85,7 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) do jj = 1, n_core_inact_orb j = list_core_inact(jj) res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) - res_l(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + res_l(2) -= -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) enddo do tt = 1, n_act_orb t = list_act(tt) @@ -93,12 +95,14 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & ) - res_l(2) += -0.5d0 * ( & - tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & - +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & ) enddo enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end subroutine gradvec_tc_it(i,t,res_l, res_r) @@ -116,20 +120,22 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) res_r = 0.d0 res_l = 0.d0 res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) - res_l(1) += 2.D0 * mo_bi_ortho_tc_one_e(t,i) + res_l(1) -= -2.D0 * mo_bi_ortho_tc_one_e(t,i) do rr = 1, n_act_orb r = list_act(rr) res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) - res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) + res_l(1) -= mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo do jj = 1, n_core_inact_orb j = list_core_inact(jj) - res_r(2) += 2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_r(2) += -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_l(2) -= -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(t,j,i,j) - mo_bi_ortho_tc_two_e(t,j,j,i)) do rr = 1, n_act_orb r = list_act(rr) res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + res_l(2) -= tc_transition_matrix_mo(r,t,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,j,i,j) - mo_bi_ortho_tc_two_e(j,r,j,i)) enddo enddo do rr = 1, n_act_orb @@ -140,14 +146,21 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,t,u,i) - mo_bi_ortho_tc_two_e(t,r,u,i)) & + + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(t,u,i,r) - mo_bi_ortho_tc_two_e(u,t,i,r)) & + ) do vv = 1, n_act_orb v = list_act(vv) res_r(2) += 0.5d0 * ( & mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + res_l(2) -= 0.5d0 * ( & + mo_bi_ortho_tc_two_e(v,u,i,r) * tc_two_rdm(v,u,t,r) + mo_bi_ortho_tc_two_e(v,u,r,i) * tc_two_rdm(v,u,r,t) ) enddo enddo enddo - + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end subroutine gradvec_tc_ta(t,a,res_l, res_r) @@ -161,14 +174,90 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) END_DOC integer, intent(in) :: t,a double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,m - double precision :: dm + integer :: rr,r,j,jj,u,uu,v,vv + double precision :: res_r_inact_test, res_r_act_test + double precision :: res_l_inact_test, res_l_act_test res_r = 0.d0 res_l = 0.d0 do rr = 1, n_act_orb r = list_act(rr) res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) - res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) + res_r(1) -= mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) enddo + + res_r_inact_test = 0.d0 + res_l_inact_test = 0.d0 + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + do rr = 1, n_act_orb + r = list_act(rr) + res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) + res_l_inact_test += -tc_transition_matrix_mo(t,r,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) + enddo + enddo + res_r_act_test = 0.d0 + res_l_act_test = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + do vv = 1, n_act_orb + v = list_act(vv) + do uu = 1, n_act_orb + u = list_act(uu) + res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & + +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) + res_l_act_test += - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) + enddo + enddo + enddo + res_r_act_test *= 0.5d0 + res_l_act_test *= 0.5d0 + res_r(2) = res_r_inact_test + res_r_act_test + res_l(2) = res_l_inact_test + res_l_act_test + + integer :: m,x,y + double precision :: res_r_inact, res_r_act + if(.False.)then + ! test quantities + res_r_inact = 0.d0 + res_r_act = 0.d0 + do m = 1, mo_num + do x = 1, mo_num + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r_inact += 0.5d0 * mo_bi_ortho_tc_two_e(t,j,m,x) * tc_two_rdm(a,j,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,j,a,x) * tc_two_rdm(m,j,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(j,t,m,x) * tc_two_rdm(j,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,j,m,a) * tc_two_rdm(x,j,m,t) + enddo + do rr = 1, n_act_orb + r = list_act(rr) + res_r_act += 0.5d0 * mo_bi_ortho_tc_two_e(t,r,m,x) * tc_two_rdm(a,r,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,r,a,x) * tc_two_rdm(m,r,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(r,t,m,x) * tc_two_rdm(r,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,r,m,a) * tc_two_rdm(x,r,m,t) + enddo + enddo + enddo + if(dabs(res_r_inact).gt.1.d-12)then + if(dabs(res_r_inact_test - res_r_inact).gt.1.d-10)then + print*,'inact' + print*,'t,a',t,a + print*,res_r_inact_test , res_r_inact, dabs(res_r_inact_test - res_r_inact) + endif + endif + if(dabs(res_r_act).gt.1.d-12)then + if(dabs(res_r_act_test - res_r_act).gt.1.d-10)then + print*,'act' + print*,'t,a',t,a + print*,res_r_act_test , res_r_act, dabs(res_r_act_test - res_r_act) + endif + endif + endif + + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index e8440513..3f0ffb5e 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -39,11 +39,13 @@ enddo enddo +! print*,'old grad' do tt = 1, n_act_orb ihole = list_act(tt) do aa = 1, n_virt_orb ipart = list_virt(aa) indx = mat_idx_a_v(tt,aa) +! print*,indx,tt,aa call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) do ll = 0, 3 gradvec_detail_left_old (ll,indx)=res_l(ll) From ff9a57c978d772723470d4ebd131707310154a52 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 1 Sep 2023 11:35:28 +0200 Subject: [PATCH 68/74] added some stuffs for TC-CASSCF --- external/ezfio | 2 +- external/irpf90 | 2 +- src/casscf_tc_bi/det_manip.irp.f | 125 +++++++++++++ src/casscf_tc_bi/grad_dm.irp.f | 4 +- src/casscf_tc_bi/test_tc_casscf.irp.f | 252 ++++++++++++++++++++++++++ src/cisd/lccsd_prov.irp.f | 8 +- 6 files changed, 387 insertions(+), 6 deletions(-) create mode 100644 src/casscf_tc_bi/det_manip.irp.f create mode 100644 src/casscf_tc_bi/test_tc_casscf.irp.f diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/src/casscf_tc_bi/det_manip.irp.f b/src/casscf_tc_bi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_tc_bi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index be19e6f0..047b5718 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -193,7 +193,7 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) r = list_act(rr) res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) - res_l_inact_test += -tc_transition_matrix_mo(t,r,1,1) * & + res_l_inact_test -= -tc_transition_matrix_mo(t,r,1,1) * & (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) enddo enddo @@ -207,7 +207,7 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) u = list_act(uu) res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) - res_l_act_test += - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + res_l_act_test -= - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) enddo enddo diff --git a/src/casscf_tc_bi/test_tc_casscf.irp.f b/src/casscf_tc_bi/test_tc_casscf.irp.f new file mode 100644 index 00000000..baa50c0f --- /dev/null +++ b/src/casscf_tc_bi/test_tc_casscf.irp.f @@ -0,0 +1,252 @@ +program tc_bi_ortho + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + +! call routine_i_h_psi +! call routine_grad + call routine_grad_num_dm_one_body +end + +subroutine routine_i_h_psi + implicit none + integer :: i,j + double precision :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states) + double precision :: hmono, htwoe, hthree, htot + double precision :: accu_l_hmono, accu_l_htwoe, accu_l_hthree, accu_l_htot + double precision :: accu_r_hmono, accu_r_htwoe, accu_r_hthree, accu_r_htot + double precision :: test_l_hmono, test_l_htwoe, test_l_hthree, test_l_htot + double precision :: test_r_hmono, test_r_htwoe, test_r_hthree, test_r_htot + + test_l_hmono = 0.d0 + test_l_htwoe = 0.d0 + test_l_hthree= 0.d0 + test_l_htot = 0.d0 + test_r_hmono = 0.d0 + test_r_htwoe = 0.d0 + test_r_hthree= 0.d0 + test_r_htot = 0.d0 + + do i = 1, N_det + call i_H_tc_psi_phi(psi_det(1,1,i),psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,& + N_int,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + accu_l_hmono = 0.d0 + accu_l_htwoe = 0.d0 + accu_l_hthree= 0.d0 + accu_l_htot = 0.d0 + accu_r_hmono = 0.d0 + accu_r_htwoe = 0.d0 + accu_r_hthree= 0.d0 + accu_r_htot = 0.d0 + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + accu_l_hmono += psi_l_coef_bi_ortho(j,1) * hmono + accu_l_htwoe += psi_l_coef_bi_ortho(j,1) * htwoe + accu_l_hthree += psi_l_coef_bi_ortho(j,1) * hthree + accu_l_htot += psi_l_coef_bi_ortho(j,1) * htot + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + accu_r_hmono += psi_r_coef_bi_ortho(j,1) * hmono + accu_r_htwoe += psi_r_coef_bi_ortho(j,1) * htwoe + accu_r_hthree += psi_r_coef_bi_ortho(j,1) * hthree + accu_r_htot += psi_r_coef_bi_ortho(j,1) * htot + enddo + test_l_htot += dabs(i_H_chi_array(0,1)-accu_l_htot) + test_l_hmono += dabs(i_H_chi_array(1,1)-accu_l_hmono) + test_l_htwoe += dabs(i_H_chi_array(2,1)-accu_l_htwoe) + test_l_hthree += dabs(i_H_chi_array(3,1)-accu_l_hthree) + + test_r_htot += dabs(i_H_phi_array(0,1)-accu_r_htot) + test_r_hmono += dabs(i_H_phi_array(1,1)-accu_r_hmono) + test_r_htwoe += dabs(i_H_phi_array(2,1)-accu_r_htwoe) + test_r_hthree += dabs(i_H_phi_array(3,1)-accu_r_hthree) + + enddo + + test_l_htot *= 1.D0/dble(N_det) + test_l_hmono *= 1.D0/dble(N_det) + test_l_htwoe *= 1.D0/dble(N_det) + test_l_hthree *= 1.D0/dble(N_det) + + test_r_htot *= 1.D0/dble(N_det) + test_r_hmono *= 1.D0/dble(N_det) + test_r_htwoe *= 1.D0/dble(N_det) + test_r_hthree *= 1.D0/dble(N_det) + + print*,'**************************' + print*,'test_l_htot = ',test_l_htot + print*,'test_l_hmono = ',test_l_hmono + print*,'test_l_htwoe = ',test_l_htwoe + print*,'test_l_hthree = ',test_l_hthree + print*,'**************************' + print*,'test_r_htot = ',test_r_htot + print*,'test_r_hmono = ',test_r_hmono + print*,'test_r_htwoe = ',test_r_htwoe + print*,'test_r_hthree = ',test_r_hthree + +end + +subroutine routine_grad_num + implicit none + integer :: indx,ihole,ipart + integer :: p,q + double precision :: accu_l, accu_r + double precision :: contrib_l, contrib_r + + accu_l = 0.d0 + accu_r = 0.d0 + do indx=1,nMonoEx + q = excit(1,indx) + p = excit(2,indx) + contrib_l = dabs(dabs(gradvec_detail_left_old(0,indx)) - 2.D0 * dabs( Fock_matrix_tc_mo_tot(q,p))) + contrib_r = dabs(dabs(gradvec_detail_right_old(0,indx)) -2.D0 * dabs( Fock_matrix_tc_mo_tot(p,q))) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,indx,q,p + print*,gradvec_detail_left_old(0,indx),gradvec_detail_right_old(0,indx) + print*,2.D0* Fock_matrix_tc_mo_tot(q,p), 2.d0* Fock_matrix_tc_mo_tot(p,q) + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + +! do i = 1, nMonoEx +! print*,i,gradvec_old(i) +! enddo + +end + +subroutine routine_grad_num_dm_one_body + implicit none + integer :: indx,ii,i,a,aa,tt,t,ibody + double precision :: accu_l, accu_r,ref_r, new_r, ref_l, new_l + double precision :: contrib_l, contrib_r + double precision :: res_l(0:3),res_r(0:3) + + ibody = 2 ! check only the two-body term + provide gradvec_detail_left_old gradvec_tc_l + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->virtual' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do aa = 1, n_virt_orb + indx = mat_idx_c_v(ii,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + i = list_core_inact(ii) + a = list_virt(aa) +! if(i==1.and.a==9)then +! print*,i,a,ref_r, new_r +! stop +! endif + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + print*,indx,i,a,ii,aa + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,gradvec_detail_left_old(0,indx),gradvec_tc_l(0,indx) + print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + print*,'**************************' + print*,'**************************' + endif + + ibody = 2 ! check only the two-body term + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->active' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do tt = 1, n_act_orb + indx = mat_idx_c_a(ii,tt) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + i = list_core_inact(ii) + t = list_act(tt) + print*,indx,i,t + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing active-->virtual ' + accu_l = 0.d0 + accu_r = 0.d0 + do tt = 1, n_act_orb + do aa = 1, n_virt_orb + indx = mat_idx_a_v(tt,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + a = list_virt(aa) + t = list_act(tt) + print*,indx,t,a + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r +! print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f index 38149ac9..8338cf81 100644 --- a/src/cisd/lccsd_prov.irp.f +++ b/src/cisd/lccsd_prov.irp.f @@ -3,7 +3,7 @@ implicit none double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) double precision :: ebefore, eafter, ecorr, thresh - integer :: i,it + integer :: i,it,degree logical :: converged external H_u_0_nstates_openmp allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) @@ -31,7 +31,11 @@ print*,'ecorr = ',ecorr Dress_jj(1) = 0.d0 do i = 2, N_det - if(ecorr + H_jj(i) .gt. H_jj(1))then + if(ecorr + H_jj(i) .lt. H_jj(1))then + print*,'Warning, some dets are not dressed: ' + call get_excitation_degree(ref_bitmask,psi_det(1,1,i),degree,N_int) + print*,'degree, Delta E, coef', degree, H_jj(i)-H_jj(1), u_in(i,1)/u_in(1,1) + else Dress_jj(i) = ecorr endif enddo From 3f95bf40edda376243fa5e423f3e81ef8029bbd9 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 12:58:26 +0200 Subject: [PATCH 69/74] tc-two-rdm broken ... --- src/tc_bi_ortho/tc_natorb.irp.f | 22 +++++++------- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 32 ++++++++++----------- src/tc_bi_ortho/two_rdm_naive.irp.f | 41 +++++++++++++++++++++------ 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 1b5a66f3..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -32,19 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - if(n_core_orb.ne.0)then -! print*,'core orbitals' -! pause - call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - else - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - endif -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 3e556312..044c31e0 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -25,49 +25,47 @@ end subroutine test implicit none - integer :: h1,p1,h2,p2,i,j,istate - double precision :: rdm, integral, accu,ref + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new double precision :: hmono, htwoe, hthree, htot accu = 0.d0 + accu_new = 0.d0 do h1 = 1, mo_num do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + enddo + enddo + accu_new += integral * rdm_new ! if(dabs(rdm).gt.1.d-10)then ! print*,h1,p1,h2,p2 ! print*,rdm,integral,rdm*integral ! endif - accu += integral * rdm enddo enddo enddo enddo accu *= 0.5d0 - print*,'accu = ',accu -! print*,mo_bi_ortho_tc_two_e(2,15,2,1) -! print*,mo_bi_ortho_tc_two_e(15,2,2,1) -! print*,mo_bi_ortho_tc_two_e(2,1,2,15) -! print*,mo_bi_ortho_tc_two_e(2,1,15,2) + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j -! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! if(i.ne.j)then -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo - print*,' ref = ',ref + print*,' ref = ',ref print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 3963d09e..d21d6a87 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,4 +1,5 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] implicit none BEGIN_DOC ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION @@ -14,6 +15,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -21,14 +23,16 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, if(degree.gt.2)cycle if(degree.gt.0)then ! get excitation operators: from psi_det(j) --> psi_det(i) - call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) - do istate = 2, N_states - contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) - enddo + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -40,6 +44,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -48,6 +53,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -69,6 +75,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -78,6 +85,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -92,6 +100,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif @@ -124,12 +133,13 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) end -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] implicit none BEGIN_DOC ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION END_DOC - integer :: p,q,r,s + integer :: p,q,r,s,s1,s2 do r = 1, mo_num do q = 1, mo_num do s = 1, mo_num @@ -139,5 +149,18 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] enddo enddo enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo END_PROVIDER From 8b14a2b7ab3a6138ce8410671f8edd18640979b7 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 12:58:26 +0200 Subject: [PATCH 70/74] Added spin dependent two-rdm. --- src/tc_bi_ortho/tc_natorb.irp.f | 22 +++++++------- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 32 ++++++++++----------- src/tc_bi_ortho/two_rdm_naive.irp.f | 41 +++++++++++++++++++++------ 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 1b5a66f3..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -32,19 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - if(n_core_orb.ne.0)then -! print*,'core orbitals' -! pause - call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - else - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - endif -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 3e556312..044c31e0 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -25,49 +25,47 @@ end subroutine test implicit none - integer :: h1,p1,h2,p2,i,j,istate - double precision :: rdm, integral, accu,ref + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new double precision :: hmono, htwoe, hthree, htot accu = 0.d0 + accu_new = 0.d0 do h1 = 1, mo_num do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + enddo + enddo + accu_new += integral * rdm_new ! if(dabs(rdm).gt.1.d-10)then ! print*,h1,p1,h2,p2 ! print*,rdm,integral,rdm*integral ! endif - accu += integral * rdm enddo enddo enddo enddo accu *= 0.5d0 - print*,'accu = ',accu -! print*,mo_bi_ortho_tc_two_e(2,15,2,1) -! print*,mo_bi_ortho_tc_two_e(15,2,2,1) -! print*,mo_bi_ortho_tc_two_e(2,1,2,15) -! print*,mo_bi_ortho_tc_two_e(2,1,15,2) + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j -! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! if(i.ne.j)then -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo - print*,' ref = ',ref + print*,' ref = ',ref print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 3963d09e..d21d6a87 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,4 +1,5 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] implicit none BEGIN_DOC ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION @@ -14,6 +15,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -21,14 +23,16 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, if(degree.gt.2)cycle if(degree.gt.0)then ! get excitation operators: from psi_det(j) --> psi_det(i) - call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) - do istate = 2, N_states - contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) - enddo + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -40,6 +44,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -48,6 +53,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -69,6 +75,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -78,6 +85,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -92,6 +100,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif @@ -124,12 +133,13 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) end -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] implicit none BEGIN_DOC ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION END_DOC - integer :: p,q,r,s + integer :: p,q,r,s,s1,s2 do r = 1, mo_num do q = 1, mo_num do s = 1, mo_num @@ -139,5 +149,18 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] enddo enddo enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo END_PROVIDER From 6ba3f48acb7e4016ca5adf425d39e646c6e6628c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 18:28:52 +0200 Subject: [PATCH 71/74] added general Slater rules --- src/tc_bi_ortho/h_mat_triple.irp.f | 330 ++++++++++++++++++++++++++ src/tc_bi_ortho/slater_tc_opt.irp.f | 31 ++- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 14 +- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 8 +- src/tc_bi_ortho/two_rdm_naive.irp.f | 12 +- 5 files changed, 373 insertions(+), 22 deletions(-) create mode 100644 src/tc_bi_ortho/h_mat_triple.irp.f diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f new file mode 100644 index 00000000..5e1d32fe --- /dev/null +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -0,0 +1,330 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: occ(N_int*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk + integer :: degree,exc_double(0:2,2,2),exc_single(0:2,2,2) + integer :: degree_alpha,degree_beta + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3, h4, p4, s4 + double precision :: phase_double, phase_single + integer(bit_kind) :: key_j_alpha(N_int,2),key_i_alpha(N_int,2) + integer(bit_kind) :: key_j_beta(N_int,2),key_i_beta(N_int,2) + integer :: other_spin(2) + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call bitstring_to_list_ab(key_i,occ,Ne,N_int) + call get_excitation_degree(key_i,key_j,degree,N_int) + if(degree.ne.3)then + return + endif + other_spin(1) = 2 + other_spin(2) = 1 + do i = 1, N_int + key_j_alpha(i,1) = key_j(i,1) + key_j_alpha(i,2) = 0_bit_kind + key_i_alpha(i,1) = key_i(i,1) + key_i_alpha(i,2) = 0_bit_kind + + key_j_beta(i,2) = key_j(i,2) + key_j_beta(i,1) = 0_bit_kind + key_i_beta(i,2) = key_i(i,2) + key_i_beta(i,1) = 0_bit_kind + enddo + ! check whether it is a triple excitation of the same spin + + call get_excitation_degree(key_i_alpha,key_j_alpha,degree_alpha,N_int) + call get_excitation_degree(key_i_beta,key_j_beta,degree_beta,N_int) + if(degree_alpha==3.or.degree_beta==3)then + return + else + if(degree_alpha == 2.and.degree_beta == 1)then ! double alpha + single beta + call get_double_excitation(key_i_alpha,key_j_alpha,exc_double,phase_double,N_int) + call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) + call get_single_excitation(key_i_beta,key_j_beta,exc_single,phase_single,N_int) + call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + else if(degree_beta == 2 .and. degree_alpha == 1)then ! double beta + single alpha + call get_double_excitation(key_i_beta,key_j_beta,exc_double,phase_double,N_int) + call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) + call get_single_excitation(key_i_alpha,key_j_alpha,exc_single,phase_single,N_int) + call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + else + print*,'PB !!' + print*,'degree_beta, degree_alpha',degree_beta, degree_alpha + print*,'degree',degree + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + hthree *= phase_single * phase_double + endif + htot = hthree + end + diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..c69632f6 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -85,14 +85,29 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hthree = 0.D0 call get_excitation_degree(key_i, key_j, degree, Nint) - if(degree.gt.2) return - - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + if(.not.pure_three_body_h_tc)then + if(degree.gt.2) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + else + if(degree==3)then + print*,'degree == 3' + endif + if(degree.gt.3) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + else + call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif endif if(degree==0) then diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 48257943..7cb23d77 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,6 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + external H_tc_s2_dagger_u_0_with_pure_three + external H_tc_s2_u_0_with_pure_three allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -250,7 +252,11 @@ end converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -275,7 +281,11 @@ end converged = .False. i_it = 0 do while (.not. converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) + endif i_it += 1 if(i_it .gt. 5) exit enddo diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 044c31e0..68b96f37 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -35,19 +35,15 @@ subroutine test do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - rdm = tc_two_rdm(p1,h1,p2,h2) + rdm = tc_two_rdm(p2,p1,h2,h1) accu += integral * rdm rdm_new = 0.d0 do s2 = 1, 2 do s1 = 1, 2 - rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + rdm_new += tc_two_rdm_s1s2(p2,p1,h2,h1,s1,s2) enddo enddo accu_new += integral * rdm_new -! if(dabs(rdm).gt.1.d-10)then -! print*,h1,p1,h2,p2 -! print*,rdm,integral,rdm*integral -! endif enddo enddo enddo diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index d21d6a87..90163de5 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -32,7 +32,7 @@ enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -44,7 +44,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -53,7 +53,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -75,7 +75,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -85,7 +85,7 @@ p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -100,7 +100,7 @@ p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif From 14edfa839b39d3b43687920498e177c96bc3d41b Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 18:42:50 +0200 Subject: [PATCH 72/74] added full three body --- src/tc_bi_ortho/h_mat_triple.irp.f | 85 +++++++++++++---------------- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 - 2 files changed, 39 insertions(+), 49 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 5e1d32fe..5f332599 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -267,64 +267,57 @@ subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) integer, intent(in) :: Nint double precision, intent(out) :: hmono, htwoe, hthree, htot - integer :: occ(N_int*bit_kind_size,2) - integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk - integer :: degree,exc_double(0:2,2,2),exc_single(0:2,2,2) - integer :: degree_alpha,degree_beta - integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3, h4, p4, s4 - double precision :: phase_double, phase_single - integer(bit_kind) :: key_j_alpha(N_int,2),key_i_alpha(N_int,2) - integer(bit_kind) :: key_j_beta(N_int,2),key_i_beta(N_int,2) - integer :: other_spin(2) + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor hmono = 0.d0 htwoe = 0.d0 hthree = 0.d0 htot = 0.d0 - call bitstring_to_list_ab(key_i,occ,Ne,N_int) - call get_excitation_degree(key_i,key_j,degree,N_int) - if(degree.ne.3)then - return - endif - other_spin(1) = 2 - other_spin(2) = 1 - do i = 1, N_int - key_j_alpha(i,1) = key_j(i,1) - key_j_alpha(i,2) = 0_bit_kind - key_i_alpha(i,1) = key_i(i,1) - key_i_alpha(i,2) = 0_bit_kind - - key_j_beta(i,2) = key_j(i,2) - key_j_beta(i,1) = 0_bit_kind - key_i_beta(i,2) = key_i(i,2) - key_i_beta(i,1) = 0_bit_kind - enddo - ! check whether it is a triple excitation of the same spin - - call get_excitation_degree(key_i_alpha,key_j_alpha,degree_alpha,N_int) - call get_excitation_degree(key_i_beta,key_j_beta,degree_beta,N_int) - if(degree_alpha==3.or.degree_beta==3)then - return + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) else - if(degree_alpha == 2.and.degree_beta == 1)then ! double alpha + single beta - call get_double_excitation(key_i_alpha,key_j_alpha,exc_double,phase_double,N_int) - call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) - call get_single_excitation(key_i_beta,key_j_beta,exc_single,phase_single,N_int) - call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) - else if(degree_beta == 2 .and. degree_alpha == 1)then ! double beta + single alpha - call get_double_excitation(key_i_beta,key_j_beta,exc_double,phase_double,N_int) - call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) - call get_single_excitation(key_i_alpha,key_j_alpha,exc_single,phase_single,N_int) - call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) else print*,'PB !!' - print*,'degree_beta, degree_alpha',degree_beta, degree_alpha - print*,'degree',degree stop endif hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) - hthree *= phase_single * phase_double endif + hthree *= phase htot = hthree end diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index c69632f6..e398d8f2 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -95,9 +95,6 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) endif else - if(degree==3)then - print*,'degree == 3' - endif if(degree.gt.3) return if(degree == 0) then call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) From 73fc6078caf1a818dfdee1b117c6b20df0aee443 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 14 Sep 2023 19:55:41 +0200 Subject: [PATCH 73/74] fixed stupid bug in purely parallel spin triple excitation term --- src/tc_bi_ortho/h_mat_triple.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 5f332599..8d5d1ce4 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -283,17 +283,17 @@ subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, if(degree_array(1) == 3)then h1 = holes_array(1,1) h2 = holes_array(2,1) - h3 = holes_array(1,1) + h3 = holes_array(3,1) p1 = particles_array(1,1) p2 = particles_array(2,1) - p3 = particles_array(1,1) + p3 = particles_array(3,1) else h1 = holes_array(1,2) h2 = holes_array(2,2) - h3 = holes_array(1,2) + h3 = holes_array(3,2) p1 = particles_array(1,2) p2 = particles_array(2,2) - p3 = particles_array(1,2) + p3 = particles_array(3,2) endif hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) else From 9b4082c2350a66e5c70296efdd4401d3e8cc49cc Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 15 Sep 2023 01:06:32 +0200 Subject: [PATCH 74/74] added OMP loops in H_tc_triple psi --- src/tc_bi_ortho/h_mat_triple.irp.f | 68 ++++++++++++++++++ src/tc_bi_ortho/slater_tc_opt.irp.f | 3 + src/tc_bi_ortho/tc_h_eigvectors.irp.f | 8 +-- src/tc_bi_ortho/test_s2_tc.irp.f | 100 +++++++++++++------------- 4 files changed, 125 insertions(+), 54 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 8d5d1ce4..4c8c107a 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -221,6 +221,40 @@ subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) @@ -253,6 +287,40 @@ subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) use bitmasks diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index e398d8f2..ab21d3e8 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho() if(three_e_4_idx_term) then PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif + if(pure_three_body_h_tc)then + provide three_body_ints_bi_ort + endif if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 7cb23d77..a9e22e03 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,8 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - external H_tc_s2_dagger_u_0_with_pure_three - external H_tc_s2_u_0_with_pure_three + external H_tc_s2_dagger_u_0_with_pure_three_omp + external H_tc_s2_u_0_with_pure_three_omp allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -255,7 +255,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit @@ -284,7 +284,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index b398507a..7c70b119 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -14,12 +14,14 @@ program test_tc read_wf = .True. touch read_wf - call routine_test_s2 - call routine_test_s2_davidson + call provide_all_three_ints_bi_ortho() + call routine_h_triple_left + call routine_h_triple_right +! call routine_test_s2_davidson end -subroutine routine_test_s2 +subroutine routine_h_triple_right implicit none logical :: do_right integer :: sze ,i, N_st, j @@ -29,67 +31,65 @@ subroutine routine_test_s2 sze = N_det N_st = 1 allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Left ' - do_right = .False. - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) - accu_e = 0.d0 - accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - - print*,'Checking then the right ' - do_right = .True. + print*,'Checking first the Right ' do i = 1, sze u_0(i,1) = psi_r_coef_bi_ortho(i,1) enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 accu_e = 0.d0 accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) enddo print*,'accu_e = ',accu_e print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - end +subroutine routine_h_triple_left + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking the Left ' + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 + accu_e = 0.d0 + accu_s = 0.d0 + do i = 1, sze + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + +end + + subroutine routine_test_s2_davidson implicit none double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:)