From 932befb2bb528c87d367b9d4aab6c1300b4e66aa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 Jul 2019 00:22:44 +0200 Subject: [PATCH 1/5] Properly ordered natural MOs --- src/casscf/bavard.irp.f | 4 +-- src/casscf/bielec.irp.f | 2 +- src/casscf/bielec_natorb.irp.f | 48 +++++++++++++--------------------- src/casscf/casscf.irp.f | 1 + src/casscf/natorb.irp.f | 38 +++++++++++++-------------- 5 files changed, 41 insertions(+), 52 deletions(-) diff --git a/src/casscf/bavard.irp.f b/src/casscf/bavard.irp.f index 402e67ec..a9797712 100644 --- a/src/casscf/bavard.irp.f +++ b/src/casscf/bavard.irp.f @@ -1,6 +1,6 @@ ! -*- F90 -*- BEGIN_PROVIDER [logical, bavard] -! bavard=.true. - bavard=.false. + bavard=.true. +! bavard=.false. END_PROVIDER diff --git a/src/casscf/bielec.irp.f b/src/casscf/bielec.irp.f index daf3f68b..1c6d9e6b 100644 --- a/src/casscf/bielec.irp.f +++ b/src/casscf/bielec.irp.f @@ -52,7 +52,7 @@ END_PROVIDER -BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_orb+n_act_orb,n_core_inact_orb+n_act_orb, mo_num)] +BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_DOC ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active ! indices are unshifted orbital numbers diff --git a/src/casscf/bielec_natorb.irp.f b/src/casscf/bielec_natorb.irp.f index cb09be3e..9968530c 100644 --- a/src/casscf/bielec_natorb.irp.f +++ b/src/casscf/bielec_natorb.irp.f @@ -4,13 +4,13 @@ ! indices are unshifted orbital numbers END_DOC implicit none - integer :: i,j,k,l,t,u,p,q,pp + integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(j,k,l,p,pp,d,f) & + !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) @@ -36,8 +36,7 @@ do k=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - pp=n_act_orb-p+1 - bielec_PQxx_no(list_act(p),j,k,l)=d(pp,j,k) + bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) end do end do @@ -54,9 +53,8 @@ d, n_act_orb) do k=1,n_core_inact_act_orb do p=1,n_act_orb - pp=n_act_orb-p+1 do j=1,mo_num - bielec_PQxx_no(j,list_act(p),k,l)=d(pp,j,k) + bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) end do end do end do @@ -83,10 +81,9 @@ 0.d0, & d, mo_num*mo_num) do p=1,n_act_orb - pp=n_act_orb-p+1 do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,pp) + bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) end do end do end do @@ -110,10 +107,9 @@ 0.d0, & d, mo_num*mo_num) do p=1,n_act_orb - pp=n_act_orb-p+1 do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,pp) + bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -133,11 +129,11 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac ! indices are unshifted orbital numbers END_DOC implicit none - integer :: i,j,k,l,t,u,p,q,pp + integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(j,k,l,p,pp,d,f) & + !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) @@ -163,8 +159,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do l=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb do p=1,n_act_orb - pp=n_act_orb-p+1 - bielec_PxxQ_no(list_act(p),k,l,j)=d(pp,k,l) + bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) end do end do end do @@ -193,8 +188,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do l=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - pp=n_act_orb-p+1 - bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(pp,j,l) + bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) end do end do end do @@ -221,10 +215,9 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac 0.d0, & d, mo_num*n_core_inact_act_orb) do p=1,n_act_orb - pp=n_act_orb-p+1 do l=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,pp) + bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) end do end do end do @@ -248,10 +241,9 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac 0.d0, & d, mo_num*n_core_inact_act_orb) do p=1,n_act_orb - pp=n_act_orb-p+1 do k=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,pp) + bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -269,11 +261,11 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] ! index p runs over the whole basis, t,u,v only over the active orbitals END_DOC implicit none - integer :: i,j,k,l,t,u,p,q,pp + integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(j,k,l,p,pp,d,f) & + !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP bielecCI_no,bielecCI,list_act,natorbsCI) @@ -298,8 +290,7 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] do k=1,n_act_orb do j=1,n_act_orb do p=1,n_act_orb - pp=n_act_orb-p+1 - bielecCI_no(p,j,k,l)=d(pp,j,k) + bielecCI_no(p,j,k,l)=d(p,j,k) end do end do @@ -316,9 +307,8 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] d, n_act_orb) do k=1,n_act_orb do p=1,n_act_orb - pp=n_act_orb-p+1 do j=1,n_act_orb - bielecCI_no(j,p,k,l)=d(pp,j,k) + bielecCI_no(j,p,k,l)=d(p,j,k) end do end do end do @@ -337,10 +327,9 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] d, n_act_orb*n_act_orb) do p=1,n_act_orb - pp=n_act_orb-p+1 do k=1,n_act_orb do j=1,n_act_orb - bielecCI_no(j,k,p,l)=d(j,k,pp) + bielecCI_no(j,k,p,l)=d(j,k,p) end do end do end do @@ -363,10 +352,9 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] d, n_act_orb*n_act_orb) do p=1,n_act_orb - pp=n_act_orb-p+1 do k=1,n_act_orb do j=1,n_act_orb - bielecCI_no(j,k,l,list_act(p))=d(j,k,pp) + bielecCI_no(j,k,l,list_act(p))=d(j,k,p) end do end do end do diff --git a/src/casscf/casscf.irp.f b/src/casscf/casscf.irp.f index 4270a9fb..1b77cf43 100644 --- a/src/casscf/casscf.irp.f +++ b/src/casscf/casscf.irp.f @@ -32,6 +32,7 @@ subroutine run converged = dabs(energy_improvement) < thresh_scf pt2_max = dabs(energy_improvement / pt2_relative_error) + call update_integrals mo_coef = NewOrbs call save_mos call map_deinit(mo_integrals_map) diff --git a/src/casscf/natorb.irp.f b/src/casscf/natorb.irp.f index dcbcd413..c84b4862 100644 --- a/src/casscf/natorb.irp.f +++ b/src/casscf/natorb.irp.f @@ -11,7 +11,7 @@ end do do i=1,n_act_orb - occnum(list_act(i))=occ_act(n_act_orb-i+1) + occnum(list_act(i))=occ_act(i) end do if (bavard) then @@ -31,8 +31,10 @@ END_PROVIDER ! Natural orbitals of CI END_DOC integer :: i, j + double precision :: Vt(n_act_orb,n_act_orb) - call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) +! call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) + call svd(D0tu, size(D0tu,1), natorbsCI,size(natorbsCI,1), occ_act, Vt, size(Vt,1),n_act_orb,n_act_orb) if (bavard) then write(6,*) ' found occupation numbers as ' @@ -70,7 +72,7 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] BEGIN_DOC ! 4-index transformation of 2part matrices END_DOC - integer :: i,j,k,l,p,q,pp + integer :: i,j,k,l,p,q real*8 :: d(n_act_orb) ! index per index @@ -84,9 +86,8 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) + d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -103,9 +104,8 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) + d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -122,9 +122,8 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) + d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -141,9 +140,8 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) + d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -162,7 +160,7 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] BEGIN_DOC ! Transformed one-e integrals END_DOC - integer :: i,j, p, pp, q + integer :: i,j, p, q real*8 :: d(n_act_orb) one_ints_no(:,:)=mo_one_e_integrals(:,:) @@ -172,9 +170,8 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -188,9 +185,8 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) + d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -205,7 +201,7 @@ BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] BEGIN_DOC ! FCI natural orbitals END_DOC - integer :: i,j, p, pp, q + integer :: i,j, p, q real*8 :: d(n_act_orb) NatOrbsFCI(:,:)=mo_coef(:,:) @@ -215,14 +211,18 @@ BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] d(p)=0.D0 end do do p=1,n_act_orb - pp=n_act_orb-p+1 do q=1,n_act_orb - d(pp)+=NatOrbsFCI(j,list_act(q))*natorbsCI(q,p) + d(p)+=NatOrbsFCI(j,list_act(q))*natorbsCI(q,p) end do end do do p=1,n_act_orb NatOrbsFCI(j,list_act(p))=d(p) end do end do + +! call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & +! NatOrbsFCI, size(NatOrbsFCI,1), & +! Umat, size(Umat,1), 0.d0, & +! NewOrbs, size(NewOrbs,1)) END_PROVIDER From 03367381097efc90c32f73ac244ee1c2570b618d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 Jul 2019 18:50:22 +0200 Subject: [PATCH 2/5] Working on fast integrals --- src/casscf/bavard.irp.f | 4 +- src/casscf/casscf.irp.f | 11 +- src/casscf/get_energy.irp.f | 26 +-- src/casscf/natorb.irp.f | 47 +++--- src/mo_two_e_ints/four_idx_novvvv.irp.f | 202 ++++++++++++++++++++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 134 +++------------- 6 files changed, 260 insertions(+), 164 deletions(-) create mode 100644 src/mo_two_e_ints/four_idx_novvvv.irp.f diff --git a/src/casscf/bavard.irp.f b/src/casscf/bavard.irp.f index a9797712..402e67ec 100644 --- a/src/casscf/bavard.irp.f +++ b/src/casscf/bavard.irp.f @@ -1,6 +1,6 @@ ! -*- F90 -*- BEGIN_PROVIDER [logical, bavard] - bavard=.true. -! bavard=.false. +! bavard=.true. + bavard=.false. END_PROVIDER diff --git a/src/casscf/casscf.irp.f b/src/casscf/casscf.irp.f index 1b77cf43..3fc6fd8e 100644 --- a/src/casscf/casscf.irp.f +++ b/src/casscf/casscf.irp.f @@ -4,8 +4,10 @@ program casscf ! TODO : Put the documentation of the program here END_DOC no_vvvv_integrals = .True. + no_ivvv_integrals = .True. + no_vvv_integrals = .True. pt2_max = 0.02 - SOFT_TOUCH no_vvvv_integrals pt2_max + SOFT_TOUCH no_vvvv_integrals no_vvv_integrals pt2_max call run end @@ -32,17 +34,16 @@ subroutine run converged = dabs(energy_improvement) < thresh_scf pt2_max = dabs(energy_improvement / pt2_relative_error) - call update_integrals mo_coef = NewOrbs call save_mos - call map_deinit(mo_integrals_map) iteration += 1 N_det = N_det/2 psi_det = psi_det_sorted psi_coef = psi_coef_sorted read_wf = .True. - FREE mo_integrals_map mo_two_e_integrals_in_map - SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef + call map_deinit(mo_integrals_map) + FREE mo_two_e_integrals_in_map mo_integrals_map + SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef enddo diff --git a/src/casscf/get_energy.irp.f b/src/casscf/get_energy.irp.f index 0a5cfb49..772af92a 100644 --- a/src/casscf/get_energy.irp.f +++ b/src/casscf/get_energy.irp.f @@ -5,32 +5,12 @@ program print_2rdm ! ! useful to test the active part of the spin trace 2 rdms END_DOC + no_vvvv_integrals = .True. read_wf = .True. - touch read_wf + touch read_wf no_vvvv_integrals call routine end subroutine routine - integer :: i,j,k,l - integer :: ii,jj,kk,ll - double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral - thr = 1.d-10 - - - accu = 0.d0 - do ll = 1, n_act_orb - l = list_act(ll) - do kk = 1, n_act_orb - k = list_act(kk) - do jj = 1, n_act_orb - j = list_act(jj) - do ii = 1, n_act_orb - i = list_act(ii) - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - accu(1) += act_two_rdm_spin_trace_mo(ii,jj,kk,ll) * integral - enddo - enddo - enddo - enddo - print*,'accu = ',accu(1) + print *, psi_energy_with_nucl_rep end diff --git a/src/casscf/natorb.irp.f b/src/casscf/natorb.irp.f index c84b4862..9ce90304 100644 --- a/src/casscf/natorb.irp.f +++ b/src/casscf/natorb.irp.f @@ -196,33 +196,36 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] END_PROVIDER +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] implicit none BEGIN_DOC ! FCI natural orbitals END_DOC - integer :: i,j, p, q - real*8 :: d(n_act_orb) - NatOrbsFCI(:,:)=mo_coef(:,:) - - do j=1,ao_num - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=NatOrbsFCI(j,list_act(q))*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - NatOrbsFCI(j,list_act(p))=d(p) - end do - end do - -! call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & -! NatOrbsFCI, size(NatOrbsFCI,1), & -! Umat, size(Umat,1), 0.d0, & -! NewOrbs, size(NewOrbs,1)) + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f new file mode 100644 index 00000000..124c946a --- /dev/null +++ b/src/mo_two_e_ints/four_idx_novvvv.irp.f @@ -0,0 +1,202 @@ +BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] + implicit none + BEGIN_DOC + ! MO coefficients without virtual MOs + END_DOC + integer :: j,jj + + do j=1,n_core_inact_act_orb + jj = list_core_inact_act(j) + mo_coef_novirt(:,j) = mo_coef(:,jj) + enddo + +END_PROVIDER + +subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis excluding virtuals + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,n_core_inact_act_orb) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef_novirt, size(mo_coef_novirt,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& + 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + + +subroutine four_idx_novvvv + use map_module + implicit none + BEGIN_DOC + ! Retransform MO integrals for next CAS-SCF step + END_DOC + integer :: i,j,k,l,n_integrals + double precision, allocatable :: f(:,:,:), d(:,:), T(:,:,:,:) + double precision, external :: get_ao_two_e_integral + integer(key_kind), allocatable :: idx(:) + real(integral_kind), allocatable :: values(:) + + integer :: p,q,r,s + double precision, allocatable :: ijij(:), ijji(:), jqjs(:,:,:), jqrj(:,:,:) + double precision :: c + allocate (jqjs(mo_num,ao_num,ao_num), jqrj(mo_num,ao_num,ao_num)) + + allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & + !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & + !$OMP list_core_inact_act,jqjs,jqrj,ao_integrals_map) & + !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & + !$OMP f,d,c,ijij,ijji) + allocate(f(ao_num,ao_num,ao_num), d(mo_num,mo_num), & + idx(mo_num*mo_num), values(mo_num*mo_num) ) + allocate(ijij(mo_num), ijji(mo_num)) + + ! + !$OMP DO + do s=1,ao_num + T(:,:,:,s) = 0.d0 + jqjs(:,:,s) = 0.d0 + jqrj(:,:,s) = 0.d0 + enddo + !$OMP END DO + + !$OMP DO + do s=1,ao_num + do r=1,ao_num + do q=1,ao_num + do p=1,r + f(p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) + f(r,q,p) = f(p,q,r) + enddo + enddo + enddo + ! f(p,q,r) = + + do r=1,ao_num + call ao_to_mo_novirt(f(1,1,r),size(f,1),T(1,1,r,s),size(T,1)) + enddo + ! T(i,j,p,q) = + + ! Diagonal + do r=1,ao_num + do q=1,ao_num + do p=1,ao_num + if (dabs(f(p,q,r)) >= mo_integrals_threshold) then + do i=1,mo_num + jqjs(i,q,s) = jqjs(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) + enddo + endif + enddo + enddo + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do s=1,ao_num + do r=1,ao_num + do q=1,r + do p=1,ao_num + f(p,q,r) = get_ao_two_e_integral(p,q,s,r,ao_integrals_map) + f(p,r,q) = f(p,q,r) + enddo + enddo + enddo + ! f(p,q,r) = + + ! Diagonal + do r=1,ao_num + do q=1,ao_num + do p=1,ao_num + if (dabs(f(p,q,r)) >= mo_integrals_threshold) then + do i=1,mo_num + jqrj(i,q,s) = jqrj(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) + enddo + endif + enddo + enddo + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do i=1,mo_num + ijij(:) = 0.d0 + ijji(:) = 0.d0 + do s=1,ao_num + do q=1,ao_num + do j=1,mo_num + c = mo_coef_transp(j,q) * mo_coef_transp(j,s) + ijij(j) = ijij(j) + jqjs(i,q,s) * c + ijji(j) = ijji(j) + jqrj(i,q,s) * c + enddo + enddo + enddo + do j=1,mo_num + call two_e_integrals_index(i,j,i,j,idx(j)) + values(j) = ijij(j) + enddo + do j=1,mo_num + call two_e_integrals_index(i,j,j,i,idx(mo_num+j)) + values(mo_num+j) = ijji(j) + enddo + call map_append(mo_integrals_map, idx, values, 2*mo_num) + enddo + !$OMP END DO + + !$OMP DO + do j=1,n_core_inact_act_orb + do i=1,n_core_inact_act_orb + do s=1,ao_num + do r=1,ao_num + f(r,s,1) = T(i,j,r,s) + enddo + enddo + call ao_to_mo(f,size(f,1),d,size(d,1)) + n_integrals = 0 + do l=1,mo_num + do k=1,mo_num + n_integrals+=1 + call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) + values(n_integrals) = d(k,l) + enddo + enddo + call map_append(mo_integrals_map, idx, values, n_integrals) + enddo + enddo + !$OMP END DO + deallocate(f,d,ijij,ijji,idx,values) + + !$OMP END PARALLEL + + deallocate(T,jqrj,jqjs) + + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) + +end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index fccf22a6..bca3df1a 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -22,16 +22,13 @@ end BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] use map_module implicit none - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - BEGIN_DOC ! If True, the map of MO two-electron integrals is provided END_DOC + integer(bit_kind) :: mask_ijkl(N_int,4) + integer(bit_kind) :: mask_ijk(N_int,3) + double precision :: cpu_1, cpu_2, wall_1, wall_2 - ! The following line avoids a subsequent crash when the memory used is more - ! than half of the virtual memory, due to a fork in zcat when reading arrays - ! with EZFIO PROVIDE mo_class mo_two_e_integrals_in_map = .True. @@ -49,106 +46,28 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] print *, '---------------------------------' print *, '' + call wall_time(wall_1) + call cpu_time(cpu_1) + if(no_vvvv_integrals)then - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! - if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) - endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! - if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) - endif - + call four_idx_novvvv else call add_integrals_to_map(full_ijkl_bitmask_4) - -! call four_index_transform_zmq(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! -! call four_index_transform_block(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! -! call four_index_transform(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) - - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - print*,'Molecular integrals provided' endif + + call wall_time(wall_2) + call cpu_time(cpu_2) + + integer*8 :: get_mo_map_size, mo_map_size + mo_map_size = get_mo_map_size() + + double precision, external :: map_mb + print*,'Molecular integrals provided:' + print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' + print*,' Number of MO integrals: ', mo_map_size + print*,' cpu time :',cpu_2 - cpu_1, 's' + print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' + if (write_mo_two_e_integrals.and.mpi_master) then call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) @@ -185,7 +104,7 @@ subroutine add_integrals_to_map(mask_ijkl) integer :: size_buffer integer(key_kind),allocatable :: buffer_i(:) real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb + double precision, external :: map_mb integer :: i1,j1,k1,l1, ii1, kmax, thread_num integer :: i2,i3,i4 @@ -247,12 +166,9 @@ subroutine add_integrals_to_map(mask_ijkl) endif size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - call wall_time(wall_1) - call cpu_time(cpu_1) double precision :: accu_bis accu_bis = 0.d0 @@ -452,12 +368,6 @@ subroutine add_integrals_to_map(mask_ijkl) deallocate(list_ijkl) - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - end From a744bc30d4f00944c3232d6a7724b2af6adb9fe0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 Jul 2019 23:53:05 +0200 Subject: [PATCH 3/5] Fast 4-idx transformation --- src/casscf/bielec.irp.f | 1 - src/casscf/casscf.irp.f | 7 +- src/mo_two_e_ints/EZFIO.cfg | 21 ---- src/mo_two_e_ints/four_idx_novvvv.irp.f | 156 ++++++++++-------------- src/mo_two_e_ints/mo_bi_integrals.irp.f | 24 +--- 5 files changed, 70 insertions(+), 139 deletions(-) diff --git a/src/casscf/bielec.irp.f b/src/casscf/bielec.irp.f index 1c6d9e6b..0a44f994 100644 --- a/src/casscf/bielec.irp.f +++ b/src/casscf/bielec.irp.f @@ -153,4 +153,3 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] !$OMP END PARALLEL DO END_PROVIDER - diff --git a/src/casscf/casscf.irp.f b/src/casscf/casscf.irp.f index 3fc6fd8e..8fe77fcc 100644 --- a/src/casscf/casscf.irp.f +++ b/src/casscf/casscf.irp.f @@ -4,10 +4,8 @@ program casscf ! TODO : Put the documentation of the program here END_DOC no_vvvv_integrals = .True. - no_ivvv_integrals = .True. - no_vvv_integrals = .True. pt2_max = 0.02 - SOFT_TOUCH no_vvvv_integrals no_vvv_integrals pt2_max + SOFT_TOUCH no_vvvv_integrals pt2_max call run end @@ -41,8 +39,7 @@ subroutine run psi_det = psi_det_sorted psi_coef = psi_coef_sorted read_wf = .True. - call map_deinit(mo_integrals_map) - FREE mo_two_e_integrals_in_map mo_integrals_map + call clear_mo_map SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef enddo diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 57681638..bec74552 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -11,24 +11,3 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_mo -[no_vvvv_integrals] -type: logical -doc: If `True`, computes all integrals except for the integrals having 4 virtual indices -interface: ezfio,provider,ocaml -default: False -ezfio_name: no_vvvv_integrals - -[no_ivvv_integrals] -type: logical -doc: Can be switched on only if `no_vvvv_integrals` is `True`, then does not compute the integrals with 3 virtual indices and 1 belonging to the core inactive active orbitals -interface: ezfio,provider,ocaml -default: False -ezfio_name: no_ivvv_integrals - -[no_vvv_integrals] -type: logical -doc: Can be switched on only if `no_vvvv_integrals` is `True`, then does not compute the integrals with 3 virtual orbitals -interface: ezfio,provider,ocaml -default: False -ezfio_name: no_vvv_integrals - diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f index 124c946a..054d0a35 100644 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ b/src/mo_two_e_ints/four_idx_novvvv.irp.f @@ -1,3 +1,12 @@ +BEGIN_PROVIDER [ logical, no_vvvv_integrals ] + implicit none + BEGIN_DOC +! If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices + END_DOC + + no_vvvv_integrals = .False. +END_PROVIDER + BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] implicit none BEGIN_DOC @@ -48,134 +57,66 @@ subroutine four_idx_novvvv ! Retransform MO integrals for next CAS-SCF step END_DOC integer :: i,j,k,l,n_integrals - double precision, allocatable :: f(:,:,:), d(:,:), T(:,:,:,:) + double precision, allocatable :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:) double precision, external :: get_ao_two_e_integral integer(key_kind), allocatable :: idx(:) real(integral_kind), allocatable :: values(:) integer :: p,q,r,s - double precision, allocatable :: ijij(:), ijji(:), jqjs(:,:,:), jqrj(:,:,:) double precision :: c - allocate (jqjs(mo_num,ao_num,ao_num), jqrj(mo_num,ao_num,ao_num)) - - allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) + allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , & + T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & - !$OMP list_core_inact_act,jqjs,jqrj,ao_integrals_map) & + !$OMP list_core_inact_act,T2,ao_integrals_map) & !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & - !$OMP f,d,c,ijij,ijji) - allocate(f(ao_num,ao_num,ao_num), d(mo_num,mo_num), & + !$OMP f,f2,d,c) + allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), & idx(mo_num*mo_num), values(mo_num*mo_num) ) - allocate(ijij(mo_num), ijji(mo_num)) ! - !$OMP DO - do s=1,ao_num - T(:,:,:,s) = 0.d0 - jqjs(:,:,s) = 0.d0 - jqrj(:,:,s) = 0.d0 - enddo - !$OMP END DO - !$OMP DO do s=1,ao_num do r=1,ao_num do q=1,ao_num do p=1,r - f(p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) - f(r,q,p) = f(p,q,r) + f (p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) + f (r,q,p) = f(p,q,r) enddo enddo enddo - ! f(p,q,r) = - - do r=1,ao_num - call ao_to_mo_novirt(f(1,1,r),size(f,1),T(1,1,r,s),size(T,1)) - enddo - ! T(i,j,p,q) = - - ! Diagonal do r=1,ao_num do q=1,ao_num do p=1,ao_num - if (dabs(f(p,q,r)) >= mo_integrals_threshold) then - do i=1,mo_num - jqjs(i,q,s) = jqjs(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) - enddo - endif + f2(p,q,r) = f(p,r,q) enddo enddo enddo - - enddo - !$OMP END DO NOWAIT + ! f (p,q,r) = + ! f2(p,q,r) = - !$OMP DO - do s=1,ao_num do r=1,ao_num - do q=1,r - do p=1,ao_num - f(p,q,r) = get_ao_two_e_integral(p,q,s,r,ao_integrals_map) - f(p,r,q) = f(p,q,r) - enddo - enddo + call ao_to_mo_novirt(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1)) + call ao_to_mo_novirt(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1)) enddo - ! f(p,q,r) = + ! T (i,j,p,q) = + ! T2(i,j,p,q) = - ! Diagonal - do r=1,ao_num - do q=1,ao_num - do p=1,ao_num - if (dabs(f(p,q,r)) >= mo_integrals_threshold) then - do i=1,mo_num - jqrj(i,q,s) = jqrj(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) - enddo - endif - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP BARRIER - - !$OMP DO - do i=1,mo_num - ijij(:) = 0.d0 - ijji(:) = 0.d0 - do s=1,ao_num - do q=1,ao_num - do j=1,mo_num - c = mo_coef_transp(j,q) * mo_coef_transp(j,s) - ijij(j) = ijij(j) + jqjs(i,q,s) * c - ijji(j) = ijji(j) + jqrj(i,q,s) * c - enddo - enddo - enddo - do j=1,mo_num - call two_e_integrals_index(i,j,i,j,idx(j)) - values(j) = ijij(j) - enddo - do j=1,mo_num - call two_e_integrals_index(i,j,j,i,idx(mo_num+j)) - values(mo_num+j) = ijji(j) - enddo - call map_append(mo_integrals_map, idx, values, 2*mo_num) - enddo - !$OMP END DO + !$OMP END DO !$OMP DO do j=1,n_core_inact_act_orb do i=1,n_core_inact_act_orb do s=1,ao_num do r=1,ao_num - f(r,s,1) = T(i,j,r,s) + f (r,s,1) = T (i,j,r,s) + f2(r,s,1) = T2(i,j,r,s) enddo enddo - call ao_to_mo(f,size(f,1),d,size(d,1)) + call ao_to_mo(f ,size(f ,1),d,size(d,1)) n_integrals = 0 do l=1,mo_num do k=1,mo_num @@ -185,14 +126,25 @@ subroutine four_idx_novvvv enddo enddo call map_append(mo_integrals_map, idx, values, n_integrals) + + call ao_to_mo(f2,size(f2,1),d,size(d,1)) + n_integrals = 0 + do l=1,mo_num + do k=1,mo_num + n_integrals+=1 + call two_e_integrals_index(list_core_inact_act(i),k,list_core_inact_act(j),l,idx(n_integrals)) + values(n_integrals) = d(k,l) + enddo + enddo + call map_append(mo_integrals_map, idx, values, n_integrals) enddo enddo !$OMP END DO - deallocate(f,d,ijij,ijji,idx,values) + deallocate(f,f2,d,idx,values) !$OMP END PARALLEL - deallocate(T,jqrj,jqjs) + deallocate(T,T2) call map_sort(mo_integrals_map) @@ -200,3 +152,29 @@ subroutine four_idx_novvvv call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) end + +subroutine four_idx_novvvv2 + use bitmasks + implicit none + integer :: i + integer(bit_kind) :: mask_ijkl(N_int,4) + + print*, '' + do i = 1,N_int + mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) + mask_ijkl(i,2) = full_ijkl_bitmask_4(i,1) + mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) + mask_ijkl(i,4) = full_ijkl_bitmask_4(i,1) + enddo + call add_integrals_to_map(mask_ijkl) + + print*, '' + do i = 1,N_int + mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) + mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) + mask_ijkl(i,3) = virt_bitmask(i,1) + mask_ijkl(i,4) = virt_bitmask(i,1) + enddo + call add_integrals_to_map(mask_ijkl) + +end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index bca3df1a..a9983e51 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -120,10 +120,6 @@ subroutine add_integrals_to_map(mask_ijkl) call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int ) call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) - character*(2048) :: output(1) - print *, 'i' - call bitstring_to_str( output(1), mask_ijkl(1,1), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijkl(i,1)) @@ -132,9 +128,6 @@ subroutine add_integrals_to_map(mask_ijkl) return endif - print*, 'j' - call bitstring_to_str( output(1), mask_ijkl(1,2), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijkl(i,2)) @@ -143,9 +136,6 @@ subroutine add_integrals_to_map(mask_ijkl) return endif - print*, 'k' - call bitstring_to_str( output(1), mask_ijkl(1,3), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijkl(i,3)) @@ -154,9 +144,6 @@ subroutine add_integrals_to_map(mask_ijkl) return endif - print*, 'l' - call bitstring_to_str( output(1), mask_ijkl(1,4), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijkl(i,4)) @@ -171,6 +158,7 @@ subroutine add_integrals_to_map(mask_ijkl) double precision :: accu_bis accu_bis = 0.d0 + call wall_time(wall_1) !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& @@ -414,10 +402,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int ) call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int ) call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int ) - character*(2048) :: output(1) - print*, 'i' - call bitstring_to_str( output(1), mask_ijk(1,1), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijk(i,1)) @@ -426,9 +410,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif - print*, 'j' - call bitstring_to_str( output(1), mask_ijk(1,2), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijk(i,2)) @@ -437,9 +418,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif - print*, 'k' - call bitstring_to_str( output(1), mask_ijk(1,3), N_int ) - print *, trim(output(1)) j = 0 do i = 1, N_int j += popcnt(mask_ijk(i,3)) From 970fd8837ad447cab5ffba0a7a4fdbcef54ff33f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 6 Jul 2019 01:04:50 +0200 Subject: [PATCH 4/5] OpenMP in Hessian --- src/casscf/hessian.irp.f | 94 +++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 34 deletions(-) diff --git a/src/casscf/hessian.irp.f b/src/casscf/hessian.irp.f index 75a27410..52be1b76 100644 --- a/src/casscf/hessian.irp.f +++ b/src/casscf/hessian.irp.f @@ -189,7 +189,7 @@ BEGIN_PROVIDER [real*8, hessmat2, (nMonoEx,nMonoEx)] ! END_DOC implicit none - integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart + integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart,indx_shift real*8 :: hessmat_itju real*8 :: hessmat_itja @@ -203,9 +203,14 @@ BEGIN_PROVIDER [real*8, hessmat2, (nMonoEx,nMonoEx)] write(6,*) ' nMonoEx = ',nMonoEx endif - indx=1 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat2,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,jndx,j,ustart,t,u,a,bstart,indx_shift) + + !$OMP DO do i=1,n_core_inact_orb do t=1,n_act_orb + indx = t + (i-1)*n_act_orb jndx=indx do j=i,n_core_inact_orb if (i.eq.j) then @@ -214,31 +219,31 @@ BEGIN_PROVIDER [real*8, hessmat2, (nMonoEx,nMonoEx)] ustart=1 end if do u=ustart,n_act_orb - hessmat2(indx,jndx)=hessmat_itju(i,t,j,u) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_itju(i,t,j,u) jndx+=1 end do end do do j=1,n_core_inact_orb do a=1,n_virt_orb - hessmat2(indx,jndx)=hessmat_itja(i,t,j,a) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_itja(i,t,j,a) jndx+=1 end do end do do u=1,n_act_orb do a=1,n_virt_orb - hessmat2(indx,jndx)=hessmat_itua(i,t,u,a) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_itua(i,t,u,a) jndx+=1 end do end do - indx+=1 end do end do + !$OMP END DO NOWAIT - do i=1,n_core_inact_orb - do a=1,n_virt_orb + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift jndx=indx do j=i,n_core_inact_orb if (i.eq.j) then @@ -247,24 +252,25 @@ BEGIN_PROVIDER [real*8, hessmat2, (nMonoEx,nMonoEx)] bstart=1 end if do b=bstart,n_virt_orb - hessmat2(indx,jndx)=hessmat_iajb(i,a,j,b) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_iajb(i,a,j,b) jndx+=1 end do end do do t=1,n_act_orb do b=1,n_virt_orb - hessmat2(indx,jndx)=hessmat_iatb(i,a,t,b) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_iatb(i,a,t,b) jndx+=1 end do end do - indx+=1 end do end do + !$OMP END DO NOWAIT - do t=1,n_act_orb - do a=1,n_virt_orb + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift jndx=indx do u=t,n_act_orb if (t.eq.u) then @@ -273,14 +279,22 @@ BEGIN_PROVIDER [real*8, hessmat2, (nMonoEx,nMonoEx)] bstart=1 end if do b=bstart,n_virt_orb - hessmat2(indx,jndx)=hessmat_taub(t,a,u,b) - hessmat2(jndx,indx)=hessmat2(indx,jndx) + hessmat2(jndx,indx)=hessmat_taub(t,a,u,b) jndx+=1 end do end do - indx+=1 end do end do + !$OMP END DO + + !$OMP END PARALLEL + + do jndx=1,nMonoEx + do indx=1,jndx-1 + hessmat2(indx,jndx) = hessmat2(jndx,indx) + enddo + enddo + END_PROVIDER @@ -524,8 +538,8 @@ real*8 function hessmat_taub(t,a,u,b) tt=list_act(t) aa=list_virt(a) - if (t.eq.u) then - if (a.eq.b) then + if (t == u) then + if (a == b) then ! ta/ta t1=occnum(tt)*Fipq(aa,aa) t2=0.D0 @@ -581,8 +595,8 @@ real*8 function hessmat_taub(t,a,u,b) if (a.eq.b) then term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) do v=1,n_act_orb - do x=1,n_act_orb - do y=1,n_act_orb + do y=1,n_act_orb + do x=1,n_act_orb term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) end do @@ -602,29 +616,41 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] ! the diagonal of the Hessian, needed for the Davidson procedure END_DOC implicit none - integer :: i,t,a,indx + integer :: i,t,a,indx,indx_shift real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub - indx=0 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,t,a,indx_shift) + + !$OMP DO do i=1,n_core_inact_orb do t=1,n_act_orb - indx+=1 + indx = t + (i-1)*n_act_orb hessdiag(indx)=hessmat_itju(i,t,i,t) end do end do + !$OMP END DO NOWAIT - do i=1,n_core_inact_orb - do a=1,n_virt_orb - indx+=1 + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift hessdiag(indx)=hessmat_iajb(i,a,i,a) end do end do + !$OMP END DO NOWAIT - do t=1,n_act_orb - do a=1,n_virt_orb - indx+=1 + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift hessdiag(indx)=hessmat_taub(t,a,t,a) end do end do + !$OMP END DO + !$OMP END PARALLEL END_PROVIDER From 3f69f95275d62048437d0c43b754e54dab800b31 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 6 Jul 2019 02:17:07 +0200 Subject: [PATCH 5/5] Optimized Hessian --- src/casscf/hessian.irp.f | 73 ++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/src/casscf/hessian.irp.f b/src/casscf/hessian.irp.f index 52be1b76..06aed6ef 100644 --- a/src/casscf/hessian.irp.f +++ b/src/casscf/hessian.irp.f @@ -536,6 +536,9 @@ real*8 function hessmat_taub(t,a,u,b) integer :: v3,x3 real*8 :: term,t1,t2,t3 + double precision,allocatable :: P0tuvx_no_t(:,:,:) + double precision :: bielec_pqxx_no_2(n_act_orb,n_act_orb) + double precision :: bielec_pxxq_no_2(n_act_orb,n_act_orb) tt=list_act(t) aa=list_virt(a) if (t == u) then @@ -545,59 +548,87 @@ real*8 function hessmat_taub(t,a,u,b) t2=0.D0 t3=0.D0 t1-=occnum(tt)*Fipq(tt,tt) + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + t2+=P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) + end do + end do do v=1,n_act_orb vv=list_act(v) v3=v+n_core_inact_orb do x=1,n_act_orb xx=list_act(x) x3=x+n_core_inact_orb - t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & - +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & - bielec_pxxq_no(aa,x3,v3,aa)) - do y=1,n_act_orb - t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + t2+=(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(aa,x3,v3,aa) + end do + end do + do y=1,n_act_orb + do x=1,n_act_orb + xx=list_act(x) + do v=1,n_act_orb + t3-=P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) end do end do end do - term=t1+t2+t3 + term=t1+2.d0*(t2+t3) else bb=list_virt(b) ! ta/tb b/=a - term=occnum(tt)*Fipq(aa,bb) + term=0.5d0*occnum(tt)*Fipq(aa,bb) + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + term = term + P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) + end do + end do do v=1,n_act_orb vv=list_act(v) v3=v+n_core_inact_orb do x=1,n_act_orb xx=list_act(x) x3=x+n_core_inact_orb - term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & - +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & - *bielec_pxxq_no(aa,x3,v3,bb)) + term= term + (P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & + *bielec_pxxq_no(aa,x3,v3,bb) end do end do + term += term end if else ! ta/ub t/=u uu=list_act(u) bb=list_virt(b) - term=0.D0 - do v=1,n_act_orb - vv=list_act(v) - v3=v+n_core_inact_orb - do x=1,n_act_orb - xx=list_act(x) - x3=x+n_core_inact_orb - term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & - +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & - *bielec_pxxq_no(aa,x3,v3,bb)) + allocate(P0tuvx_no_t(n_act_orb,n_act_orb,n_act_orb)) + P0tuvx_no_t(:,:,:) = P0tuvx_no(t,:,:,:) + do x=1,n_act_orb + x3=x+n_core_inact_orb + do v=1,n_act_orb + v3=v+n_core_inact_orb + bielec_pqxx_no_2(v,x) = bielec_pqxx_no(aa,bb,v3,x3) + bielec_pxxq_no_2(v,x) = bielec_pxxq_no(aa,v3,x3,bb) end do end do + term=0.D0 + do x=1,n_act_orb + do v=1,n_act_orb + term += P0tuvx_no_t(u,v,x)*bielec_pqxx_no_2(v,x) + term += bielec_pxxq_no_2(x,v) * (P0tuvx_no_t(x,v,u)+P0tuvx_no_t(x,u,v)) + end do + end do + term = 6.d0*term if (a.eq.b) then term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) do v=1,n_act_orb do y=1,n_act_orb do x=1,n_act_orb - term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) + term-=P0tuvx_no_t(v,x,y)*bielecCI_no(x,y,v,uu) term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) end do end do