From 03367381097efc90c32f73ac244ee1c2570b618d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 Jul 2019 18:50:22 +0200 Subject: [PATCH] Working on fast integrals --- src/casscf/bavard.irp.f | 4 +- src/casscf/casscf.irp.f | 11 +- src/casscf/get_energy.irp.f | 26 +-- src/casscf/natorb.irp.f | 47 +++--- src/mo_two_e_ints/four_idx_novvvv.irp.f | 202 ++++++++++++++++++++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 134 +++------------- 6 files changed, 260 insertions(+), 164 deletions(-) create mode 100644 src/mo_two_e_ints/four_idx_novvvv.irp.f diff --git a/src/casscf/bavard.irp.f b/src/casscf/bavard.irp.f index a9797712..402e67ec 100644 --- a/src/casscf/bavard.irp.f +++ b/src/casscf/bavard.irp.f @@ -1,6 +1,6 @@ ! -*- F90 -*- BEGIN_PROVIDER [logical, bavard] - bavard=.true. -! bavard=.false. +! bavard=.true. + bavard=.false. END_PROVIDER diff --git a/src/casscf/casscf.irp.f b/src/casscf/casscf.irp.f index 1b77cf43..3fc6fd8e 100644 --- a/src/casscf/casscf.irp.f +++ b/src/casscf/casscf.irp.f @@ -4,8 +4,10 @@ program casscf ! TODO : Put the documentation of the program here END_DOC no_vvvv_integrals = .True. + no_ivvv_integrals = .True. + no_vvv_integrals = .True. pt2_max = 0.02 - SOFT_TOUCH no_vvvv_integrals pt2_max + SOFT_TOUCH no_vvvv_integrals no_vvv_integrals pt2_max call run end @@ -32,17 +34,16 @@ subroutine run converged = dabs(energy_improvement) < thresh_scf pt2_max = dabs(energy_improvement / pt2_relative_error) - call update_integrals mo_coef = NewOrbs call save_mos - call map_deinit(mo_integrals_map) iteration += 1 N_det = N_det/2 psi_det = psi_det_sorted psi_coef = psi_coef_sorted read_wf = .True. - FREE mo_integrals_map mo_two_e_integrals_in_map - SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef + call map_deinit(mo_integrals_map) + FREE mo_two_e_integrals_in_map mo_integrals_map + SOFT_TOUCH mo_coef N_det pt2_max psi_det psi_coef enddo diff --git a/src/casscf/get_energy.irp.f b/src/casscf/get_energy.irp.f index 0a5cfb49..772af92a 100644 --- a/src/casscf/get_energy.irp.f +++ b/src/casscf/get_energy.irp.f @@ -5,32 +5,12 @@ program print_2rdm ! ! useful to test the active part of the spin trace 2 rdms END_DOC + no_vvvv_integrals = .True. read_wf = .True. - touch read_wf + touch read_wf no_vvvv_integrals call routine end subroutine routine - integer :: i,j,k,l - integer :: ii,jj,kk,ll - double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral - thr = 1.d-10 - - - accu = 0.d0 - do ll = 1, n_act_orb - l = list_act(ll) - do kk = 1, n_act_orb - k = list_act(kk) - do jj = 1, n_act_orb - j = list_act(jj) - do ii = 1, n_act_orb - i = list_act(ii) - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - accu(1) += act_two_rdm_spin_trace_mo(ii,jj,kk,ll) * integral - enddo - enddo - enddo - enddo - print*,'accu = ',accu(1) + print *, psi_energy_with_nucl_rep end diff --git a/src/casscf/natorb.irp.f b/src/casscf/natorb.irp.f index c84b4862..9ce90304 100644 --- a/src/casscf/natorb.irp.f +++ b/src/casscf/natorb.irp.f @@ -196,33 +196,36 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] END_PROVIDER +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] implicit none BEGIN_DOC ! FCI natural orbitals END_DOC - integer :: i,j, p, q - real*8 :: d(n_act_orb) - NatOrbsFCI(:,:)=mo_coef(:,:) - - do j=1,ao_num - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=NatOrbsFCI(j,list_act(q))*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - NatOrbsFCI(j,list_act(p))=d(p) - end do - end do - -! call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & -! NatOrbsFCI, size(NatOrbsFCI,1), & -! Umat, size(Umat,1), 0.d0, & -! NewOrbs, size(NewOrbs,1)) + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f new file mode 100644 index 00000000..124c946a --- /dev/null +++ b/src/mo_two_e_ints/four_idx_novvvv.irp.f @@ -0,0 +1,202 @@ +BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] + implicit none + BEGIN_DOC + ! MO coefficients without virtual MOs + END_DOC + integer :: j,jj + + do j=1,n_core_inact_act_orb + jj = list_core_inact_act(j) + mo_coef_novirt(:,j) = mo_coef(:,jj) + enddo + +END_PROVIDER + +subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis excluding virtuals + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,n_core_inact_act_orb) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef_novirt, size(mo_coef_novirt,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& + 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + + +subroutine four_idx_novvvv + use map_module + implicit none + BEGIN_DOC + ! Retransform MO integrals for next CAS-SCF step + END_DOC + integer :: i,j,k,l,n_integrals + double precision, allocatable :: f(:,:,:), d(:,:), T(:,:,:,:) + double precision, external :: get_ao_two_e_integral + integer(key_kind), allocatable :: idx(:) + real(integral_kind), allocatable :: values(:) + + integer :: p,q,r,s + double precision, allocatable :: ijij(:), ijji(:), jqjs(:,:,:), jqrj(:,:,:) + double precision :: c + allocate (jqjs(mo_num,ao_num,ao_num), jqrj(mo_num,ao_num,ao_num)) + + allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & + !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & + !$OMP list_core_inact_act,jqjs,jqrj,ao_integrals_map) & + !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & + !$OMP f,d,c,ijij,ijji) + allocate(f(ao_num,ao_num,ao_num), d(mo_num,mo_num), & + idx(mo_num*mo_num), values(mo_num*mo_num) ) + allocate(ijij(mo_num), ijji(mo_num)) + + ! + !$OMP DO + do s=1,ao_num + T(:,:,:,s) = 0.d0 + jqjs(:,:,s) = 0.d0 + jqrj(:,:,s) = 0.d0 + enddo + !$OMP END DO + + !$OMP DO + do s=1,ao_num + do r=1,ao_num + do q=1,ao_num + do p=1,r + f(p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) + f(r,q,p) = f(p,q,r) + enddo + enddo + enddo + ! f(p,q,r) = + + do r=1,ao_num + call ao_to_mo_novirt(f(1,1,r),size(f,1),T(1,1,r,s),size(T,1)) + enddo + ! T(i,j,p,q) = + + ! Diagonal + do r=1,ao_num + do q=1,ao_num + do p=1,ao_num + if (dabs(f(p,q,r)) >= mo_integrals_threshold) then + do i=1,mo_num + jqjs(i,q,s) = jqjs(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) + enddo + endif + enddo + enddo + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do s=1,ao_num + do r=1,ao_num + do q=1,r + do p=1,ao_num + f(p,q,r) = get_ao_two_e_integral(p,q,s,r,ao_integrals_map) + f(p,r,q) = f(p,q,r) + enddo + enddo + enddo + ! f(p,q,r) = + + ! Diagonal + do r=1,ao_num + do q=1,ao_num + do p=1,ao_num + if (dabs(f(p,q,r)) >= mo_integrals_threshold) then + do i=1,mo_num + jqrj(i,q,s) = jqrj(i,q,s) + mo_coef_transp(i,p) * f(p,q,r) * mo_coef_transp(i,r) + enddo + endif + enddo + enddo + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do i=1,mo_num + ijij(:) = 0.d0 + ijji(:) = 0.d0 + do s=1,ao_num + do q=1,ao_num + do j=1,mo_num + c = mo_coef_transp(j,q) * mo_coef_transp(j,s) + ijij(j) = ijij(j) + jqjs(i,q,s) * c + ijji(j) = ijji(j) + jqrj(i,q,s) * c + enddo + enddo + enddo + do j=1,mo_num + call two_e_integrals_index(i,j,i,j,idx(j)) + values(j) = ijij(j) + enddo + do j=1,mo_num + call two_e_integrals_index(i,j,j,i,idx(mo_num+j)) + values(mo_num+j) = ijji(j) + enddo + call map_append(mo_integrals_map, idx, values, 2*mo_num) + enddo + !$OMP END DO + + !$OMP DO + do j=1,n_core_inact_act_orb + do i=1,n_core_inact_act_orb + do s=1,ao_num + do r=1,ao_num + f(r,s,1) = T(i,j,r,s) + enddo + enddo + call ao_to_mo(f,size(f,1),d,size(d,1)) + n_integrals = 0 + do l=1,mo_num + do k=1,mo_num + n_integrals+=1 + call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) + values(n_integrals) = d(k,l) + enddo + enddo + call map_append(mo_integrals_map, idx, values, n_integrals) + enddo + enddo + !$OMP END DO + deallocate(f,d,ijij,ijji,idx,values) + + !$OMP END PARALLEL + + deallocate(T,jqrj,jqjs) + + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) + +end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index fccf22a6..bca3df1a 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -22,16 +22,13 @@ end BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] use map_module implicit none - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - BEGIN_DOC ! If True, the map of MO two-electron integrals is provided END_DOC + integer(bit_kind) :: mask_ijkl(N_int,4) + integer(bit_kind) :: mask_ijk(N_int,3) + double precision :: cpu_1, cpu_2, wall_1, wall_2 - ! The following line avoids a subsequent crash when the memory used is more - ! than half of the virtual memory, due to a fork in zcat when reading arrays - ! with EZFIO PROVIDE mo_class mo_two_e_integrals_in_map = .True. @@ -49,106 +46,28 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] print *, '---------------------------------' print *, '' + call wall_time(wall_1) + call cpu_time(cpu_1) + if(no_vvvv_integrals)then - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! - if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) - endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! - if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) - endif - + call four_idx_novvvv else call add_integrals_to_map(full_ijkl_bitmask_4) - -! call four_index_transform_zmq(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! -! call four_index_transform_block(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) -! -! call four_index_transform(ao_integrals_map,mo_integrals_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) - - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - print*,'Molecular integrals provided' endif + + call wall_time(wall_2) + call cpu_time(cpu_2) + + integer*8 :: get_mo_map_size, mo_map_size + mo_map_size = get_mo_map_size() + + double precision, external :: map_mb + print*,'Molecular integrals provided:' + print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' + print*,' Number of MO integrals: ', mo_map_size + print*,' cpu time :',cpu_2 - cpu_1, 's' + print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' + if (write_mo_two_e_integrals.and.mpi_master) then call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) @@ -185,7 +104,7 @@ subroutine add_integrals_to_map(mask_ijkl) integer :: size_buffer integer(key_kind),allocatable :: buffer_i(:) real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb + double precision, external :: map_mb integer :: i1,j1,k1,l1, ii1, kmax, thread_num integer :: i2,i3,i4 @@ -247,12 +166,9 @@ subroutine add_integrals_to_map(mask_ijkl) endif size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - call wall_time(wall_1) - call cpu_time(cpu_1) double precision :: accu_bis accu_bis = 0.d0 @@ -452,12 +368,6 @@ subroutine add_integrals_to_map(mask_ijkl) deallocate(list_ijkl) - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - end