Introducing Cholesky-decomposed SCF
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Anthony Scemama 2023-04-28 10:31:24 +02:00
parent 64bfddbb00
commit c80ebe27b8
4 changed files with 298 additions and 117 deletions

View File

@ -0,0 +1,100 @@
BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ]
implicit none
BEGIN_DOC
! Number of Cholesky vectors in AO basis
END_DOC
integer :: i,j,k,l
double precision :: xnorm0, x, integral
double precision, external :: ao_two_e_integral
cholesky_ao_num_guess = 0
xnorm0 = 0.d0
x = 0.d0
do j=1,ao_num
do i=1,ao_num
integral = ao_two_e_integral(i,i,j,j)
if (integral > ao_integrals_threshold) then
cholesky_ao_num_guess += 1
else
x += integral
endif
enddo
enddo
print *, 'Cholesky decomposition of AO integrals'
print *, '--------------------------------------'
print *, ''
print *, 'Estimated Error: ', x
print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)'
END_PROVIDER
BEGIN_PROVIDER [ integer, cholesky_ao_num ]
&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ]
use mmap_module
implicit none
BEGIN_DOC
! Cholesky vectors in AO basis: (ik|a):
! <ij|kl> = (ik|jl) = sum_a (ik|a).(a|jl)
END_DOC
type(c_ptr) :: ptr
integer :: fd, i,j,k,l, rank
double precision, pointer :: ao_integrals(:,:,:,:)
double precision, external :: ao_two_e_integral
! Store AO integrals in a memory mapped file
call mmap(trim(ezfio_work_dir)//'ao_integrals', &
(/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), &
8, fd, .False., ptr)
call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/))
double precision :: integral
logical, external :: ao_two_e_integral_zero
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic)
do l=1,ao_num
do j=1,l
do k=1,ao_num
do i=1,k
if (ao_two_e_integral_zero(i,j,k,l)) cycle
integral = ao_two_e_integral(i,k,j,l)
ao_integrals(i,k,j,l) = integral
ao_integrals(k,i,j,l) = integral
ao_integrals(i,k,l,j) = integral
ao_integrals(k,i,l,j) = integral
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
! Call Lapack
cholesky_ao_num = cholesky_ao_num_guess
call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao)
print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)'
! Remove mmap
double precision, external :: getUnitAndOpen
call munmap( &
(/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), &
8, fd, ptr)
open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals')
close(99, status='delete')
END_PROVIDER
BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ]
implicit none
BEGIN_DOC
! Transposed of the Cholesky vectors in AO basis set
END_DOC
integer :: i,j,k
do j=1,ao_num
do i=1,ao_num
do k=1,ao_num
cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k)
enddo
enddo
enddo
END_PROVIDER

View File

@ -486,7 +486,7 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val)
PROVIDE ao_two_e_integrals_in_map ao_integrals_map
if (ao_one_e_integral_zero(j,l)) then
out_val = 0.d0
out_val(1:sze) = 0.d0
return
endif

View File

@ -15,115 +15,59 @@
double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:)
double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:)
ao_two_e_integral_alpha = 0.d0
ao_two_e_integral_beta = 0.d0
if (do_direct_integrals) then
if (.True.) then ! Use Cholesky-decomposed integrals
ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:)
ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0, &
!$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2, &
!$OMP local_threshold)&
!$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,&
!$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz, &
!$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta)
else ! Use integrals in AO basis set
allocate(keys(1), values(1))
allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
ao_two_e_integral_beta_tmp(ao_num,ao_num))
ao_two_e_integral_alpha_tmp = 0.d0
ao_two_e_integral_beta_tmp = 0.d0
ao_two_e_integral_alpha = 0.d0
ao_two_e_integral_beta = 0.d0
if (do_direct_integrals) then
q = ao_num*ao_num*ao_num*ao_num
!$OMP DO SCHEDULE(static,64)
do p=1_8,q
call two_e_integrals_index_reverse(kk,ii,ll,jj,p)
if ( (kk(1)>ao_num).or. &
(ii(1)>ao_num).or. &
(jj(1)>ao_num).or. &
(ll(1)>ao_num) ) then
cycle
endif
k = kk(1)
i = ii(1)
l = ll(1)
j = jj(1)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0,&
!$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2,&
!$OMP local_threshold) &
!$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,&
!$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz,&
!$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta)
logical, external :: ao_two_e_integral_zero
if (ao_two_e_integral_zero(i,k,j,l)) then
cycle
endif
local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j)
if (local_threshold < ao_integrals_threshold) then
cycle
endif
i0 = i
j0 = j
k0 = k
l0 = l
values(1) = 0.d0
local_threshold = ao_integrals_threshold/local_threshold
do k2=1,8
if (kk(k2)==0) then
cycle
endif
i = ii(k2)
j = jj(k2)
k = kk(k2)
l = ll(k2)
c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)
c1 = SCF_density_matrix_ao_alpha(k,i)
c2 = SCF_density_matrix_ao_beta(k,i)
if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then
cycle
endif
if (values(1) == 0.d0) then
values(1) = ao_two_e_integral(k0,l0,i0,j0)
endif
integral = c0 * values(1)
ao_two_e_integral_alpha_tmp(i,j) += integral
ao_two_e_integral_beta_tmp (i,j) += integral
integral = values(1)
ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral
ao_two_e_integral_beta_tmp (l,j) -= c2 * integral
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp
ao_two_e_integral_beta += ao_two_e_integral_beta_tmp
!$OMP END CRITICAL
deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
!$OMP END PARALLEL
else
PROVIDE ao_two_e_integrals_in_map
allocate(keys(1), values(1))
allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
ao_two_e_integral_beta_tmp(ao_num,ao_num))
ao_two_e_integral_alpha_tmp = 0.d0
ao_two_e_integral_beta_tmp = 0.d0
integer(omp_lock_kind) :: lck(ao_num)
integer(map_size_kind) :: i8
integer :: ii(8), jj(8), kk(8), ll(8), k2
integer(cache_map_size_kind) :: n_elements_max, n_elements
integer(key_kind), allocatable :: keys(:)
double precision, allocatable :: values(:)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
!$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)&
!$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,&
!$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta)
call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
allocate(keys(n_elements_max), values(n_elements_max))
allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
ao_two_e_integral_beta_tmp(ao_num,ao_num))
ao_two_e_integral_alpha_tmp = 0.d0
ao_two_e_integral_beta_tmp = 0.d0
!$OMP DO SCHEDULE(static,1)
do i8=0_8,ao_integrals_map%map_size
n_elements = n_elements_max
call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
do k1=1,n_elements
call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1))
q = ao_num*ao_num*ao_num*ao_num
!$OMP DO SCHEDULE(static,64)
do p=1_8,q
call two_e_integrals_index_reverse(kk,ii,ll,jj,p)
if ( (kk(1)>ao_num).or. &
(ii(1)>ao_num).or. &
(jj(1)>ao_num).or. &
(ll(1)>ao_num) ) then
cycle
endif
k = kk(1)
i = ii(1)
l = ll(1)
j = jj(1)
logical, external :: ao_two_e_integral_zero
if (ao_two_e_integral_zero(i,k,j,l)) then
cycle
endif
local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j)
if (local_threshold < ao_integrals_threshold) then
cycle
endif
i0 = i
j0 = j
k0 = k
l0 = l
values(1) = 0.d0
local_threshold = ao_integrals_threshold/local_threshold
do k2=1,8
if (kk(k2)==0) then
cycle
@ -132,25 +76,162 @@
j = jj(k2)
k = kk(k2)
l = ll(k2)
integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1)
c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)
c1 = SCF_density_matrix_ao_alpha(k,i)
c2 = SCF_density_matrix_ao_beta(k,i)
if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then
cycle
endif
if (values(1) == 0.d0) then
values(1) = ao_two_e_integral(k0,l0,i0,j0)
endif
integral = c0 * values(1)
ao_two_e_integral_alpha_tmp(i,j) += integral
ao_two_e_integral_beta_tmp (i,j) += integral
integral = values(k1)
ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral
ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral
integral = values(1)
ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral
ao_two_e_integral_beta_tmp (l,j) -= c2 * integral
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp
ao_two_e_integral_beta += ao_two_e_integral_beta_tmp
!$OMP END CRITICAL
deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
!$OMP END PARALLEL
!$OMP END DO NOWAIT
!$OMP CRITICAL
ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp
ao_two_e_integral_beta += ao_two_e_integral_beta_tmp
!$OMP END CRITICAL
deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
!$OMP END PARALLEL
else
PROVIDE ao_two_e_integrals_in_map
integer(omp_lock_kind) :: lck(ao_num)
integer(map_size_kind) :: i8
integer :: ii(8), jj(8), kk(8), ll(8), k2
integer(cache_map_size_kind) :: n_elements_max, n_elements
integer(key_kind), allocatable :: keys(:)
double precision, allocatable :: values(:)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max,&
!$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)&
!$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,&
!$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta)
call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
allocate(keys(n_elements_max), values(n_elements_max))
allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
ao_two_e_integral_beta_tmp(ao_num,ao_num))
ao_two_e_integral_alpha_tmp = 0.d0
ao_two_e_integral_beta_tmp = 0.d0
!$OMP DO SCHEDULE(static,1)
do i8=0_8,ao_integrals_map%map_size
n_elements = n_elements_max
call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
do k1=1,n_elements
call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1))
do k2=1,8
if (kk(k2)==0) then
cycle
endif
i = ii(k2)
j = jj(k2)
k = kk(k2)
l = ll(k2)
integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1)
ao_two_e_integral_alpha_tmp(i,j) += integral
ao_two_e_integral_beta_tmp (i,j) += integral
integral = values(k1)
ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral
ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp
ao_two_e_integral_beta += ao_two_e_integral_beta_tmp
!$OMP END CRITICAL
deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
!$OMP END PARALLEL
endif
endif
END_PROVIDER
BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha_chol, (ao_num, ao_num) ]
&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta_chol , (ao_num, ao_num) ]
use map_module
implicit none
BEGIN_DOC
! Alpha and Beta Fock matrices in AO basis set
END_DOC
integer :: m,n,l,s,j
double precision :: integral
double precision, allocatable :: X(:), X2(:,:,:,:), X3(:,:,:,:)
allocate (X(cholesky_ao_num))
! X(j) = \sum_{mn} SCF_density_matrix_ao(m,n) * cholesky_ao(m,n,j)
call dgemm('T','N',cholesky_ao_num,1,ao_num*ao_num,1.d0, &
cholesky_ao, ao_num*ao_num, &
SCF_density_matrix_ao, ao_num*ao_num,0.d0, &
X, cholesky_ao_num)
!
! ao_two_e_integral_alpha(m,n) = \sum_{j} cholesky_ao(m,n,j) * X(j)
call dgemm('N','N',ao_num*ao_num,1,cholesky_ao_num, 1.d0, &
cholesky_ao, ao_num*ao_num, &
X, cholesky_ao_num, 0.d0, &
ao_two_e_integral_alpha_chol, ao_num*ao_num)
deallocate(X)
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
allocate(X2(ao_num,ao_num,cholesky_ao_num,2))
! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j)
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
SCF_density_matrix_ao_alpha, ao_num, &
cholesky_ao, ao_num, 0.d0, &
X2(1,1,1,1), ao_num)
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
SCF_density_matrix_ao_beta, ao_num, &
cholesky_ao, ao_num, 0.d0, &
X2(1,1,1,2), ao_num)
allocate(X3(ao_num,cholesky_ao_num,ao_num,2))
do s=1,ao_num
do j=1,cholesky_ao_num
do m=1,ao_num
X3(m,j,s,1) = X2(m,s,j,1)
X3(m,j,s,2) = X2(m,s,j,2)
enddo
enddo
enddo
deallocate(X2)
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
cholesky_ao, ao_num, &
X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, &
ao_two_e_integral_alpha_chol, ao_num)
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
cholesky_ao, ao_num, &
X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, &
ao_two_e_integral_beta_chol, ao_num)
deallocate(X3)
END_PROVIDER
BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]

View File

@ -1854,7 +1854,7 @@ do k = 1, N
end do
! TODO: It should be possible to use only one vector of size (1:rank) as a buffer
! to do the swapping in-place
U = 0.00D+0
U(:,:) = 0.00D+0
do k = 1, N
l = piv(k)
U(l, :) = A(1:rank, k)