9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-22 03:23:29 +01:00

Merge branch 'dev-stable' of github.com:QuantumPackage/qp2 into dev-stable

This commit is contained in:
Anthony Scemama 2023-07-10 08:24:20 +02:00
commit 96c5734ed0
16 changed files with 1864 additions and 198 deletions

View File

@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
double precision, pointer :: L(:,:), L_old(:,:)
double precision, parameter :: s = 1.d-1
double precision :: s
double precision, parameter :: dscale = 1.d0
double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:)
@ -43,16 +43,28 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
double precision, external :: get_ao_two_e_integral
logical, external :: ao_two_e_integral_zero
double precision, external :: ao_two_e_integral
integer :: block_size, iblock, ierr
integer(omp_lock_kind), allocatable :: lock(:)
double precision :: rss
double precision, external :: memory_of_double, memory_of_int
PROVIDE nucl_coord
if (.not.do_direct_integrals) then
PROVIDE ao_two_e_integrals_in_map
endif
deallocate(cholesky_ao)
ndim = ao_num*ao_num
tau = ao_cholesky_threshold
rss = 6.d0 * memory_of_double(ndim) + &
6.d0 * memory_of_int(ndim)
call check_mem(rss, irp_here)
allocate(L(ndim,1))
@ -85,13 +97,22 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
enddo
enddo
if (do_direct_integrals) then
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
do i=1,ndim
D(i) = ao_two_e_integral(addr(1,i), addr(2,i), &
addr(1,i), addr(2,i))
enddo
!$OMP END PARALLEL DO
else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided)
do i=1,ndim
D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), &
addr(2,i), addr(2,i), &
ao_integrals_map)
enddo
!$OMP END PARALLEL DO
endif
Dmax = maxval(D)
@ -117,6 +138,14 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
! a.
i = i+1
logical :: memory_ok
memory_ok = .False.
s = 0.1d0
! Inrease s until the arrays fit in memory
do
! b.
Dmin = max(s*Dmax,tau)
@ -134,6 +163,26 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
endif
enddo
call resident_memory(rss)
rss = rss &
+ np*memory_of_double(nq) & ! Delta(np,nq)
+ (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq)
+ (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size)
! Ltmp_q(nq,block_size)
if (rss > qp_max_mem) then
s = s*2.d0
else
exit
endif
if ((s > 1.d0).or.(nq == 0)) then
print *, 'Not enough memory. Reduce cholesky threshold'
stop -1
endif
enddo
! d., e.
block_size = max(N,24)
@ -170,10 +219,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
stop -1
endif
Delta(:,:) = 0.d0
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j)
!$OMP DO
do q=1,nq
Delta(:,q) = 0.d0
enddo
!$OMP ENDDO NOWAIT
!$OMP DO
do k=1,N
do p=1,np
@ -183,9 +237,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
Ltmp_q(q,k) = L(Dset(q),k)
enddo
enddo
!$OMP END DO
!$OMP END DO NOWAIT
!$OMP DO SCHEDULE(dynamic,8)
!$OMP BARRIER
!$OMP DO SCHEDULE(guided)
do m=1,nq
call omp_set_lock(lock(m))
@ -196,8 +252,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
if ((0 < q).and.(q < k)) cycle
if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), &
addr(2,Lset(k)), addr(2,Dset(m)) ) ) then
if (do_direct_integrals) then
Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), &
addr(1,Dset(m)), addr(2,Dset(m)))
else
Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), &
addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map)
endif
if (q /= 0) Delta(q,m) = Delta(k,m)
endif
enddo
@ -218,8 +279,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
if ((0 < q).and.(q < p)) cycle
if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), &
addr(2,Dset(k)), addr(2,Dset(m)) ) ) then
if (do_direct_integrals) then
Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), &
addr(1,Dset(m)), addr(2,Dset(m)))
else
Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), &
addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map)
endif
if (q /= 0) Delta(q,m) = Delta(p,m)
if (j /= 0) Delta(p,j) = Delta(p,m)
if (q*j /= 0) Delta(q,j) = Delta(p,m)
@ -339,8 +405,16 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
call omp_destroy_lock(lock(k))
enddo
allocate(cholesky_ao(ao_num,ao_num,rank))
call dcopy(ndim*rank, L, 1, cholesky_ao, 1)
allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr)
if (ierr /= 0) then
print *, irp_here, ': Allocation failed'
stop -1
endif
!$OMP PARALLEL DO PRIVATE(k)
do k=1,rank
call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1)
enddo
!$OMP END PARALLEL DO
deallocate(L)
cholesky_ao_num = rank

View File

@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num)
!$OMP PARALLEL DO PRIVATE(i,k) &
!$OMP DEFAULT(NONE) &
!$OMP SHARED (ao_num,ao_two_e_integral_schwartz) &
!$OMP SCHEDULE(dynamic)
!$OMP SCHEDULE(guided)
do i=1,ao_num
do k=1,i
ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k))
@ -975,7 +975,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_10,2,d,nd)
call multiply_poly_c2(X,nx,B_10,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
nx = nd
!DIR$ LOOP COUNT(8)
@ -998,7 +999,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
endif
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_00,2,d,nd)
call multiply_poly_c2(X,nx,B_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
endif
ny=0
@ -1017,7 +1019,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
! !DIR$ FORCEINLINE
! call multiply_poly(Y,ny,C_00,2,d,nd)
call multiply_poly_c2(Y,ny,C_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
end
recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
@ -1057,7 +1060,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_00,2,d,nd)
call multiply_poly_c2(X,nx,B_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
ny=0
@ -1069,7 +1073,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
! !DIR$ FORCEINLINE
! call multiply_poly(Y,ny,C_00,2,d,nd)
call multiply_poly_c2(Y,ny,C_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
end
@ -1098,7 +1103,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_10,2,d,nd)
call multiply_poly_c2(X,nx,B_10,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
nx = nd
!DIR$ LOOP COUNT(8)
@ -1118,7 +1124,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_00,2,d,nd)
call multiply_poly_c2(X,nx,B_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
ny=0
!DIR$ LOOP COUNT(8)
@ -1130,7 +1137,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
! !DIR$ FORCEINLINE
! call multiply_poly(Y,ny,C_00,2,d,nd)
call multiply_poly_c2(Y,ny,C_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
end
recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
@ -1177,9 +1185,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
Y(1) = D_00(1)
Y(2) = D_00(2)
! !DIR$ FORCEINLINE
! call multiply_poly(Y,ny,D_00,2,d,nd)
call multiply_poly_c2(Y,ny,D_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
return
@ -1199,7 +1207,8 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
! !DIR$ FORCEINLINE
! call multiply_poly(X,nx,B_01,2,d,nd)
call multiply_poly_c2(X,nx,B_01,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd)
ny = 0
!DIR$ LOOP COUNT(6)
@ -1208,9 +1217,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
enddo
call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
! !DIR$ FORCEINLINE
! call multiply_poly(Y,ny,D_00,2,d,nd)
call multiply_poly_c2(Y,ny,D_00,d,nd)
!DIR$ FORCEINLINE
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
end select
end
@ -1232,7 +1241,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
logical, external :: ao_two_e_integral_zero
integer :: i,k
double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2
double precision, external :: ao_two_e_integral
double precision :: cpu_1,cpu_2, wall_1, wall_2
double precision :: integral, wall_0
double precision :: thr
integer :: kk, m, j1, i1
@ -1299,3 +1309,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd)
end
!DIR$ FORCEINLINE
subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd)
implicit none
BEGIN_DOC
! Multiply two polynomials
! D(t) += B(t)*C(t)
END_DOC
integer, intent(in) :: nb
integer, intent(out) :: nd
double precision, intent(in) :: b(0:nb), c(0:2)
double precision, intent(inout) :: d(0:nb+2)
integer :: ndtmp
integer :: ib, ic, id, k
if(nb < 0) return !False if nb>=0
select case (nb)
case (0)
d(0) = d(0) + c(0) * b(0)
d(1) = d(1) + c(1) * b(0)
d(2) = d(2) + c(2) * b(0)
case (1)
d(0) = d(0) + c(0) * b(0)
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
d(2) = d(2) + c(1) * b(1) + c(2) * b(0)
d(3) = d(3) + c(2) * b(1)
case (2)
d(0) = d(0) + c(0) * b(0)
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
d(3) = d(3) + c(1) * b(2) + c(2) * b(1)
d(4) = d(4) + c(2) * b(2)
case default
d(0) = d(0) + c(0) * b(0)
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
do ib=2,nb
d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2)
enddo
d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1)
d(nb+2) = d(nb+2) + c(2) * b(nb)
end select
do nd = nb+2,0,-1
if (d(nd) /= 0.d0) exit
enddo
end

View File

@ -85,13 +85,23 @@ subroutine run_ccsd_space_orb
do while (not_converged)
! Residue
! if (do_ao_cholesky) then
if (.False.) then
call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo)
call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv)
call compute_H_vo_chol(nO,nV,t1,t2,H_vo)
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
else
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
call compute_H_vo(nO,nV,t1,t2,H_vo)
! Residue
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
endif
max_r = max(max_r1,max_r2)
! Update
@ -839,6 +849,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
! allocate(B1(nV,nV,nV,nV))
! call compute_B1(nO,nV,t1,t2,B1)
! call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
! 1d0, tau, size(tau,1) * size(tau,2), &
! B1 , size(B1_gam,1) * size(B1_gam,2), &
! 1d0, r2, size(r2,1) * size(r2,2))
allocate(B1_gam(nV,nV,nV))
do gam=1,nV
call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam)

File diff suppressed because it is too large Load Diff

View File

@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
e = 0d0
!$OMP DO SCHEDULE(dynamic)
!$OMP DO SCHEDULE(guided)
do a = 1, nV
do b = a+1, nV
do c = b+1, nV

View File

@ -117,7 +117,7 @@ END_PROVIDER
!$OMP N_det_alpha_unique,N_det_beta_unique,irp_here)
allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) )
tmp_a = 0.d0
!$OMP DO SCHEDULE(dynamic,64)
!$OMP DO SCHEDULE(guided)
do k_a=1,N_det
krow = psi_bilinear_matrix_rows(k_a)
ASSERT (krow <= N_det_alpha_unique)
@ -173,7 +173,7 @@ END_PROVIDER
deallocate(tmp_a)
tmp_b = 0.d0
!$OMP DO SCHEDULE(dynamic,64)
!$OMP DO SCHEDULE(guided)
do k_b=1,N_det
krow = psi_bilinear_matrix_transp_rows(k_b)
ASSERT (krow <= N_det_alpha_unique)

View File

@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
enddo
!$OMP END DO
!$OMP DO schedule(dynamic,1024)
!$OMP DO schedule(guided,64)
do i=1,N_det-1
if (duplicate(i)) then
cycle

View File

@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
!$OMP REDUCTION(+:accu)
allocate(idx(0:n))
!$OMP DO SCHEDULE(dynamic)
!$OMP DO SCHEDULE(guided)
do i = n,1,-1 ! Better OMP scheduling
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)

View File

@ -190,27 +190,39 @@ END_PROVIDER
deallocate(X)
if (elec_alpha_num > elec_beta_num) then
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
endif
allocate(X2(ao_num,ao_num,cholesky_ao_num,2))
double precision :: rss
double precision :: memory_of_double
integer :: iblock
integer, parameter :: block_size = 32
rss = memory_of_double(ao_num*ao_num)
call check_mem(2.d0*block_size*rss, irp_here)
allocate(X2(ao_num,ao_num,block_size,2))
allocate(X3(ao_num,block_size,ao_num,2))
! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j)
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
do iblock=1,cholesky_ao_num,block_size
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
SCF_density_matrix_ao_alpha, ao_num, &
cholesky_ao, ao_num, 0.d0, &
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
X2(1,1,1,1), ao_num)
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
if (elec_alpha_num > elec_beta_num) then
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
SCF_density_matrix_ao_beta, ao_num, &
cholesky_ao, ao_num, 0.d0, &
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
X2(1,1,1,2), ao_num)
allocate(X3(ao_num,cholesky_ao_num,ao_num,2))
do s=1,ao_num
do j=1,cholesky_ao_num
do j=1,min(cholesky_ao_num-iblock+1,block_size)
do m=1,ao_num
X3(m,j,s,1) = X2(m,s,j,1)
X3(m,j,s,2) = X2(m,s,j,2)
@ -218,19 +230,35 @@ END_PROVIDER
enddo
enddo
deallocate(X2)
else
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
cholesky_ao, ao_num, &
X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, &
do s=1,ao_num
do j=1,min(cholesky_ao_num-iblock+1,block_size)
do m=1,ao_num
X3(m,j,s,1) = X2(m,s,j,1)
enddo
enddo
enddo
endif
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
cholesky_ao(1,1,iblock), ao_num, &
X3(1,1,1,1), ao_num*block_size, 1.d0, &
ao_two_e_integral_alpha_chol, ao_num)
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
cholesky_ao, ao_num, &
X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, &
if (elec_alpha_num > elec_beta_num) then
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
cholesky_ao(1,1,iblock), ao_num, &
X3(1,1,1,2), ao_num*block_size, 1.d0, &
ao_two_e_integral_beta_chol, ao_num)
endif
deallocate(X3)
enddo
if (elec_alpha_num == elec_beta_num) then
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
endif
deallocate(X2,X3)
END_PROVIDER

View File

@ -4,16 +4,18 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num
! Cholesky vectors in MO basis
END_DOC
integer :: k
integer :: k, i, j
call set_multiple_levels_omp(.False.)
print *, 'AO->MO Transformation of Cholesky vectors'
!$OMP PARALLEL DO PRIVATE(k)
do k=1,cholesky_ao_num
call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num)
do j=1,mo_num
do i=1,mo_num
cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j)
enddo
enddo
enddo
!$OMP END PARALLEL DO
print *, ''
END_PROVIDER
@ -23,27 +25,19 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num,
! Cholesky vectors in MO basis
END_DOC
integer :: i,j,k
double precision, allocatable :: buffer(:,:)
double precision, allocatable :: X(:,:,:)
integer :: ierr
print *, 'AO->MO Transformation of Cholesky vectors'
print *, 'AO->MO Transformation of Cholesky vectors .'
call set_multiple_levels_omp(.False.)
!$OMP PARALLEL PRIVATE(i,j,k,buffer)
allocate(buffer(mo_num,mo_num))
!$OMP DO SCHEDULE(static)
do k=1,cholesky_ao_num
call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num)
do j=1,mo_num
do i=1,mo_num
cholesky_mo_transp(k,i,j) = buffer(i,j)
enddo
enddo
enddo
!$OMP END DO
deallocate(buffer)
!$OMP END PARALLEL
print *, ''
allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr)
if (ierr /= 0) then
print *, irp_here, ': Allocation failed'
endif
call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, &
cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num)
call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, &
X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num)
deallocate(X)
END_PROVIDER

View File

@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map)
print*, 'MO integrals provided'
return
else
endif
if (.not. do_direct_integrals) then
PROVIDE ao_two_e_integrals_in_map
endif
@ -90,6 +92,10 @@ subroutine four_idx_dgemm
double precision, allocatable :: a1(:,:,:,:)
double precision, allocatable :: a2(:,:,:,:)
if (ao_num > 1289) then
print *, irp_here, ': Integer overflow in ao_num**3'
endif
allocate (a1(ao_num,ao_num,ao_num,ao_num))
print *, 'Getting AOs'
@ -103,6 +109,7 @@ subroutine four_idx_dgemm
enddo
!$OMP END PARALLEL DO
print *, '1st transformation'
! 1st transformation
allocate (a2(ao_num,ao_num,ao_num,mo_num))
@ -166,11 +173,9 @@ subroutine four_idx_dgemm
deallocate (a1)
call map_sort(mo_integrals_map)
call map_unique(mo_integrals_map)
integer*8 :: get_mo_map_size, mo_map_size
mo_map_size = get_mo_map_size()
end subroutine
subroutine add_integrals_to_map(mask_ijkl)
@ -250,7 +255,7 @@ subroutine add_integrals_to_map(mask_ijkl)
call wall_time(wall_1)
size_buffer = min(ao_num*ao_num*ao_num,8000000)
size_buffer = min(ao_num*ao_num,8000000)
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
@ -443,11 +448,6 @@ subroutine add_integrals_to_map(mask_ijkl)
!$OMP END PARALLEL
call map_merge(mo_integrals_map)
call wall_time(wall_2)
call cpu_time(cpu_2)
integer*8 :: get_mo_map_size, mo_map_size
mo_map_size = get_mo_map_size()
deallocate(list_ijkl)
@ -465,31 +465,31 @@ subroutine add_integrals_to_map_cholesky
integer :: size_buffer, n_integrals
size_buffer = min(mo_num*mo_num*mo_num,16000000)
double precision, allocatable :: Vtmp(:,:,:,:)
double precision, allocatable :: Vtmp(:,:,:)
integer(key_kind) , allocatable :: buffer_i(:)
real(integral_kind), allocatable :: buffer_value(:)
if (.True.) then
! In-memory transformation
call set_multiple_levels_omp(.False.)
allocate (Vtmp(mo_num,mo_num,mo_num,mo_num))
!$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp)
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
allocate (Vtmp(mo_num,mo_num,mo_num))
n_integrals = 0
call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, &
cholesky_mo, mo_num*mo_num, &
cholesky_mo, mo_num*mo_num, 0.d0, &
!$OMP DO SCHEDULE(dynamic)
do l=1,mo_num
call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, &
cholesky_mo_transp, cholesky_ao_num, &
cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, &
Vtmp, mo_num*mo_num)
!$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i)
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
n_integrals = 0
!$OMP DO
do l=1,mo_num
do k=1,l
do j=1,mo_num
do i=1,j
if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then
n_integrals += 1
buffer_value(n_integrals) = Vtmp(i,j,k,l)
if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then
n_integrals = n_integrals + 1
buffer_value(n_integrals) = Vtmp(i,j,k)
!DIR$ FORCEINLINE
call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals))
if (n_integrals == size_buffer) then
@ -501,16 +501,18 @@ subroutine add_integrals_to_map_cholesky
enddo
enddo
enddo
!$OMP END DO
!$OMP END DO NOWAIT
if (n_integrals > 0) then
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
deallocate(buffer_i, buffer_value)
endif
deallocate(buffer_i, buffer_value, Vtmp)
!$OMP BARRIER
!$OMP END PARALLEL
deallocate(Vtmp)
call map_sort(mo_integrals_map)
call map_unique(mo_integrals_map)
endif
end
subroutine add_integrals_to_map_three_indices(mask_ijk)
@ -580,6 +582,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
return
endif
if (ao_num > 1289) then
print *, irp_here, ': Integer overflow in ao_num**3'
endif
size_buffer = min(ao_num*ao_num*ao_num,16000000)
print*, 'Providing the molecular integrals '
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
@ -855,6 +860,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int )
call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int )
if (ao_num > 1289) then
print *, irp_here, ': Integer overflow in ao_num**3'
endif
size_buffer = min(ao_num*ao_num*ao_num,16000000)
print*, 'Providing the molecular integrals '
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
@ -1350,16 +1358,29 @@ END_PROVIDER
! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij
END_DOC
integer :: i,j
integer :: i,j,k
double precision :: get_two_e_integral
if (do_ao_cholesky) then
double precision, allocatable :: buffer(:,:)
allocate (buffer(cholesky_ao_num,mo_num))
do k=1,cholesky_ao_num
do i=1,mo_num
buffer(k,i) = cholesky_mo_transp(k,i,i)
enddo
enddo
call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, &
buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num)
deallocate(buffer)
do j=1,mo_num
do i=1,mo_num
!TODO: use dgemm
mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j))
mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i))
mo_two_e_integrals_jj_exchange(i,j) = 0.d0
do k=1,cholesky_ao_num
mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + &
cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i)
enddo
enddo
enddo

View File

@ -22,12 +22,8 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
perror("Error opening mmap file for reading");
exit(EXIT_FAILURE);
}
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0);
if (map == MAP_FAILED) {
/* try again without huge pages */
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
}
}
else
{
fd = open(filename, O_RDWR | O_CREAT, (mode_t)0600);
@ -53,16 +49,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
exit(EXIT_FAILURE);
}
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0);
if (map == MAP_FAILED) {
/* try again without huge pages */
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
}
if (map == MAP_FAILED) {
close(fd);
printf("%s:\n", filename);
printf("%s: %lu\n", filename, bytes);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}

View File

@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map)
integer*8 :: n_elements
n_elements = int(map % n_elements,8)
if (n_elements <= 0) then
print *, 'Unable to write map to disk: n_elements = ', n_elements
stop -1
endif
if (map % consolidated) then

View File

@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ]
! Maximum memory in Gb
END_DOC
character*(128) :: env
integer, external :: get_total_available_memory
qp_max_mem = 2000
qp_max_mem = get_total_available_memory()
call write_int(6,qp_max_mem,'Total available memory (GB)')
call getenv('QP_MAXMEM',env)
if (trim(env) /= '') then
call lock_io()
@ -122,3 +124,35 @@ subroutine print_memory_usage()
'.. >>>>> [ RES MEM : ', rss , &
' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..'
end
integer function get_total_available_memory() result(res)
implicit none
BEGIN_DOC
! Returns the total available memory on the current machine
END_DOC
character(len=128) :: line
integer :: status
integer :: iunit
integer*8, parameter :: KB = 1024
integer*8, parameter :: GiB = 1024**3
integer, external :: getUnitAndOpen
iunit = getUnitAndOpen('/proc/meminfo','r')
res = 512
do
read(iunit, '(A)', END=10) line
if (line(1:10) == "MemTotal: ") then
read(line(11:), *, ERR=20) res
res = int((res*KB) / GiB,4)
exit
20 continue
end if
end do
10 continue
close(iunit)
end function get_total_available_memory

View File

@ -46,7 +46,13 @@ module mmap_module
integer(c_size_t) :: length
integer(c_int) :: fd_
length = PRODUCT( shape(:) ) * bytes
integer :: i
length = int(bytes,8)
do i=1,size(shape)
length = length * shape(i)
enddo
if (read_only) then
map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1)
else
@ -66,7 +72,12 @@ module mmap_module
integer(c_size_t) :: length
integer(c_int) :: fd_
length = PRODUCT( shape(:) ) * bytes
integer :: i
length = int(bytes,8)
do i=1,size(shape)
length = length * shape(i)
enddo
fd_ = fd
call c_munmap_fortran( length, fd_, map)
end subroutine
@ -82,7 +93,12 @@ module mmap_module
integer(c_size_t) :: length
integer(c_int) :: fd_
length = PRODUCT( shape(:) ) * bytes
integer :: i
length = int(bytes,8)
do i=1,size(shape)
length = length * shape(i)
enddo
fd_ = fd
call c_msync_fortran( length, fd_, map)
end subroutine

View File

@ -53,33 +53,8 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4))
allocate(buffer(n1,n3,n2,n4))
!$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k)
!$OMP DO
do i3=1,n3
idx3 = list3(i3)
do i1=1,n1
idx1 = list1(i1)
do k=1,cholesky_ao_num
v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO
do i4=1,n4
idx4 = list4(i4)
do i2=1,n2
idx2 = list2(i2)
do k=1,cholesky_ao_num
v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP BARRIER
!$OMP END PARALLEL
call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num)
call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num)
call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, &
v1, cholesky_ao_num, &
@ -129,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
end
subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv)
implicit none
integer, intent(in) :: n1,n3,ldv
integer, intent(in) :: list1(n1),list3(n3)
double precision, intent(out) :: v(ldv,n1,n3)
integer :: i1,i3,idx1,idx3,k
!$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k)
do i3=1,n3
idx3 = list3(i3)
do i1=1,n1
idx1 = list1(i1)
do k=1,cholesky_ao_num
v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3)
enddo
enddo
enddo
!$OMP END PARALLEL DO
end
! full
BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
@ -345,6 +344,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n
END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)]
implicit none
call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num)
END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)]
implicit none
call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num)
END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)]
implicit none
call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num)
END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)]
implicit none
call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num)
END_PROVIDER
! ppqq
BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]