mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-31 16:45:50 +01:00
Merge branch 'dev-stable' of github.com:QuantumPackage/qp2 into dev-stable
This commit is contained in:
commit
96c5734ed0
@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
double precision, pointer :: L(:,:), L_old(:,:)
|
||||
|
||||
|
||||
double precision, parameter :: s = 1.d-1
|
||||
double precision :: s
|
||||
double precision, parameter :: dscale = 1.d0
|
||||
|
||||
double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:)
|
||||
@ -43,16 +43,28 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
double precision, external :: get_ao_two_e_integral
|
||||
logical, external :: ao_two_e_integral_zero
|
||||
|
||||
double precision, external :: ao_two_e_integral
|
||||
integer :: block_size, iblock, ierr
|
||||
|
||||
integer(omp_lock_kind), allocatable :: lock(:)
|
||||
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
double precision :: rss
|
||||
double precision, external :: memory_of_double, memory_of_int
|
||||
|
||||
|
||||
PROVIDE nucl_coord
|
||||
|
||||
if (.not.do_direct_integrals) then
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
endif
|
||||
deallocate(cholesky_ao)
|
||||
|
||||
ndim = ao_num*ao_num
|
||||
tau = ao_cholesky_threshold
|
||||
|
||||
rss = 6.d0 * memory_of_double(ndim) + &
|
||||
6.d0 * memory_of_int(ndim)
|
||||
call check_mem(rss, irp_here)
|
||||
|
||||
allocate(L(ndim,1))
|
||||
|
||||
@ -85,13 +97,22 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,ndim
|
||||
D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), &
|
||||
addr(2,i), addr(2,i), &
|
||||
ao_integrals_map)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
if (do_direct_integrals) then
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,ndim
|
||||
D(i) = ao_two_e_integral(addr(1,i), addr(2,i), &
|
||||
addr(1,i), addr(2,i))
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
else
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided)
|
||||
do i=1,ndim
|
||||
D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), &
|
||||
addr(2,i), addr(2,i), &
|
||||
ao_integrals_map)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
endif
|
||||
|
||||
Dmax = maxval(D)
|
||||
|
||||
@ -117,21 +138,49 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
! a.
|
||||
i = i+1
|
||||
|
||||
! b.
|
||||
Dmin = max(s*Dmax,tau)
|
||||
logical :: memory_ok
|
||||
memory_ok = .False.
|
||||
|
||||
! c.
|
||||
nq=0
|
||||
LDmap = 0
|
||||
DLmap = 0
|
||||
do p=1,np
|
||||
if ( D(Lset(p)) > Dmin ) then
|
||||
nq = nq+1
|
||||
Dset(nq) = Lset(p)
|
||||
Dset_rev(Dset(nq)) = nq
|
||||
LDmap(p) = nq
|
||||
DLmap(nq) = p
|
||||
s = 0.1d0
|
||||
|
||||
! Inrease s until the arrays fit in memory
|
||||
do
|
||||
|
||||
! b.
|
||||
Dmin = max(s*Dmax,tau)
|
||||
|
||||
! c.
|
||||
nq=0
|
||||
LDmap = 0
|
||||
DLmap = 0
|
||||
do p=1,np
|
||||
if ( D(Lset(p)) > Dmin ) then
|
||||
nq = nq+1
|
||||
Dset(nq) = Lset(p)
|
||||
Dset_rev(Dset(nq)) = nq
|
||||
LDmap(p) = nq
|
||||
DLmap(nq) = p
|
||||
endif
|
||||
enddo
|
||||
|
||||
call resident_memory(rss)
|
||||
rss = rss &
|
||||
+ np*memory_of_double(nq) & ! Delta(np,nq)
|
||||
+ (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq)
|
||||
+ (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size)
|
||||
! Ltmp_q(nq,block_size)
|
||||
|
||||
if (rss > qp_max_mem) then
|
||||
s = s*2.d0
|
||||
else
|
||||
exit
|
||||
endif
|
||||
|
||||
if ((s > 1.d0).or.(nq == 0)) then
|
||||
print *, 'Not enough memory. Reduce cholesky threshold'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
! d., e.
|
||||
@ -170,10 +219,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
stop -1
|
||||
endif
|
||||
|
||||
Delta(:,:) = 0.d0
|
||||
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j)
|
||||
|
||||
!$OMP DO
|
||||
do q=1,nq
|
||||
Delta(:,q) = 0.d0
|
||||
enddo
|
||||
!$OMP ENDDO NOWAIT
|
||||
|
||||
!$OMP DO
|
||||
do k=1,N
|
||||
do p=1,np
|
||||
@ -183,9 +237,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
Ltmp_q(q,k) = L(Dset(q),k)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic,8)
|
||||
!$OMP BARRIER
|
||||
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do m=1,nq
|
||||
|
||||
call omp_set_lock(lock(m))
|
||||
@ -196,8 +252,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
if ((0 < q).and.(q < k)) cycle
|
||||
if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), &
|
||||
addr(2,Lset(k)), addr(2,Dset(m)) ) ) then
|
||||
Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), &
|
||||
if (do_direct_integrals) then
|
||||
Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), &
|
||||
addr(1,Dset(m)), addr(2,Dset(m)))
|
||||
else
|
||||
Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), &
|
||||
addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map)
|
||||
endif
|
||||
if (q /= 0) Delta(q,m) = Delta(k,m)
|
||||
endif
|
||||
enddo
|
||||
@ -218,8 +279,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
if ((0 < q).and.(q < p)) cycle
|
||||
if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), &
|
||||
addr(2,Dset(k)), addr(2,Dset(m)) ) ) then
|
||||
Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), &
|
||||
if (do_direct_integrals) then
|
||||
Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), &
|
||||
addr(1,Dset(m)), addr(2,Dset(m)))
|
||||
else
|
||||
Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), &
|
||||
addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map)
|
||||
endif
|
||||
if (q /= 0) Delta(q,m) = Delta(p,m)
|
||||
if (j /= 0) Delta(p,j) = Delta(p,m)
|
||||
if (q*j /= 0) Delta(q,j) = Delta(p,m)
|
||||
@ -339,8 +405,16 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
call omp_destroy_lock(lock(k))
|
||||
enddo
|
||||
|
||||
allocate(cholesky_ao(ao_num,ao_num,rank))
|
||||
call dcopy(ndim*rank, L, 1, cholesky_ao, 1)
|
||||
allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
print *, irp_here, ': Allocation failed'
|
||||
stop -1
|
||||
endif
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,rank
|
||||
call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
deallocate(L)
|
||||
cholesky_ao_num = rank
|
||||
|
||||
|
@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num)
|
||||
!$OMP PARALLEL DO PRIVATE(i,k) &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP SHARED (ao_num,ao_two_e_integral_schwartz) &
|
||||
!$OMP SCHEDULE(dynamic)
|
||||
!$OMP SCHEDULE(guided)
|
||||
do i=1,ao_num
|
||||
do k=1,i
|
||||
ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k))
|
||||
@ -975,7 +975,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -998,7 +999,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
endif
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
endif
|
||||
|
||||
ny=0
|
||||
@ -1017,7 +1019,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
end
|
||||
|
||||
recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
@ -1057,7 +1060,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
|
||||
ny=0
|
||||
|
||||
@ -1069,7 +1073,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
|
||||
end
|
||||
|
||||
@ -1098,7 +1103,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1118,7 +1124,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
|
||||
ny=0
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1130,7 +1137,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
end
|
||||
|
||||
recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
@ -1177,9 +1185,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
Y(1) = D_00(1)
|
||||
Y(2) = D_00(2)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
|
||||
return
|
||||
|
||||
@ -1199,7 +1207,8 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_01,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_01,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd)
|
||||
|
||||
ny = 0
|
||||
!DIR$ LOOP COUNT(6)
|
||||
@ -1208,9 +1217,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
enddo
|
||||
call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
|
||||
end select
|
||||
end
|
||||
@ -1232,7 +1241,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
|
||||
logical, external :: ao_two_e_integral_zero
|
||||
|
||||
integer :: i,k
|
||||
double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2
|
||||
double precision, external :: ao_two_e_integral
|
||||
double precision :: cpu_1,cpu_2, wall_1, wall_2
|
||||
double precision :: integral, wall_0
|
||||
double precision :: thr
|
||||
integer :: kk, m, j1, i1
|
||||
@ -1299,3 +1309,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd)
|
||||
end
|
||||
|
||||
|
||||
!DIR$ FORCEINLINE
|
||||
subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Multiply two polynomials
|
||||
! D(t) += B(t)*C(t)
|
||||
END_DOC
|
||||
|
||||
integer, intent(in) :: nb
|
||||
integer, intent(out) :: nd
|
||||
double precision, intent(in) :: b(0:nb), c(0:2)
|
||||
double precision, intent(inout) :: d(0:nb+2)
|
||||
|
||||
integer :: ndtmp
|
||||
integer :: ib, ic, id, k
|
||||
if(nb < 0) return !False if nb>=0
|
||||
|
||||
select case (nb)
|
||||
case (0)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(2) * b(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(2) * b(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(1) * b(2) + c(2) * b(1)
|
||||
d(4) = d(4) + c(2) * b(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
do ib=2,nb
|
||||
d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2)
|
||||
enddo
|
||||
d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1)
|
||||
d(nb+2) = d(nb+2) + c(2) * b(nb)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nb+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
end
|
||||
|
||||
|
@ -85,13 +85,23 @@ subroutine run_ccsd_space_orb
|
||||
|
||||
do while (not_converged)
|
||||
|
||||
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
|
||||
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
|
||||
call compute_H_vo(nO,nV,t1,t2,H_vo)
|
||||
|
||||
! Residue
|
||||
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
! if (do_ao_cholesky) then
|
||||
if (.False.) then
|
||||
call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo)
|
||||
call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv)
|
||||
call compute_H_vo_chol(nO,nV,t1,t2,H_vo)
|
||||
|
||||
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
else
|
||||
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
|
||||
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
|
||||
call compute_H_vo(nO,nV,t1,t2,H_vo)
|
||||
|
||||
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
endif
|
||||
max_r = max(max_r1,max_r2)
|
||||
|
||||
! Update
|
||||
@ -839,6 +849,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
|
||||
! allocate(B1(nV,nV,nV,nV))
|
||||
! call compute_B1(nO,nV,t1,t2,B1)
|
||||
! call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
|
||||
! 1d0, tau, size(tau,1) * size(tau,2), &
|
||||
! B1 , size(B1_gam,1) * size(B1_gam,2), &
|
||||
! 1d0, r2, size(r2,1) * size(r2,2))
|
||||
allocate(B1_gam(nV,nV,nV))
|
||||
do gam=1,nV
|
||||
call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam)
|
||||
|
1395
src/ccsd/ccsd_space_orb_sub_chol.irp.f
Normal file
1395
src/ccsd/ccsd_space_orb_sub_chol.irp.f
Normal file
File diff suppressed because it is too large
Load Diff
@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
|
||||
e = 0d0
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do a = 1, nV
|
||||
do b = a+1, nV
|
||||
do c = b+1, nV
|
||||
|
@ -117,7 +117,7 @@ END_PROVIDER
|
||||
!$OMP N_det_alpha_unique,N_det_beta_unique,irp_here)
|
||||
allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) )
|
||||
tmp_a = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_a=1,N_det
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
@ -173,7 +173,7 @@ END_PROVIDER
|
||||
deallocate(tmp_a)
|
||||
|
||||
tmp_b = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_b=1,N_det
|
||||
krow = psi_bilinear_matrix_transp_rows(k_b)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
|
@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO schedule(dynamic,1024)
|
||||
!$OMP DO schedule(guided,64)
|
||||
do i=1,N_det-1
|
||||
if (duplicate(i)) then
|
||||
cycle
|
||||
|
@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
|
||||
!$OMP REDUCTION(+:accu)
|
||||
allocate(idx(0:n))
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do i = n,1,-1 ! Better OMP scheduling
|
||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
|
||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
||||
|
@ -190,47 +190,75 @@ END_PROVIDER
|
||||
|
||||
deallocate(X)
|
||||
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
|
||||
|
||||
allocate(X2(ao_num,ao_num,cholesky_ao_num,2))
|
||||
double precision :: rss
|
||||
double precision :: memory_of_double
|
||||
|
||||
integer :: iblock
|
||||
integer, parameter :: block_size = 32
|
||||
|
||||
rss = memory_of_double(ao_num*ao_num)
|
||||
call check_mem(2.d0*block_size*rss, irp_here)
|
||||
allocate(X2(ao_num,ao_num,block_size,2))
|
||||
allocate(X3(ao_num,block_size,ao_num,2))
|
||||
|
||||
! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_alpha, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
X2(1,1,1,1), ao_num)
|
||||
do iblock=1,cholesky_ao_num,block_size
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_beta, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
X2(1,1,1,2), ao_num)
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_alpha, ao_num, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,1), ao_num)
|
||||
|
||||
allocate(X3(ao_num,cholesky_ao_num,ao_num,2))
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_beta, ao_num, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,2), ao_num)
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
X3(m,j,s,2) = X2(m,s,j,2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
else
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,1), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_alpha_chol, ao_num)
|
||||
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,2), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_beta_chol, ao_num)
|
||||
endif
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,cholesky_ao_num
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
X3(m,j,s,2) = X2(m,s,j,2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(X2)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, &
|
||||
ao_two_e_integral_alpha_chol, ao_num)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, &
|
||||
ao_two_e_integral_beta_chol, ao_num)
|
||||
|
||||
deallocate(X3)
|
||||
if (elec_alpha_num == elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
deallocate(X2,X3)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -4,16 +4,18 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num
|
||||
! Cholesky vectors in MO basis
|
||||
END_DOC
|
||||
|
||||
integer :: k
|
||||
integer :: k, i, j
|
||||
|
||||
call set_multiple_levels_omp(.False.)
|
||||
print *, 'AO->MO Transformation of Cholesky vectors'
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,cholesky_ao_num
|
||||
call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num)
|
||||
do j=1,mo_num
|
||||
do i=1,mo_num
|
||||
cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
print *, ''
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -23,27 +25,19 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num,
|
||||
! Cholesky vectors in MO basis
|
||||
END_DOC
|
||||
|
||||
integer :: i,j,k
|
||||
double precision, allocatable :: buffer(:,:)
|
||||
double precision, allocatable :: X(:,:,:)
|
||||
integer :: ierr
|
||||
print *, 'AO->MO Transformation of Cholesky vectors'
|
||||
|
||||
print *, 'AO->MO Transformation of Cholesky vectors .'
|
||||
|
||||
call set_multiple_levels_omp(.False.)
|
||||
!$OMP PARALLEL PRIVATE(i,j,k,buffer)
|
||||
allocate(buffer(mo_num,mo_num))
|
||||
!$OMP DO SCHEDULE(static)
|
||||
do k=1,cholesky_ao_num
|
||||
call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num)
|
||||
do j=1,mo_num
|
||||
do i=1,mo_num
|
||||
cholesky_mo_transp(k,i,j) = buffer(i,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
deallocate(buffer)
|
||||
!$OMP END PARALLEL
|
||||
print *, ''
|
||||
allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
print *, irp_here, ': Allocation failed'
|
||||
endif
|
||||
call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, &
|
||||
cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num)
|
||||
call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, &
|
||||
X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num)
|
||||
deallocate(X)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
||||
call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map)
|
||||
print*, 'MO integrals provided'
|
||||
return
|
||||
else
|
||||
endif
|
||||
|
||||
if (.not. do_direct_integrals) then
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
endif
|
||||
|
||||
@ -90,6 +92,10 @@ subroutine four_idx_dgemm
|
||||
double precision, allocatable :: a1(:,:,:,:)
|
||||
double precision, allocatable :: a2(:,:,:,:)
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
|
||||
allocate (a1(ao_num,ao_num,ao_num,ao_num))
|
||||
|
||||
print *, 'Getting AOs'
|
||||
@ -103,6 +109,7 @@ subroutine four_idx_dgemm
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
|
||||
print *, '1st transformation'
|
||||
! 1st transformation
|
||||
allocate (a2(ao_num,ao_num,ao_num,mo_num))
|
||||
@ -166,11 +173,9 @@ subroutine four_idx_dgemm
|
||||
|
||||
deallocate (a1)
|
||||
|
||||
call map_sort(mo_integrals_map)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
integer*8 :: get_mo_map_size, mo_map_size
|
||||
mo_map_size = get_mo_map_size()
|
||||
|
||||
end subroutine
|
||||
|
||||
subroutine add_integrals_to_map(mask_ijkl)
|
||||
@ -250,7 +255,7 @@ subroutine add_integrals_to_map(mask_ijkl)
|
||||
|
||||
call wall_time(wall_1)
|
||||
|
||||
size_buffer = min(ao_num*ao_num*ao_num,8000000)
|
||||
size_buffer = min(ao_num*ao_num,8000000)
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
||||
|
||||
@ -443,11 +448,6 @@ subroutine add_integrals_to_map(mask_ijkl)
|
||||
!$OMP END PARALLEL
|
||||
call map_merge(mo_integrals_map)
|
||||
|
||||
call wall_time(wall_2)
|
||||
call cpu_time(cpu_2)
|
||||
integer*8 :: get_mo_map_size, mo_map_size
|
||||
mo_map_size = get_mo_map_size()
|
||||
|
||||
deallocate(list_ijkl)
|
||||
|
||||
|
||||
@ -465,51 +465,53 @@ subroutine add_integrals_to_map_cholesky
|
||||
integer :: size_buffer, n_integrals
|
||||
size_buffer = min(mo_num*mo_num*mo_num,16000000)
|
||||
|
||||
double precision, allocatable :: Vtmp(:,:,:,:)
|
||||
double precision, allocatable :: Vtmp(:,:,:)
|
||||
integer(key_kind) , allocatable :: buffer_i(:)
|
||||
real(integral_kind), allocatable :: buffer_value(:)
|
||||
|
||||
if (.True.) then
|
||||
! In-memory transformation
|
||||
call set_multiple_levels_omp(.False.)
|
||||
|
||||
allocate (Vtmp(mo_num,mo_num,mo_num,mo_num))
|
||||
!$OMP PARALLEL DEFAULT(SHARED) &
|
||||
!$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp)
|
||||
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
|
||||
allocate (Vtmp(mo_num,mo_num,mo_num))
|
||||
n_integrals = 0
|
||||
|
||||
call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, &
|
||||
cholesky_mo, mo_num*mo_num, &
|
||||
cholesky_mo, mo_num*mo_num, 0.d0, &
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do l=1,mo_num
|
||||
call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, &
|
||||
cholesky_mo_transp, cholesky_ao_num, &
|
||||
cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, &
|
||||
Vtmp, mo_num*mo_num)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i)
|
||||
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
|
||||
n_integrals = 0
|
||||
!$OMP DO
|
||||
do l=1,mo_num
|
||||
do k=1,l
|
||||
do j=1,mo_num
|
||||
do i=1,j
|
||||
if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then
|
||||
n_integrals += 1
|
||||
buffer_value(n_integrals) = Vtmp(i,j,k,l)
|
||||
!DIR$ FORCEINLINE
|
||||
call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals))
|
||||
if (n_integrals == size_buffer) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
n_integrals = 0
|
||||
endif
|
||||
do k=1,l
|
||||
do j=1,mo_num
|
||||
do i=1,j
|
||||
if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then
|
||||
n_integrals = n_integrals + 1
|
||||
buffer_value(n_integrals) = Vtmp(i,j,k)
|
||||
!DIR$ FORCEINLINE
|
||||
call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals))
|
||||
if (n_integrals == size_buffer) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
n_integrals = 0
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
if (n_integrals > 0) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
deallocate(buffer_i, buffer_value)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(Vtmp)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
endif
|
||||
deallocate(buffer_i, buffer_value, Vtmp)
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call map_sort(mo_integrals_map)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
end
|
||||
|
||||
@ -580,6 +582,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
||||
return
|
||||
endif
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||
print*, 'Providing the molecular integrals '
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
@ -855,6 +860,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
|
||||
call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int )
|
||||
call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int )
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||
print*, 'Providing the molecular integrals '
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
@ -1350,16 +1358,29 @@ END_PROVIDER
|
||||
! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij
|
||||
END_DOC
|
||||
|
||||
integer :: i,j
|
||||
integer :: i,j,k
|
||||
double precision :: get_two_e_integral
|
||||
|
||||
|
||||
if (do_ao_cholesky) then
|
||||
double precision, allocatable :: buffer(:,:)
|
||||
allocate (buffer(cholesky_ao_num,mo_num))
|
||||
do k=1,cholesky_ao_num
|
||||
do i=1,mo_num
|
||||
buffer(k,i) = cholesky_mo_transp(k,i,i)
|
||||
enddo
|
||||
enddo
|
||||
call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, &
|
||||
buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num)
|
||||
deallocate(buffer)
|
||||
|
||||
do j=1,mo_num
|
||||
do i=1,mo_num
|
||||
!TODO: use dgemm
|
||||
mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j))
|
||||
mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i))
|
||||
mo_two_e_integrals_jj_exchange(i,j) = 0.d0
|
||||
do k=1,cholesky_ao_num
|
||||
mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + &
|
||||
cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
@ -22,11 +22,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
|
||||
perror("Error opening mmap file for reading");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
/* try again without huge pages */
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -53,16 +49,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
/* try again without huge pages */
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
}
|
||||
|
||||
if (map == MAP_FAILED) {
|
||||
close(fd);
|
||||
printf("%s:\n", filename);
|
||||
printf("%s: %lu\n", filename, bytes);
|
||||
perror("Error mmapping the file");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map)
|
||||
|
||||
integer*8 :: n_elements
|
||||
n_elements = int(map % n_elements,8)
|
||||
if (n_elements <= 0) then
|
||||
print *, 'Unable to write map to disk: n_elements = ', n_elements
|
||||
stop -1
|
||||
endif
|
||||
|
||||
|
||||
if (map % consolidated) then
|
||||
|
@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ]
|
||||
! Maximum memory in Gb
|
||||
END_DOC
|
||||
character*(128) :: env
|
||||
integer, external :: get_total_available_memory
|
||||
|
||||
qp_max_mem = 2000
|
||||
qp_max_mem = get_total_available_memory()
|
||||
call write_int(6,qp_max_mem,'Total available memory (GB)')
|
||||
call getenv('QP_MAXMEM',env)
|
||||
if (trim(env) /= '') then
|
||||
call lock_io()
|
||||
@ -122,3 +124,35 @@ subroutine print_memory_usage()
|
||||
'.. >>>>> [ RES MEM : ', rss , &
|
||||
' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..'
|
||||
end
|
||||
|
||||
integer function get_total_available_memory() result(res)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the total available memory on the current machine
|
||||
END_DOC
|
||||
|
||||
character(len=128) :: line
|
||||
integer :: status
|
||||
integer :: iunit
|
||||
integer*8, parameter :: KB = 1024
|
||||
integer*8, parameter :: GiB = 1024**3
|
||||
integer, external :: getUnitAndOpen
|
||||
|
||||
iunit = getUnitAndOpen('/proc/meminfo','r')
|
||||
|
||||
res = 512
|
||||
do
|
||||
read(iunit, '(A)', END=10) line
|
||||
if (line(1:10) == "MemTotal: ") then
|
||||
read(line(11:), *, ERR=20) res
|
||||
res = int((res*KB) / GiB,4)
|
||||
exit
|
||||
20 continue
|
||||
end if
|
||||
end do
|
||||
10 continue
|
||||
close(iunit)
|
||||
|
||||
end function get_total_available_memory
|
||||
|
||||
|
||||
|
@ -46,7 +46,13 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
|
||||
if (read_only) then
|
||||
map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1)
|
||||
else
|
||||
@ -66,7 +72,12 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
fd_ = fd
|
||||
call c_munmap_fortran( length, fd_, map)
|
||||
end subroutine
|
||||
@ -82,7 +93,12 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
fd_ = fd
|
||||
call c_msync_fortran( length, fd_, map)
|
||||
end subroutine
|
||||
|
@ -53,33 +53,8 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
|
||||
allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4))
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k)
|
||||
!$OMP DO
|
||||
do i3=1,n3
|
||||
idx3 = list3(i3)
|
||||
do i1=1,n1
|
||||
idx1 = list1(i1)
|
||||
do k=1,cholesky_ao_num
|
||||
v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP DO
|
||||
do i4=1,n4
|
||||
idx4 = list4(i4)
|
||||
do i2=1,n2
|
||||
idx2 = list2(i2)
|
||||
do k=1,cholesky_ao_num
|
||||
v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num)
|
||||
call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num)
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, &
|
||||
v1, cholesky_ao_num, &
|
||||
@ -129,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
|
||||
|
||||
end
|
||||
|
||||
subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv)
|
||||
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: n1,n3,ldv
|
||||
integer, intent(in) :: list1(n1),list3(n3)
|
||||
double precision, intent(out) :: v(ldv,n1,n3)
|
||||
|
||||
integer :: i1,i3,idx1,idx3,k
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k)
|
||||
do i3=1,n3
|
||||
idx3 = list3(i3)
|
||||
do i1=1,n1
|
||||
idx1 = list1(i1)
|
||||
do k=1,cholesky_ao_num
|
||||
v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
end
|
||||
|
||||
! full
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
|
||||
@ -345,6 +344,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
! ppqq
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
|
||||
|
Loading…
Reference in New Issue
Block a user