mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-22 12:23:43 +01:00
Improved disk access in Cholesky
This commit is contained in:
parent
70317b2a15
commit
d89682cb7e
@ -41,7 +41,7 @@ END_PROVIDER
|
|||||||
integer*8, allocatable :: Lset(:), Dset(:), addr3(:)
|
integer*8, allocatable :: Lset(:), Dset(:), addr3(:)
|
||||||
logical, allocatable :: computed(:)
|
logical, allocatable :: computed(:)
|
||||||
|
|
||||||
integer :: i,j,k,m,p,q, dj, p2, q2
|
integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj
|
||||||
integer*8 :: i8, j8, p8, qj8, rank_max, np8
|
integer*8 :: i8, j8, p8, qj8, rank_max, np8
|
||||||
integer :: N, np, nq
|
integer :: N, np, nq
|
||||||
|
|
||||||
@ -65,6 +65,8 @@ END_PROVIDER
|
|||||||
type(c_ptr) :: c_pointer(2)
|
type(c_ptr) :: c_pointer(2)
|
||||||
integer :: fd(2)
|
integer :: fd(2)
|
||||||
logical :: delta_on_disk
|
logical :: delta_on_disk
|
||||||
|
integer :: dgemm_block_size, nqq
|
||||||
|
double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:)
|
||||||
|
|
||||||
PROVIDE nproc
|
PROVIDE nproc
|
||||||
PROVIDE nucl_coord ao_two_e_integral_schwartz
|
PROVIDE nucl_coord ao_two_e_integral_schwartz
|
||||||
@ -300,17 +302,58 @@ END_PROVIDER
|
|||||||
!$OMP BARRIER
|
!$OMP BARRIER
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
PROVIDE nproc
|
||||||
if (N>0) then
|
if (N>0) then
|
||||||
call dgemm('N','T', np, nq, N, -1.d0, &
|
|
||||||
Ltmp_p, np, Ltmp_q, nq, 0.d0, Delta, np)
|
if (delta_on_disk) then
|
||||||
else
|
! Blocking improves I/O performance
|
||||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) SCHEDULE(static,1)
|
|
||||||
do q=1,nq
|
dgemm_block_size = nproc*4
|
||||||
do j=1,np
|
|
||||||
Delta(j,q) = 0.d0
|
allocate (dgemm_buffer1(np,dgemm_block_size))
|
||||||
|
allocate (dgemm_buffer2(dgemm_block_size,N))
|
||||||
|
|
||||||
|
do jj=1,nq,dgemm_block_size
|
||||||
|
|
||||||
|
nqq = min(nq, jj+dgemm_block_size-1) - jj + 1
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii)
|
||||||
|
do ii=1,N
|
||||||
|
do q=jj,jj+nqq-1
|
||||||
|
dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
call dgemm('N', 'T', np, nqq, N, 1.d0, &
|
||||||
|
Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np)
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q)
|
||||||
|
do q=jj,jj+nqq-1
|
||||||
|
Delta(:,q) = - dgemm_buffer1(:, q-jj+1)
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
deallocate(dgemm_buffer1, dgemm_buffer2)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
call dgemm('N', 'T', np, nq, N, -1.d0, &
|
||||||
|
Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np)
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j)
|
||||||
|
do q=1,nq
|
||||||
|
Delta(:,q) = 0.d0
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! f.
|
! f.
|
||||||
@ -329,9 +372,46 @@ END_PROVIDER
|
|||||||
rank = N+j
|
rank = N+j
|
||||||
|
|
||||||
if (iblock == block_size) then
|
if (iblock == block_size) then
|
||||||
call dgemm('N','T',np,nq,block_size,-1.d0, &
|
|
||||||
|
if (delta_on_disk) then
|
||||||
|
! Blocking improves I/O performance
|
||||||
|
|
||||||
|
dgemm_block_size = nproc*4
|
||||||
|
allocate (dgemm_buffer1(np,dgemm_block_size))
|
||||||
|
allocate (dgemm_buffer2(dgemm_block_size,block_size))
|
||||||
|
|
||||||
|
do jj=1,nq,dgemm_block_size
|
||||||
|
nqq = min(nq, jj+dgemm_block_size-1) - jj + 1
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii)
|
||||||
|
do ii=1,block_size
|
||||||
|
do q=jj,jj+nqq-1
|
||||||
|
dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
call dgemm('N', 'T', np, nqq, block_size, 1.d0, &
|
||||||
|
Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np)
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q)
|
||||||
|
do q=jj,jj+nqq-1
|
||||||
|
Delta(:,q) = Delta(:,q) - dgemm_buffer1(:, q-jj+1)
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
enddo
|
||||||
|
deallocate(dgemm_buffer1, dgemm_buffer2)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
call dgemm('N','T',np,nq,block_size,-1.d0, &
|
||||||
Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np)
|
Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np)
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
iblock = 0
|
iblock = 0
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! ii.
|
! ii.
|
||||||
|
Loading…
Reference in New Issue
Block a user