2017-12-05 18:54:10 +01:00
|
|
|
subroutine four_index_transform_slave_work(map_a,matrix_B,LDB,            &
      i_start, j_start, k_start, l_start,                            &
      i_end  , j_end  , k_end  , l_end  ,                            &
      a_start, b_start, c_start, d_start,                            &
      a_end  , b_end  , c_end  , d_end, task_id, thread )
  implicit none
  use f77_zmq
  use map_module
  use mmap_module
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! The routine works in two stages:
!
! 1. The non-zero values of map_a with i <= k are gathered into the compressed
!    arrays a_array_ik / a_array_j / a_array_value. l_pointer(l) is the position
!    of the first entry stored for a given l, l_pointer(l_end+1) the position of
!    a zero sentinel entry closing the list.
!
! 2. For every d, the contraction over l, j, (i,k) is accumulated in U(a,c,b)
!    with DGEMM/DSYMM calls. The non-negligible elements of U are then sent with
!    four_idx_push_results, using -task_id as task identifier. When all threads
!    are done, the master thread sends one last empty message with +task_id.
!
! map_a      : map holding the integrals to transform
! matrix_B   : transformation matrix, leading dimension LDB
! *_start/end: index ranges. The ASSERTs below require i_start == k_start,
!              j_start == l_start, a_start == c_start and b_start == d_start
! task_id    : identifier forwarded to four_idx_push_results
! thread     : value forwarded to new_zmq_push_socket
  END_DOC
  type(map_type), intent(in)     :: map_a
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
  integer, intent(in)            :: task_id, thread

  ! Work arrays of the contraction (allocated per thread in the parallel regions)
  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: a, b, c, d
  integer                        :: p, q
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ! Compressed storage of the non-zero input integrals
  integer*4, allocatable         :: a_array_ik(:)     ! packed (i,k) index, i <= k
  integer*2, allocatable         :: a_array_j(:)      ! j index
  double precision, allocatable  :: a_array_value(:)  ! integral value
  integer*8                      :: new_size

  integer                        :: n_running_threads
  integer(ZMQ_PTR)               :: zmq_socket_push
  integer(ZMQ_PTR), external     :: new_zmq_push_socket

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  new_size = max(1024_8, 5_8 * map_a % n_elements )

  ! The gathering loop below stores the entries of *all* l values in one pass,
  ! so the compressed arrays have to be allocated with their full size.
  ! (They used to be allocated with new_size/npass, which is too small as soon
  ! as npass > 1 and leads to out-of-bounds writes.)
  allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size))

  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )

  ! ------------------------------------------------------------------
  ! Stage 1 : gather the non-zero integrals
  ! ------------------------------------------------------------------
  ii = 1_8
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
  do l=l_start,l_end
    !$OMP SINGLE
    l_pointer(l) = ii
    !$OMP END SINGLE
    do j=j_start,j_end
      ! Fetch in parallel all (i,k) values, i <= k, for the current (j,l)
      !$OMP DO SCHEDULE(static,1)
      do k=k_start,k_end
        do i=i_start,k
          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,value(ik))
        enddo
      enddo
      !$OMP END DO

      ! Append the non-zero ones to the compressed arrays (one thread only,
      ! since ii is a shared running position)
      !$OMP SINGLE
      ik=0
      do k=k_start,k_end
        do i=i_start,k
          ik = ik+1
          tmp=value(ik)
          if (tmp /= 0.d0) then
            a_array_ik(ii) = ik
            a_array_j(ii)  = j
            a_array_value(ii) = tmp
            ii=ii+1_8
          endif
        enddo
      enddo
      !$OMP END SINGLE
    enddo
  enddo
  ! Sentinel entry closing the list
  !$OMP SINGLE
  a_array_ik(ii) = 0
  a_array_j(ii)  = 0
  a_array_value(ii) = 0.d0
  l_pointer(l_end+1) = ii
  !$OMP END SINGLE
  !$OMP END PARALLEL
  deallocate(value)

  !INPUT DATA
  !open(unit=10,file='INPUT',form='UNFORMATTED')
  !write(10) i_start, j_start, i_end, j_end
  !write(10) a_start, b_start, a_end, b_end
  !write(10) LDB, mo_tot_num
  !write(10) matrix_B(1:LDB,1:mo_tot_num)
  !idx=size(a_array)
  !write(10) idx
  !write(10) a_array
  !write(10) l_pointer
  !close(10)
  !open(unit=10,file='OUTPUT',form='FORMATTED')
  ! END INPUT DATA

  ! ------------------------------------------------------------------
  ! Stage 2 : contraction, one d per loop iteration
  ! ------------------------------------------------------------------
  PROVIDE nproc
  n_running_threads = 0
  call omp_set_nested(.true.)
  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
  !$OMP  matrix_B,l_pointer,thread,task_id,n_running_threads,nproc) &
  !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,  &
  !$OMP  a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)

  ! Every thread owns its push socket
  zmq_socket_push = new_zmq_push_socket(thread)

  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  ! Count the threads working in the d loop. The count is used to size the
  ! nested parallel regions below.
  !$OMP ATOMIC
  n_running_threads = n_running_threads+1

  !$OMP DO SCHEDULE(dynamic,1)
  do d=d_start,d_end
    U = 0.d0
    do l=l_start,l_end
      ! Negligible B(l,d) : the whole l contribution is skipped
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif

      ! Expand the compressed entries of this l into T2d(ik,j)
      allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end) )
      ii=l_pointer(l)
      do j=j_start,j_end
        !DIR$ VECTOR NONTEMPORAL
        T2d(:,j) = 0.d0
        ! The scan is bounded by the start of the next l block. Without the
        ! bound, a block with no entry for some j (or an empty block) lets the
        ! scan run into the entries of the following l values.
        ! a_array_j(ii) is still a valid element when ii == l_pointer(l+1),
        ! so evaluating both operands is safe.
        !DIR$ IVDEP
        do while ( (ii < l_pointer(l+1)) .and. (j == a_array_j(ii)) )
          T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0)
          ii = ii + 1_8
        enddo
      enddo

      ! Contraction over j :  V2d(ik,b) = \sum_j T2d(ik,j).B(j,b) , b <= d
      allocate (V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end))
      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
          (d-b_start+1),                                             &
          (j_end-j_start+1), 1.d0,                                   &
          T2d(1,j_start), size(T2d,1),                               &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,          &
          V2d(1,b_start), size(V2d,1) )
      deallocate(T2d)

      ! Nested region : the b values are distributed over the threads which
      ! are not counted in n_running_threads.
      !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
      !$OMP  a_start,b_start,b_end,c_start,c_end,i_start,k_start,k_end, &
      !$OMP  matrix_B,U,l,d,V2d,i_end,a_end) &
      !$OMP  PRIVATE(T,V,i,k,ik) NUM_THREADS(nproc-n_running_threads+1)
      allocate( V(i_start:i_end, k_start:k_end), T(k_start:k_end, a_start:a_end))
      !$OMP DO SCHEDULE(static,1)
      do b=b_start,d
        ! Unpack V2d(:,b) in the upper triangle of V(i,k)
        ik = 0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            V(i,k) = V2d(ik,b)
          enddo
        enddo

        ! Contraction over i, V being referenced as a symmetric matrix :
        ! T(k,a) = \sum_i V(k,i).B(i,a) , a <= b
        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),        &
            1.d0,                                                    &
            V(i_start,k_start), size(V,1),                           &
            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,        &
            T(k_start,a_start), size(T,1) )

        ! Contraction over k, scaled by B(l,d) and accumulated in U(a,c,b),
        ! for c <= b ...
        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),            &
            (k_end-k_start+1), matrix_B(l, d),                       &
            T(k_start,a_start), size(T,1),                           &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,       &
            U(a_start,c_start,b), size(U,1) )

        ! ... and for c > b
        if (b < b_end) then
          call DGEMM('T','N', (b-a_start+1), (c_end-b),              &
              (k_end-k_start+1), matrix_B(l, d),                     &
              T(k_start,a_start), size(T,1),                         &
              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,         &
              U(a_start,b+1,b), size(U,1) )
        endif
      enddo
      deallocate(T,V)
      !$OMP END PARALLEL

      deallocate(V2d)
    enddo

    ! Collect the non-negligible transformed integrals of this d and send them
    idx = 0_8

    allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
    do b=b_start,d
      ! q : compound index of the pair (b,d), b <= d
      q = b+ishft(d*d-d,-1)
      do c=c_start,c_end
        do a=a_start,min(b,c)
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          ! p : compound index of the pair (a,c), a <= c. It is computed
          ! directly from a and c : a running counter gets out of sync with a
          ! as soon as one element is skipped by the test above.
          p = a+ishft(c*c-c,-1)
          if ((a==b).and.(p>q)) cycle
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          !print *,  int(key(idx),4), int(a,2),int(b,2),int(c,2),int(d,2), p, q
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    call four_idx_push_results(zmq_socket_push, key, value, idx, -task_id)
    deallocate(key,value)

    !WRITE OUTPUT
    ! OMP CRITICAL
    !print *,  d
    !do b=b_start,d
    !  do c=c_start,c_end
    !    do a=a_start,min(b,c)
    !      if (dabs(U(a,c,b)) < 1.d-15) then
    !        cycle
    !      endif
    !      write(10,*) d,c,b,a,U(a,c,b)
    !    enddo
    !  enddo
    !enddo
    ! OMP END CRITICAL
    !END WRITE OUTPUT

  enddo
  ! NOTE(review): END DO without NOWAIT implies a barrier, so the counter below
  ! is only decremented once every thread has left the d loop. If idle threads
  ! are meant to be handed over to the nested regions of the threads still
  ! working, NOWAIT may be what is wanted here -- to be confirmed.
  !$OMP END DO
  deallocate(U)

  !$OMP ATOMIC
  n_running_threads = n_running_threads-1

  ! All results have been pushed : the master thread sends the final message
  ! carrying +task_id and no data.
  ! NOTE(review): the count is passed as idx (integer(key_kind)) above and as a
  ! default integer 0 here; the callee is not visible from this file -- check
  ! that both calls match its interface.
  !$OMP BARRIER
  !$OMP MASTER
  call four_idx_push_results(zmq_socket_push, 0_8, 0.d0, 0, task_id)
  !$OMP END MASTER
  call end_zmq_push_socket(zmq_socket_push)
  !$OMP END PARALLEL

  deallocate(l_pointer,a_array_ik,a_array_j,a_array_value)

end
|