From a50663a77a08bc709ca3bd458e2bea5a087e5f99 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Oct 2017 21:10:53 +0200 Subject: [PATCH] Working on four_idx_zmq --- src/Davidson/davidson_parallel.irp.f | 4 +- src/Determinants/H_apply.irp.f | 2 +- src/FourIdx/four_index.irp.f | 180 +++++++++++++ src/FourIdx/four_index_block.irp.f | 288 ++++++++++++++++++++ src/FourIdx/four_index_slave.irp.f.todo | 279 ++++++++++++++++++++ src/FourIdx/four_index_sym.irp.f | 5 +- src/FourIdx/four_index_sym_mmap.irp.f | 292 +++++++++++++++++++++ src/FourIdx/four_index_zmq.irp.f.todo | 273 +++++++++++++++++++ src/Integrals_Bielec/mo_bi_integrals.irp.f | 11 +- 9 files changed, 1324 insertions(+), 10 deletions(-) create mode 100644 src/FourIdx/four_index.irp.f create mode 100644 src/FourIdx/four_index_block.irp.f create mode 100644 src/FourIdx/four_index_slave.irp.f.todo create mode 100644 src/FourIdx/four_index_sym_mmap.irp.f create mode 100644 src/FourIdx/four_index_zmq.irp.f.todo diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 2b57545b..24f2f947 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -205,10 +205,10 @@ subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop "davidson_pull_results failed to pull imin" rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop "davidson_pull_results failed to pull imax" sz = (imax-imin+1)*N_states_diag diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 26f981dc..1d7a5bd8 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -193,7 +193,7 @@ subroutine copy_H_apply_buffer_to_wf SOFT_TOUCH N_det psi_det psi_coef logical :: found_duplicates - !call remove_duplicates_in_psi_det(found_duplicates) + call remove_duplicates_in_psi_det(found_duplicates) end subroutine remove_duplicates_in_psi_det(found_duplicates) diff --git a/src/FourIdx/four_index.irp.f b/src/FourIdx/four_index.irp.f new file mode 100644 index 00000000..0c30f55e --- /dev/null +++ b/src/FourIdx/four_index.irp.f @@ -0,0 +1,180 @@ +subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + print *, d, l + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) + + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + enddo + + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & + (i_end-i_start+1), 1.d0, & + matrix_B(i_start,a_start), size(matrix_B,1), & + T(i_start,k_start,j_start), size(T,1), 0.d0, & + V(a_start,k_start,j_start), size(V, 1) ) + + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) + + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & + (b_end-b_start+1), & + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + + deallocate(V) + + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + enddo + + deallocate(T) + + enddo + + idx = 0_8 + do b=b_start,b_end + do c=c_start,c_end + do a=a_start,a_end + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + call map_sort(map_c) + !$OMP END CRITICAL + + + enddo + !$OMP END DO + + deallocate(key,value) + !$OMP END PARALLEL + + call munmap( & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + +end diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f new file mode 100644 index 00000000..dce5fcc9 --- /dev/null +++ b/src/FourIdx/four_index_block.irp.f @@ -0,0 +1,288 @@ +subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: l_start_block, l_end_block, l_block + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + integer*4, allocatable :: a_array_ik(:) + integer*2, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + integer :: ipass, npass + integer*8 :: tempspace + + tempspace = (new_size * 14_8) / (1024_8 * 1024_8) + npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space + l_block = (l_end-l_start)/npass + + ipass = 0 + do l_start_block = l_start, l_end, l_block + ipass = ipass+1 + print *, 'Pass ', ipass + l_end_block = min(l_end, l_start_block+l_block-1) + + allocate(l_pointer(l_start_block:l_end_block+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start_block,l_end_block + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end_block+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + + !INPUT DATA + !open(unit=10,file='INPUT',form='UNFORMATTED') + !write(10) i_start, j_start, i_end, j_end + !write(10) a_start, b_start, a_end, b_end + !write(10) LDB, mo_tot_num + !write(10) matrix_B(1:LDB,1:mo_tot_num) + !idx=size(a_array) + !write(10) idx + !write(10) a_array + !write(10) l_pointer + !close(10) + !open(unit=10,file='OUTPUT',form='FORMATTED') + ! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start_block,l_end_block + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + + ! T = 0.d0 + ! do a=a_start,b + ! do k=k_start,k_end + ! do i=i_start,k + ! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) + ! enddo + ! do i=k+1,i_end + ! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) + ! enddo + ! enddo + ! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + + ! do c=c_start,b + ! do a=a_start,c + ! do k=k_start,k_end + ! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + ! do c=b+1,c_end + ! do a=a_start,b + ! do k=k_start,k_end + ! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_update(map_c, key, value, idx,1.d-15) + !$OMP END CRITICAL + + !WRITE OUTPUT + ! OMP CRITICAL + !print *, d + !do b=b_start,d + ! do c=c_start,c_end + ! do a=a_start,min(b,c) + ! if (dabs(U(a,c,b)) < 1.d-15) then + ! cycle + ! endif + ! write(10,*) d,c,b,a,U(a,c,b) + ! enddo + ! enddo + !enddo + ! OMP END CRITICAL + !END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + enddo + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_slave.irp.f.todo b/src/FourIdx/four_index_slave.irp.f.todo new file mode 100644 index 00000000..47124823 --- /dev/null +++ b/src/FourIdx/four_index_slave.irp.f.todo @@ -0,0 +1,279 @@ +subroutine four_index_transform_slave(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, task_id, thread ) + implicit none + use f77_zmq + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + integer, intent(in) :: task_id, thread + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + integer*4, allocatable :: a_array_ik(:) + integer*2, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + integer(ZMQ_PTR) :: zmq_socket_push + zmq_socket_push = new_zmq_push_socket(thread) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call four_idx_push_results(zmq_socket_push, key, value, idx, task_id) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + call end_zmq_push_socket(zmq_socket_push,thread) + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f index 14a095ca..79c8d1d3 100644 --- a/src/FourIdx/four_index_sym.irp.f +++ b/src/FourIdx/four_index_sym.irp.f @@ -173,6 +173,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ii = ii + 1_8 enddo enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& (d-b_start+1), & (j_end-j_start+1), 1.d0, & @@ -251,7 +252,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo !$OMP CRITICAL - call map_append(map_c, key, value, idx) + call map_update(map_c, key, value, idx,1.d-15) !$OMP END CRITICAL !WRITE OUTPUT @@ -276,7 +277,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL - call map_sort(map_c) + call map_merge(map_c) call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') diff --git a/src/FourIdx/four_index_sym_mmap.irp.f b/src/FourIdx/four_index_sym_mmap.irp.f new file mode 100644 index 00000000..877daf30 --- /dev/null +++ b/src/FourIdx/four_index_sym_mmap.irp.f @@ -0,0 +1,292 @@ +subroutine four_index_transform_sym_mmap(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + integer*4, pointer :: a_array_ik(:) + integer*2, pointer :: a_array_j(:) + double precision, pointer :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) + + print *, 'Transforming MO integrals' + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + print *, d, '/', d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_sort(map_c) + + call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') + close(10,status='DELETE') + call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') + close(10,status='DELETE') + call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') + close(10,status='DELETE') + deallocate(l_pointer) + +end diff --git a/src/FourIdx/four_index_zmq.irp.f.todo b/src/FourIdx/four_index_zmq.irp.f.todo new file mode 100644 index 00000000..b2f639a7 --- /dev/null +++ b/src/FourIdx/four_index_zmq.irp.f.todo @@ -0,0 +1,273 @@ +subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use f77_zmq + use map_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: l_start_block, l_end_block, l_block + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + call new_parallel_job(zmq_to_qp_run_socket,'four_idx') + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + integer :: npass + integer*8 :: tempspace + + tempspace = (new_size * 14_8) / (1024_8 * 1024_8) + npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space + l_block = (l_end-l_start)/npass + + ! Create tasks + ! ============ + + character(len=64), allocatable :: task + + do l_start_block = l_start, l_end, l_block + l_end_block = min(l_end, l_start_block+l_block-1) + write(task,'I10,X,I10') l_start_block, l_end_block + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) + enddo + + call zmq_set_running(zmq_to_qp_run_socket) + + PROVIDE nproc + + call omp_set_nested(.True.) + integer :: ithread + !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread==0) then + call four_idx_collector(zmq_to_qp_run_socket,map_c) + else + !TODO : Put strings of map_a and matrix_b on server and broadcast + call four_index_transform_slave_inproc(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, 1 ) + endif + !$OMP END PARALLEL + + call end_parallel_job(zmq_to_qp_run_socket, 'four_idx') + + +end + + +subroutine four_idx_slave_work(zmq_to_qp_run_socket, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer,intent(in) :: worker_id + integer :: task_id + character*(512) :: msg + + integer :: i_start, j_start, k_start, l_start_block + integer :: i_end , j_end , k_end , l_end_block + integer :: a_start, b_start, c_start, d_start + integer :: a_end , b_end , c_end , d_end + + !TODO : get map_a and matrix_B from server + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) + if(task_id == 0) exit + read (msg,*) LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end + + call four_index_transform_slave(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, zmq_to_qp_run_socket, & + task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + + enddo +end + + +BEGIN_PROVIDER [ integer, nthreads_four_idx ] + implicit none + BEGIN_DOC + ! Number of threads for 4-index transformation + END_DOC + nthreads_four_idx = nproc + character*(32) :: env + call getenv('NTHREADS_FOUR_IDX',env) + if (trim(env) /= '') then + read(env,*) nthreads_four_idx + endif + call write_int(6,nthreads_davidson,'Number of threads for 4-index transformation') +END_PROVIDER + + + +subroutine four_idx_collector(zmq_to_qp_run_socket,map_c) + use f77_zmq + use map_module + implicit none + type(map_type), intent(inout) :: map_c + + integer :: more + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + + more = 1 + zmq_socket_pull = new_zmq_pull_socket() + + do while (more == 1) + call four_idx_pull_results(zmq_socket_pull, map_c, task_id) + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + enddo + + call end_zmq_pull_socket(zmq_socket_pull) + +end + + +subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id) + use f77_zmq + use map_module + implicit none + type(map_type), intent(inout) :: map_c + integer(ZMQ_PTR), intent(inout) :: zmq_socket_pull + + integer, intent(out) :: task_id + + integer :: rc, sze + integer*8 :: rc8 + + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "four_idx_pull_results failed to pull task_id" + + rc = f77_zmq_recv( zmq_socket_pull, sze, 4, 0) + if(rc /= 4) stop "four_idx_pull_results failed to pull sze" + + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + allocate(key(sze), value(sze)) + + rc8 = f77_zmq_recv8( zmq_socket_pull, key, key_kind*sze, 0) + if(rc8 /= key_kind*sze) stop "four_idx_pull_results failed to pull key" + + rc8 = f77_zmq_recv8( zmq_socket_pull, value, integral_kind*sze, 0) + if(rc8 /= integral_kind*sze) stop "four_idx_pull_results failed to pull value" + +! Activate if zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop 'error' + endif +IRP_ENDIF + + call map_update(map_c, key, value, sze, 1.d-15) ! TODO : threshold + + deallocate(key, value) +end + + + +subroutine four_idx_push_results(zmq_socket_push, key, value, sze, task_id) + use f77_zmq + use map_module + implicit none + integer, intent(in) :: sze + integer(key_kind), intent(in) :: key(sze) + real(integral_kind), intent(in) :: value(sze) + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + integer, intent(in) :: task_id + + integer :: rc, sze + integer*8 :: rc8 + + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "four_idx_push_results failed to push task_id" + + rc = f77_zmq_send( zmq_socket_push, sze, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "four_idx_push_results failed to push sze" + + rc8 = f77_zmq_send8( zmq_socket_push, key, key_kind*sze, ZMQ_SNDMORE) + if(rc8 /= key_kind*sze) stop "four_idx_push_results failed to push key" + + rc8 = f77_zmq_send8( zmq_socket_push, value, integral_kind*sze, 0) + if(rc8 /= integral_kind*sze) stop "four_idx_push_results failed to push value" + +! Activate if zmq_socket_push is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_push, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_push,...' + stop 'error' + endif +IRP_ENDIF + +end + + diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 22799923..2fc77219 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -118,11 +118,12 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] else ! call add_integrals_to_map(full_ijkl_bitmask_4) - call four_index_transform_sym(ao_integrals_map,mo_integrals_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - + + call four_index_transform_block(ao_integrals_map,mo_integrals_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + integer*8 :: get_mo_map_size, mo_map_size mo_map_size = get_mo_map_size()