diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 597395a6..ffab74e5 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -31,6 +31,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & real(integral_kind) :: tmp integer(key_kind), allocatable :: key(:) real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) ASSERT (k_start == i_start) ASSERT (l_start == j_start) @@ -58,48 +59,71 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! Create a temporary memory-mapped file integer :: fd type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:,:) + integer*8, pointer :: a_array(:) call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ ((i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)*3_8)/2_8, l_end-l_start+1_8 /)) + (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /)) + + allocate(l_pointer(l_start:l_end+1)) + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(static,137) +! do i=1,size(a_array) +! a_array(i) = 0.d0 +! enddo +! !$OMP END PARALLEL DO + + allocate( value((i_max*k_max)) ) + a = 1 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = a + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array(a) = ik + a = a+1 + a_array(a) = j + a = a+1 + a_array(a) = transfer(dble(tmp), 1_8) + a = a+1 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP END PARALLEL + l_pointer(l_end+1) = a + deallocate(value) !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B) & + !$OMP map_a,map_c,matrix_B,l_pointer) & !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & !$OMP a,b,c,d,tmp,T2d,V2d) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO SCHEDULE(dynamic,4) - do l=l_start,l_end - a = 1 - ll = l-l_start+1 - do j=j_start,j_end - ik=0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,tmp) - if (tmp /= 0.d0) then - a = a+1 - a_array(a,ll) = ik - a = a+1 - a_array(a,ll) = j - a = a+1 - a_array(a,ll) = transfer(dble(tmp), 1_8) - endif - enddo - enddo - enddo - a_array(a+1,ll) = 0 - a_array(1,ll) = a - enddo - !$OMP END DO allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & @@ -115,24 +139,17 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & cycle endif - ll = l-l_start+1 -! T2d = 0.d0 -! do a=2,a_array(1,ll),3 -! ik = a_array(a,ll) -! j = a_array(a+1,ll) -! T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) -! enddo - - a=2 + a=l_pointer(l) do j=j_start,j_end ik=0 do k=k_start,k_end do i=i_start,k ik = ik+1 - if ( (ik /= a_array(a,ll)).or.(j /= a_array(a+1,ll)) ) then + if ( (ik /= a_array(a)).or.(j /= a_array(a+1)) & + .or.(a >= l_pointer(l+1)) ) then T2d(ik,j) = 0.d0 else - T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) + T2d(ik,j) = transfer(a_array(a+2), 1.d0) a=a+3 endif enddo @@ -228,6 +245,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & call map_sort(map_c) call munmap( & - (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, c_pointer) + (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer) + deallocate(l_pointer) end