From 56609e4b3cffe2b8c8312112bba97e8b42aff553 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 28 Sep 2017 01:10:44 +0200 Subject: [PATCH] Almost as fast --- plugins/FourIdx/four_index_sym.irp.f | 74 +++++++++++++++------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 7b0b2c3a..597395a6 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -24,7 +24,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & double precision, allocatable :: T2d(:,:), V2d(:,:) integer :: i_max, j_max, k_max, l_max integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l, ik + integer :: i, j, k, l, ik, ll integer :: a, b, c, d double precision, external :: get_ao_bielec_integral integer(key_kind) :: idx @@ -58,18 +58,17 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! Create a temporary memory-mapped file integer :: fd type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:,:,:) + integer*8, pointer :: a_array(:,:) call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)/2, l_end-l_start+1 /)) - + (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ ((i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)*3_8)/2_8, l_end-l_start+1_8 /)) !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & !$OMP map_a,map_c,matrix_B) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik, & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & !$OMP a,b,c,d,tmp,T2d,V2d) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -78,30 +77,34 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP DO SCHEDULE(dynamic,4) do l=l_start,l_end a = 1 + ll = l-l_start+1 do j=j_start,j_end + ik=0 do k=k_start,k_end do i=i_start,k + ik = ik+1 call bielec_integrals_index(i,j,k,l,idx) call map_get(map_a,idx,tmp) if (tmp /= 0.d0) then a = a+1 - a_array(1,a,l-l_start+1) = i - a_array(2,a,l-l_start+1) = j - a_array(3,a,l-l_start+1) = k - a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + a_array(a,ll) = ik + a = a+1 + a_array(a,ll) = j + a = a+1 + a_array(a,ll) = transfer(dble(tmp), 1_8) endif enddo enddo enddo - a_array(1,1,l-l_start+1) = a - print *, l + a_array(a+1,ll) = 0 + a_array(1,ll) = a enddo !$OMP END DO allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & V(i_start:i_end, k_start:k_end), & - T(k_start:k_end, a_start:a_end) ) + T(k_start:k_end, a_start:a_end)) !$OMP DO SCHEDULE(dynamic) @@ -111,35 +114,38 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif - print *, d, l - T2d = 0.d0 - do a=2,a_array(1,1,l-l_start+1) - i = a_array(1,a,l-l_start+1) - j = a_array(2,a,l-l_start+1) - k = a_array(3,a,l-l_start+1) - ik = (i-i_start+1) + ishft( (k-k_start+1)*(k-k_start), -1) - T2d(ik,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) - enddo - -! V2d = 0.d0 -! do b=b_start,d -! do j=j_start,j_end -! do ik=1, ishft( (i_end-i_start+1)*(i_end-i_start+2), -1) -! V2d(ik,b) = V2d(ik,b) + T2d(ik,j)*matrix_B(j,b) -! enddo -! enddo + ll = l-l_start+1 +! T2d = 0.d0 +! do a=2,a_array(1,ll),3 +! ik = a_array(a,ll) +! j = a_array(a+1,ll) +! T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) ! enddo + + a=2 + do j=j_start,j_end + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + if ( (ik /= a_array(a,ll)).or.(j /= a_array(a+1,ll)) ) then + T2d(ik,j) = 0.d0 + else + T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) + a=a+3 + endif + enddo + enddo + enddo call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& (d-b_start+1), & (j_end-j_start+1), 1.d0, & T2d(1,j_start), size(T2d,1), & matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & V2d(1,b_start), size(V2d,1) ) - do b=b_start,d - V(:,:) = 0.d0 ik = 0 do k=k_start,k_end do i=i_start,k @@ -211,7 +217,6 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP CRITICAL call map_append(map_c, key, value, idx) - call map_sort(map_c) !$OMP END CRITICAL @@ -220,8 +225,9 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL + call map_sort(map_c) call munmap( & - (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, c_pointer) end