From ccd4e67357c3cfdb367b34c514bf0bc53c9aeaea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 27 Sep 2017 15:09:00 +0200 Subject: [PATCH] Symmetric transformation OK --- plugins/FourIdx/four_idx.irp.f | 10 +- plugins/FourIdx/four_index.irp.f | 82 ++++++++---- plugins/FourIdx/four_index_sym.irp.f | 191 +++++++++++++++++++++++++++ 3 files changed, 250 insertions(+), 33 deletions(-) create mode 100644 plugins/FourIdx/four_index_sym.irp.f diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f index f3388b83..5a874ee6 100644 --- a/plugins/FourIdx/four_idx.irp.f +++ b/plugins/FourIdx/four_idx.irp.f @@ -13,16 +13,16 @@ program FourIdx sze = key_max call map_init(test_map,sze) - call four_index_transform(ao_integrals_map,test_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - ! call four_index_transform(ao_integrals_map,test_map, & ! mo_coef, size(mo_coef,1), & ! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & ! 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + call four_index_transform_sym(ao_integrals_map,test_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + integer :: i,j,k,l real(integral_kind) :: integral1, integral2 diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index 261b7d12..8f6c02ed 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -5,6 +5,7 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & a_end , b_end , c_end , d_end ) implicit none use map_module + use mmap_module BEGIN_DOC ! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) ! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} @@ -53,18 +54,53 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= k_max) ASSERT (LDB >= l_max) - !$OMP PARALLEL DEFAULT(PRIVATE) SHARED( & + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B) + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO SCHEDULE(static,1) + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do d=d_start,d_end U = 0.d0 - do l=1,l_end + do l=l_start,l_end if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif @@ -73,21 +109,12 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & V(a_start:a_end, k_start:k_end, j_start:j_end) ) - do k=k_start,k_end - do j=j_start,j_end - do i=i_start,k - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,tmp) - T(i, k,j) = tmp - enddo - enddo - enddo - do j=j_start,j_end - do k=k_start,k_end - do i=k+1,i_end - T(i, k,j) = T(k, i,j) - enddo - enddo + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) enddo call DGEMM('T','N', (a_end-a_start+1), & @@ -100,9 +127,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(T) allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) - ! a=a_start,b call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & - (d-b_start+1), & + (b_end-b_start+1), & (j_end-j_start+1), 1.d0, & V(a_start,k_start,j_start), size(V,1)*size(V,2), & matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & @@ -110,8 +136,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(V) - do b=b_start,d - call DGEMM('N','N', (b-a_start+1), (c_end-c_start+1), & + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & (k_end-k_start+1), matrix_B(l, d), & T(a_start,k_start,b), size(T,1), & matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & @@ -126,9 +152,6 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & do b=b_start,b_end do c=c_start,c_end do a=a_start,a_end -! if (a>b) cycle -! if (a>c) cycle -! if (b>d) cycle if (dabs(U(a,c,b)) < 1.d-15) then cycle endif @@ -138,17 +161,20 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & enddo enddo enddo + !$OMP CRITICAL call map_append(map_c, key, value, idx) call map_sort(map_c) -! call map_update(map_c, key, value, idx) -! call map_merge(map_c) !$OMP END CRITICAL + enddo !$OMP END DO deallocate(key,value) !$OMP END PARALLEL + call munmap( & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + end diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f new file mode 100644 index 00000000..5fc152a8 --- /dev/null +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -0,0 +1,191 @@ +subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)/2, l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,k + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + print *, d, l + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) + + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + T(k, i,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + enddo + +! V = 0.d0 +! do a=a_start,a_end +! do k=k_start,k_end +! do j=j_start,j_end +! do i=i_start,i_end +! V(a,k,j) = V(a,k,j) + T(i,k,j)*matrix_B(i,a) +! enddo +! enddo +! enddo +! enddo + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & + (i_end-i_start+1), 1.d0, & + matrix_B(i_start,a_start), size(matrix_B,1), & + T(i_start,k_start,j_start), size(T,1), 0.d0, & + V(a_start,k_start,j_start), size(V, 1) ) + + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) + + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + + deallocate(V) + + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + enddo + + deallocate(T) + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + call map_sort(map_c) + !$OMP END CRITICAL + + + enddo + !$OMP END DO + + deallocate(key,value) + !$OMP END PARALLEL + + call munmap( & + (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + +end