From 8826ba9c2d2913b4c9eec3417581df956928526b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 30 Oct 2017 15:35:55 +0100 Subject: [PATCH] Fixed travis --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 1 - plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 6 +- src/FourIdx/four_index.irp.f | 322 ++++++++++++------- src/FourIdx/four_index_block.irp.f | 2 +- src/FourIdx/four_index_sym.irp.f | 293 ----------------- src/FourIdx/four_index_sym_mmap.irp.f | 292 ----------------- src/Integrals_Bielec/mo_bi_integrals.irp.f | 7 +- 7 files changed, 223 insertions(+), 700 deletions(-) delete mode 100644 src/FourIdx/four_index_sym.irp.f delete mode 100644 src/FourIdx/four_index_sym_mmap.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index e4ae825e..45fe525c 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -188,7 +188,6 @@ program fci_zmq else print *, 'E+PT2'//pt2_string//' = ', CI_energy(k)+pt2(k) endif - print *, 'error = ',error enddo print *, '-----' diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 5e6021bd..8bdcbff5 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -252,6 +252,9 @@ subroutine pt2_collector(E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, double precision :: E0, avg, prop call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + if (firstTBDcomb > Ncomb) then + call zmq_abort(zmq_to_qp_run_socket) + endif if(Nabove(1) < 5d0) cycle call get_first_tooth(actually_computed, tooth) @@ -266,11 +269,10 @@ subroutine pt2_collector(E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, eqt = 0.d0 endif call wall_time(time) - if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) then + if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error) ) then ! Termination pt2(1) = avg print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' -! print*, 'Final statistical error = ',eqt call zmq_abort(zmq_to_qp_run_socket) else if (Nabove(tooth) > Nabove_old) then diff --git a/src/FourIdx/four_index.irp.f b/src/FourIdx/four_index.irp.f index 0c30f55e..bded6d00 100644 --- a/src/FourIdx/four_index.irp.f +++ b/src/FourIdx/four_index.irp.f @@ -20,16 +20,20 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & integer, intent(in) :: a_start, b_start, c_start, d_start integer, intent(in) :: a_end , b_end , c_end , d_end - double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) integer :: i_max, j_max, k_max, l_max integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l + integer :: i, j, k, l, ik, ll + integer :: l_start_block, l_end_block, l_block integer :: a, b, c, d double precision, external :: get_ao_bielec_integral + integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: tmp integer(key_kind), allocatable :: key(:) real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) ASSERT (k_start == i_start) ASSERT (l_start == j_start) @@ -54,127 +58,225 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= k_max) ASSERT (LDB >= l_max) - ! Create a temporary memory-mapped file - integer :: fd - type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:,:,:) - call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + integer*4, allocatable :: a_array_ik(:) + integer*4, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 10_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + integer :: ipass, npass + integer*8 :: tempspace + + tempspace = (new_size * 16_8) / (1024_8 * 1024_8) + npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space + l_block = (l_end-l_start+1)/npass + + ipass = 0 + do l_start_block = l_start, l_end, l_block + ipass = ipass+1 + print *, 'Pass ', ipass + l_end_block = min(l_end, l_start_block+l_block-1) + + allocate(l_pointer(l_start_block:l_end_block+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start_block,l_end_block + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,16) + do k=k_start,k_end + do i=i_start,i_end + ik = (i-i_start+1) + (k-k_start)*(k_end-k_start+1) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,i_end + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end_block+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + + !INPUT DATA + !open(unit=10,file='INPUT',form='UNFORMATTED') + !write(10) i_start, j_start, i_end, j_end + !write(10) a_start, b_start, a_end, b_end + !write(10) LDB, mo_tot_num + !write(10) matrix_B(1:LDB,1:mo_tot_num) + !idx=size(a_array) + !write(10) idx + !write(10) a_array + !write(10) l_pointer + !close(10) + !open(unit=10,file='OUTPUT',form='FORMATTED') + ! END INPUT DATA - !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & - !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& - !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& - !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & - !$OMP a,b,c,d,tmp) - allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) - allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,& + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii,p,q) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO SCHEDULE(dynamic,4) - do l=l_start,l_end - a = 1 - do j=j_start,j_end - do k=k_start,k_end - do i=i_start,i_end - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,tmp) - if (tmp /= 0.d0) then - a = a+1 - a_array(1,a,l-l_start+1) = i - a_array(2,a,l-l_start+1) = j - a_array(3,a,l-l_start+1) = k - a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) - endif + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+1), j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+1), b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start_block,l_end_block + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = a_array_value(ii) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', (i_end-i_start+1)*(k_end-k_start+1) ,& + (b_end-b_start+1), (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,b_end + ik = 0 + do k=k_start,k_end + do i=i_start,i_end + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + + ! T = 0.d0 + ! do a=a_start,a_end + ! do k=k_start,k_end + ! do i=i_start,i_end + ! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) + ! enddo + ! enddo + ! enddo + call DGEMM('N','N', (k_end-k_start+1), (a_end-a_start+1), & + (i_end-i_start+1), 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + + ! do c=c_start,c_end + ! do a=a_start,a_end + ! do k=k_start,k_end + ! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + call DGEMM('T','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) enddo enddo - enddo - a_array(1,1,l-l_start+1) = a - print *, l - enddo - !$OMP END DO - !$OMP DO SCHEDULE(dynamic) - do d=d_start,d_end - U = 0.d0 - do l=l_start,l_end - if (dabs(matrix_B(l,d)) < 1.d-10) then - cycle - endif - print *, d, l + idx = 0_8 - allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & - V(a_start:a_end, k_start:k_end, j_start:j_end) ) - - T = 0.d0 - do a=2,a_array(1,1,l-l_start+1) - i = a_array(1,a,l-l_start+1) - j = a_array(2,a,l-l_start+1) - k = a_array(3,a,l-l_start+1) - T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) - enddo - - call DGEMM('T','N', (a_end-a_start+1), & - (k_end-k_start+1)*(j_end-j_start+1), & - (i_end-i_start+1), 1.d0, & - matrix_B(i_start,a_start), size(matrix_B,1), & - T(i_start,k_start,j_start), size(T,1), 0.d0, & - V(a_start,k_start,j_start), size(V, 1) ) - - deallocate(T) - allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) - - call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & - (b_end-b_start+1), & - (j_end-j_start+1), 1.d0, & - V(a_start,k_start,j_start), size(V,1)*size(V,2), & - matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & - T(a_start,k_start,b_start), size(T,1)*size(T,2) ) - - deallocate(V) - - do b=b_start,b_end - call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & - (k_end-k_start+1), matrix_B(l, d), & - T(a_start,k_start,b), size(T,1), & - matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & - U(a_start,c_start,b), size(U,1) ) - enddo - - deallocate(T) - - enddo - - idx = 0_8 - do b=b_start,b_end - do c=c_start,c_end - do a=a_start,a_end - if (dabs(U(a,c,b)) < 1.d-15) then - cycle - endif - idx = idx+1_8 - call bielec_integrals_index(a,b,c,d,key(idx)) - value(idx) = U(a,c,b) +! TODO : Remove symmetry in storage + integer :: p, q + do b=b_start,d + q = b+ishft(d*d-d,-1) + do c=c_start,c_end + p = a_start+ishft(c*c-c,-1) + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + if ((a==b).and.(p>q)) cycle + p = p+1 + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) +!print *, int(key(idx),4), int(a,2),int(b,2),int(c,2),int(d,2), p, q + value(idx) = U(a,c,b) + enddo enddo enddo + + + + + + + !$OMP CRITICAL + call map_update(map_c, key, value, idx,1.d-15) + !$OMP END CRITICAL + + !WRITE OUTPUT + ! OMP CRITICAL + !print *, d + !do b=b_start,d + ! do c=c_start,c_end + ! do a=a_start,min(b,c) + ! if (dabs(U(a,c,b)) < 1.d-15) then + ! cycle + ! endif + ! write(10,*) d,c,b,a,U(a,c,b) + ! enddo + ! enddo + !enddo + ! OMP END CRITICAL + !END WRITE OUTPUT + + enddo + !$OMP END DO - !$OMP CRITICAL - call map_append(map_c, key, value, idx) - call map_sort(map_c) - !$OMP END CRITICAL - + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + deallocate(l_pointer) enddo - !$OMP END DO - - deallocate(key,value) - !$OMP END PARALLEL - - call munmap( & - (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + deallocate(a_array_ik,a_array_j,a_array_value) end diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f index d5929b51..46f6e991 100644 --- a/src/FourIdx/four_index_block.irp.f +++ b/src/FourIdx/four_index_block.irp.f @@ -71,7 +71,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & integer*8 :: tempspace tempspace = (new_size * 16_8) / (1024_8 * 1024_8) - npass = min(int(l_end-l_start,8),1_8 + tempspace / 2048_8) ! 2 GiB of scratch space + npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space l_block = (l_end-l_start+1)/npass ipass = 0 diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f deleted file mode 100644 index 79c8d1d3..00000000 --- a/src/FourIdx/four_index_sym.irp.f +++ /dev/null @@ -1,293 +0,0 @@ -subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & - i_start, j_start, k_start, l_start, & - i_end , j_end , k_end , l_end , & - a_start, b_start, c_start, d_start, & - a_end , b_end , c_end , d_end ) - implicit none - use map_module - use mmap_module - BEGIN_DOC -! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) -! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} -! Loops run over *_start->*_end - END_DOC - type(map_type), intent(in) :: map_a - type(map_type), intent(inout) :: map_c - integer, intent(in) :: LDB - double precision, intent(in) :: matrix_B(LDB,*) - integer, intent(in) :: i_start, j_start, k_start, l_start - integer, intent(in) :: i_end , j_end , k_end , l_end - integer, intent(in) :: a_start, b_start, c_start, d_start - integer, intent(in) :: a_end , b_end , c_end , d_end - - double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) - double precision, allocatable :: T2d(:,:), V2d(:,:) - integer :: i_max, j_max, k_max, l_max - integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l, ik, ll - integer :: a, b, c, d - double precision, external :: get_ao_bielec_integral - integer*8 :: ii - integer(key_kind) :: idx - real(integral_kind) :: tmp - integer(key_kind), allocatable :: key(:) - real(integral_kind), allocatable :: value(:) - integer*8, allocatable :: l_pointer(:) - - ASSERT (k_start == i_start) - ASSERT (l_start == j_start) - ASSERT (a_start == c_start) - ASSERT (b_start == d_start) - - i_min = min(i_start,a_start) - i_max = max(i_end ,a_end ) - j_min = min(j_start,b_start) - j_max = max(j_end ,b_end ) - k_min = min(k_start,c_start) - k_max = max(k_end ,c_end ) - l_min = min(l_start,d_start) - l_max = max(l_end ,d_end ) - - ASSERT (0 < i_max) - ASSERT (0 < j_max) - ASSERT (0 < k_max) - ASSERT (0 < l_max) - ASSERT (LDB >= i_max) - ASSERT (LDB >= j_max) - ASSERT (LDB >= k_max) - ASSERT (LDB >= l_max) - - ! Create a temporary memory-mapped file - integer :: fd(3) - type(c_ptr) :: c_pointer(3) - integer*4, pointer :: a_array_ik(:) - integer*2, pointer :: a_array_j(:) - double precision, pointer :: a_array_value(:) - - integer*8 :: new_size - new_size = max(1024_8, 5_8 * map_a % n_elements ) - - call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) - call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) - - call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) - call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) - - call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) - call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) - - print *, 'Transforming MO integrals' - allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) - ii = 1_8 - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) - do l=l_start,l_end - !$OMP SINGLE - l_pointer(l) = ii - !$OMP END SINGLE - do j=j_start,j_end - !$OMP DO SCHEDULE(static,1) - do k=k_start,k_end - do i=i_start,k - ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,value(ik)) - enddo - enddo - !$OMP END DO - - !$OMP SINGLE - ik=0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - tmp=value(ik) - if (tmp /= 0.d0) then - a_array_ik(ii) = ik - a_array_j(ii) = j - a_array_value(ii) = tmp - ii=ii+1_8 - endif - enddo - enddo - !$OMP END SINGLE - enddo - enddo - !$OMP SINGLE - a_array_ik(ii) = 0 - a_array_j(ii) = 0 - a_array_value(ii) = 0.d0 - l_pointer(l_end+1) = ii - !$OMP END SINGLE - !$OMP END PARALLEL - deallocate(value) - -!INPUT DATA -!open(unit=10,file='INPUT',form='UNFORMATTED') -!write(10) i_start, j_start, i_end, j_end -!write(10) a_start, b_start, a_end, b_end -!write(10) LDB, mo_tot_num -!write(10) matrix_B(1:LDB,1:mo_tot_num) -!idx=size(a_array) -!write(10) idx -!write(10) a_array -!write(10) l_pointer -!close(10) -!open(unit=10,file='OUTPUT',form='FORMATTED') -! END INPUT DATA - - - !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & - !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& - !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& - !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_c,matrix_B,l_pointer) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & - !$OMP a,b,c,d,tmp,T2d,V2d,ii) - allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) - allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - - - - allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & - V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & - V(i_start:i_end, k_start:k_end), & - T(k_start:k_end, a_start:a_end)) - - - !$OMP DO SCHEDULE(dynamic) - do d=d_start,d_end - print *, d, '/', d_end - U = 0.d0 - do l=l_start,l_end - if (dabs(matrix_B(l,d)) < 1.d-10) then - cycle - endif - - ii=l_pointer(l) - do j=j_start,j_end - !DIR$ VECTOR NONTEMPORAL - T2d(:,j) = 0.d0 - !DIR$ IVDEP - do while (j == a_array_j(ii)) - T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) - ii = ii + 1_8 - enddo - enddo - - call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& - (d-b_start+1), & - (j_end-j_start+1), 1.d0, & - T2d(1,j_start), size(T2d,1), & - matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & - V2d(1,b_start), size(V2d,1) ) - - do b=b_start,d - ik = 0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - V(i,k) = V2d(ik,b) - enddo - enddo - -! T = 0.d0 -! do a=a_start,b -! do k=k_start,k_end -! do i=i_start,k -! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) -! enddo -! do i=k+1,i_end -! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) -! enddo -! enddo -! enddo - call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & - 1.d0, & - V(i_start,k_start), size(V,1), & - matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & - T(k_start,a_start), size(T,1) ) - -! do c=c_start,b -! do a=a_start,c -! do k=k_start,k_end -! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) -! enddo -! enddo -! enddo - call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & - (k_end-k_start+1), matrix_B(l, d), & - T(k_start,a_start), size(T,1), & - matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & - U(a_start,c_start,b), size(U,1) ) -! do c=b+1,c_end -! do a=a_start,b -! do k=k_start,k_end -! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) -! enddo -! enddo -! enddo - if (b < b_end) then - call DGEMM('T','N', (b-a_start+1), (c_end-b), & - (k_end-k_start+1), matrix_B(l, d), & - T(k_start,a_start), size(T,1), & - matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & - U(a_start,b+1,b), size(U,1) ) - endif - enddo - - enddo - - idx = 0_8 - do b=b_start,d - do c=c_start,c_end - do a=a_start,min(b,c) - if (dabs(U(a,c,b)) < 1.d-15) then - cycle - endif - idx = idx+1_8 - call bielec_integrals_index(a,b,c,d,key(idx)) - value(idx) = U(a,c,b) - enddo - enddo - enddo - - !$OMP CRITICAL - call map_update(map_c, key, value, idx,1.d-15) - !$OMP END CRITICAL - -!WRITE OUTPUT -! OMP CRITICAL -!print *, d -!do b=b_start,d -! do c=c_start,c_end -! do a=a_start,min(b,c) -! if (dabs(U(a,c,b)) < 1.d-15) then -! cycle -! endif -! write(10,*) d,c,b,a,U(a,c,b) -! enddo -! enddo -!enddo -! OMP END CRITICAL -!END WRITE OUTPUT - - - enddo - !$OMP END DO - - deallocate(key,value,V,T) - !$OMP END PARALLEL - call map_merge(map_c) - - call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') - close(10,status='DELETE') - call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') - close(10,status='DELETE') - call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') - close(10,status='DELETE') - deallocate(l_pointer) - -end diff --git a/src/FourIdx/four_index_sym_mmap.irp.f b/src/FourIdx/four_index_sym_mmap.irp.f deleted file mode 100644 index 877daf30..00000000 --- a/src/FourIdx/four_index_sym_mmap.irp.f +++ /dev/null @@ -1,292 +0,0 @@ -subroutine four_index_transform_sym_mmap(map_a,map_c,matrix_B,LDB, & - i_start, j_start, k_start, l_start, & - i_end , j_end , k_end , l_end , & - a_start, b_start, c_start, d_start, & - a_end , b_end , c_end , d_end ) - implicit none - use map_module - use mmap_module - BEGIN_DOC -! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) -! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} -! Loops run over *_start->*_end - END_DOC - type(map_type), intent(in) :: map_a - type(map_type), intent(inout) :: map_c - integer, intent(in) :: LDB - double precision, intent(in) :: matrix_B(LDB,*) - integer, intent(in) :: i_start, j_start, k_start, l_start - integer, intent(in) :: i_end , j_end , k_end , l_end - integer, intent(in) :: a_start, b_start, c_start, d_start - integer, intent(in) :: a_end , b_end , c_end , d_end - - double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) - double precision, allocatable :: T2d(:,:), V2d(:,:) - integer :: i_max, j_max, k_max, l_max - integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l, ik, ll - integer :: a, b, c, d - double precision, external :: get_ao_bielec_integral - integer*8 :: ii - integer(key_kind) :: idx - real(integral_kind) :: tmp - integer(key_kind), allocatable :: key(:) - real(integral_kind), allocatable :: value(:) - integer*8, allocatable :: l_pointer(:) - - ASSERT (k_start == i_start) - ASSERT (l_start == j_start) - ASSERT (a_start == c_start) - ASSERT (b_start == d_start) - - i_min = min(i_start,a_start) - i_max = max(i_end ,a_end ) - j_min = min(j_start,b_start) - j_max = max(j_end ,b_end ) - k_min = min(k_start,c_start) - k_max = max(k_end ,c_end ) - l_min = min(l_start,d_start) - l_max = max(l_end ,d_end ) - - ASSERT (0 < i_max) - ASSERT (0 < j_max) - ASSERT (0 < k_max) - ASSERT (0 < l_max) - ASSERT (LDB >= i_max) - ASSERT (LDB >= j_max) - ASSERT (LDB >= k_max) - ASSERT (LDB >= l_max) - - ! Create a temporary memory-mapped file - integer :: fd(3) - type(c_ptr) :: c_pointer(3) - integer*4, pointer :: a_array_ik(:) - integer*2, pointer :: a_array_j(:) - double precision, pointer :: a_array_value(:) - - integer*8 :: new_size - new_size = max(1024_8, 5_8 * map_a % n_elements ) - - call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) - call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) - - call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) - call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) - - call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) - call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) - - print *, 'Transforming MO integrals' - allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) - ii = 1_8 - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) - do l=l_start,l_end - !$OMP SINGLE - l_pointer(l) = ii - !$OMP END SINGLE - do j=j_start,j_end - !$OMP DO SCHEDULE(static,1) - do k=k_start,k_end - do i=i_start,k - ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,value(ik)) - enddo - enddo - !$OMP END DO - - !$OMP SINGLE - ik=0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - tmp=value(ik) - if (tmp /= 0.d0) then - a_array_ik(ii) = ik - a_array_j(ii) = j - a_array_value(ii) = tmp - ii=ii+1_8 - endif - enddo - enddo - !$OMP END SINGLE - enddo - enddo - !$OMP SINGLE - a_array_ik(ii) = 0 - a_array_j(ii) = 0 - a_array_value(ii) = 0.d0 - l_pointer(l_end+1) = ii - !$OMP END SINGLE - !$OMP END PARALLEL - deallocate(value) - -!INPUT DATA -!open(unit=10,file='INPUT',form='UNFORMATTED') -!write(10) i_start, j_start, i_end, j_end -!write(10) a_start, b_start, a_end, b_end -!write(10) LDB, mo_tot_num -!write(10) matrix_B(1:LDB,1:mo_tot_num) -!idx=size(a_array) -!write(10) idx -!write(10) a_array -!write(10) l_pointer -!close(10) -!open(unit=10,file='OUTPUT',form='FORMATTED') -! END INPUT DATA - - - !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & - !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& - !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& - !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_c,matrix_B,l_pointer) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & - !$OMP a,b,c,d,tmp,T2d,V2d,ii) - allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) - allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - - - - allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & - V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & - V(i_start:i_end, k_start:k_end), & - T(k_start:k_end, a_start:a_end)) - - - !$OMP DO SCHEDULE(dynamic) - do d=d_start,d_end - print *, d, '/', d_end - U = 0.d0 - do l=l_start,l_end - if (dabs(matrix_B(l,d)) < 1.d-10) then - cycle - endif - - ii=l_pointer(l) - do j=j_start,j_end - !DIR$ VECTOR NONTEMPORAL - T2d(:,j) = 0.d0 - !DIR$ IVDEP - do while (j == a_array_j(ii)) - T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) - ii = ii + 1_8 - enddo - enddo - call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& - (d-b_start+1), & - (j_end-j_start+1), 1.d0, & - T2d(1,j_start), size(T2d,1), & - matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & - V2d(1,b_start), size(V2d,1) ) - - do b=b_start,d - ik = 0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - V(i,k) = V2d(ik,b) - enddo - enddo - -! T = 0.d0 -! do a=a_start,b -! do k=k_start,k_end -! do i=i_start,k -! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) -! enddo -! do i=k+1,i_end -! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) -! enddo -! enddo -! enddo - call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & - 1.d0, & - V(i_start,k_start), size(V,1), & - matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & - T(k_start,a_start), size(T,1) ) - -! do c=c_start,b -! do a=a_start,c -! do k=k_start,k_end -! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) -! enddo -! enddo -! enddo - call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & - (k_end-k_start+1), matrix_B(l, d), & - T(k_start,a_start), size(T,1), & - matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & - U(a_start,c_start,b), size(U,1) ) -! do c=b+1,c_end -! do a=a_start,b -! do k=k_start,k_end -! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) -! enddo -! enddo -! enddo - if (b < b_end) then - call DGEMM('T','N', (b-a_start+1), (c_end-b), & - (k_end-k_start+1), matrix_B(l, d), & - T(k_start,a_start), size(T,1), & - matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & - U(a_start,b+1,b), size(U,1) ) - endif - enddo - - enddo - - idx = 0_8 - do b=b_start,d - do c=c_start,c_end - do a=a_start,min(b,c) - if (dabs(U(a,c,b)) < 1.d-15) then - cycle - endif - idx = idx+1_8 - call bielec_integrals_index(a,b,c,d,key(idx)) - value(idx) = U(a,c,b) - enddo - enddo - enddo - - !$OMP CRITICAL - call map_append(map_c, key, value, idx) - !$OMP END CRITICAL - -!WRITE OUTPUT -! OMP CRITICAL -!print *, d -!do b=b_start,d -! do c=c_start,c_end -! do a=a_start,min(b,c) -! if (dabs(U(a,c,b)) < 1.d-15) then -! cycle -! endif -! write(10,*) d,c,b,a,U(a,c,b) -! enddo -! enddo -!enddo -! OMP END CRITICAL -!END WRITE OUTPUT - - - enddo - !$OMP END DO - - deallocate(key,value,V,T) - !$OMP END PARALLEL - call map_sort(map_c) - - call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') - close(10,status='DELETE') - call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') - close(10,status='DELETE') - call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) - open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') - close(10,status='DELETE') - deallocate(l_pointer) - -end diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 2fc77219..6a75e523 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -119,7 +119,12 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] else ! call add_integrals_to_map(full_ijkl_bitmask_4) - call four_index_transform_block(ao_integrals_map,mo_integrals_map, & +! call four_index_transform_block(ao_integrals_map,mo_integrals_map, & +! mo_coef, size(mo_coef,1), & +! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & +! 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + + call four_index_transform(ao_integrals_map,mo_integrals_map, & mo_coef, size(mo_coef,1), & 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num)