diff --git a/install/scripts/install_gpi2.sh b/install/scripts/install_gpi2.sh index 751f4ef8..87bdbb62 100755 --- a/install/scripts/install_gpi2.sh +++ b/install/scripts/install_gpi2.sh @@ -6,9 +6,9 @@ GPI_OPTIONS=--with-ethernet function _install() { - cd gpi2 + cd _build/gpi2 ./install.sh -p $QP_ROOT $GPI_OPTIONS - cp src/GASPI.f90 $QP_ROOT/src/plugins/GPI2/ + cp src/GASPI.f90 $QP_ROOT/plugins/GPI2/ return 0 }
diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f
new file mode 100644
index 00000000..0c30f55e
--- /dev/null
+++ b/plugins/FourIdx/four_index.irp.f
@@ -0,0 +1,197 @@
+subroutine four_index_transform(map_a,map_c,matrix_B,LDB,            &
+    i_start, j_start, k_start, l_start,                             &
+    i_end  , j_end  , k_end  , l_end  ,                             &
+    a_start, b_start, c_start, d_start,                             &
+    a_end  , b_end  , c_end  , d_end  )
+  implicit none
+  use map_module
+  use mmap_module
+  BEGIN_DOC
+! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
+! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
+! Loops run over *_start->*_end
+!
+! The non-zero A_{ijkl} are first gathered, one slab per l, in a memory-mapped
+! array. Each d is then built with three DGEMM calls and appended to map_c.
+  END_DOC
+  type(map_type), intent(in)     :: map_a
+  type(map_type), intent(inout)  :: map_c
+  integer, intent(in)            :: LDB
+  double precision, intent(in)   :: matrix_B(LDB,*)
+  integer, intent(in)            :: i_start, j_start, k_start, l_start
+  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
+  integer, intent(in)            :: a_start, b_start, c_start, d_start
+  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
+
+  double precision, allocatable  :: T(:,:,:), U(:,:,:), V(:,:,:)
+  integer                        :: i_max, j_max, k_max, l_max
+  integer                        :: i_min, j_min, k_min, l_min
+  integer                        :: i, j, k, l
+  integer                        :: a, b, c, d
+  double precision, external     :: get_ao_bielec_integral
+  integer(key_kind)              :: idx
+  real(integral_kind)            :: tmp
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+
+  ASSERT (k_start == i_start)
+  ASSERT (l_start == j_start)
+  ASSERT (a_start == c_start)
+  ASSERT (b_start == d_start)
+
+  i_min = min(i_start,a_start)
+  i_max = max(i_end  ,a_end  )
+  j_min = min(j_start,b_start)
+  j_max = max(j_end  ,b_end  )
+  k_min = min(k_start,c_start)
+  k_max = max(k_end  ,c_end  )
+  l_min = min(l_start,d_start)
+  l_max = max(l_end  ,d_end  )
+
+  ASSERT (0 < i_max)
+  ASSERT (0 < j_max)
+  ASSERT (0 < k_max)
+  ASSERT (0 < l_max)
+  ASSERT (LDB >= i_max)
+  ASSERT (LDB >= j_max)
+  ASSERT (LDB >= k_max)
+  ASSERT (LDB >= l_max)
+
+  ! Create a temporary memory-mapped file
+  ! Slot 1 of each l-slab stores the index of the last slot written, so a slab
+  ! needs one slot more than the number of (i,j,k) triplets.
+  integer                        :: fd
+  type(c_ptr)                    :: c_pointer
+  integer*8, pointer             :: a_array(:,:,:)
+  integer*8                      :: a_shape(3)
+  a_shape(1) = 4_8
+  a_shape(2) = int(i_end-i_start+1,8)*int(j_end-j_start+1,8)*int(k_end-k_start+1,8) + 1_8
+  a_shape(3) = int(l_end-l_start+1,8)
+  call mmap(trim(ezfio_filename)//'/work/four_idx',                  &
+      a_shape, 8, fd, .False., c_pointer)
+  call c_f_pointer(c_pointer, a_array, a_shape)
+
+
+  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd,          &
+  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
+  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
+  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
+  !$OMP  map_a,map_c,matrix_B)                                   &
+  !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,                     &
+  !$OMP  a,b,c,d,tmp)
+  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
+  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
+
+
+  ! Pass 1: gather the non-zero A_{ijkl} of each l as (i,j,k,value) in slab l
+  !$OMP DO SCHEDULE(dynamic,4)
+  do l=l_start,l_end
+    a = 1
+    do j=j_start,j_end
+      do k=k_start,k_end
+        do i=i_start,i_end
+          call bielec_integrals_index(i,j,k,l,idx)
+          call map_get(map_a,idx,tmp)
+          if (tmp /= 0.d0) then
+            a = a+1
+            a_array(1,a,l-l_start+1) = i
+            a_array(2,a,l-l_start+1) = j
+            a_array(3,a,l-l_start+1) = k
+            a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8)
+          endif
+        enddo
+      enddo
+    enddo
+    ! Header slot: index of the last slot written in this slab
+    a_array(1,1,l-l_start+1) = a
+    print *, l
+  enddo
+  !$OMP END DO
+
+  ! Pass 2: accumulate U(a,c,b) = C_{abcd} for one d per iteration
+  !$OMP DO SCHEDULE(dynamic)
+  do d=d_start,d_end
+    U = 0.d0
+    do l=l_start,l_end
+      if (dabs(matrix_B(l,d)) < 1.d-10) then
+        cycle
+      endif
+      print *, d, l
+
+      allocate( T(i_start:i_end, k_start:k_end, j_start:j_end),      &
+          V(a_start:a_end, k_start:k_end, j_start:j_end) )
+
+      T = 0.d0
+      do a=2,a_array(1,1,l-l_start+1)
+        i = a_array(1,a,l-l_start+1)
+        j = a_array(2,a,l-l_start+1)
+        k = a_array(3,a,l-l_start+1)
+        T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0)
+      enddo
+
+      ! V(a,k,j) = \sum_i B(i,a) T(i,k,j)
+      call DGEMM('T','N', (a_end-a_start+1),                         &
+          (k_end-k_start+1)*(j_end-j_start+1),                       &
+          (i_end-i_start+1), 1.d0,                                   &
+          matrix_B(i_start,a_start), size(matrix_B,1),               &
+          T(i_start,k_start,j_start), size(T,1), 0.d0,               &
+          V(a_start,k_start,j_start), size(V, 1) )
+
+      deallocate(T)
+      ! The next DGEMM fills, and the b loop reads, all columns b_start:b_end
+      allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) )
+
+      ! T(a,k,b) = \sum_j V(a,k,j) B(j,b)
+      call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1),       &
+          (b_end-b_start+1),                                         &
+          (j_end-j_start+1), 1.d0,                                   &
+          V(a_start,k_start,j_start), size(V,1)*size(V,2),           &
+          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,          &
+          T(a_start,k_start,b_start), size(T,1)*size(T,2) )
+
+      deallocate(V)
+
+      ! U(a,c,b) = U(a,c,b) + B(l,d) \sum_k T(a,k,b) B(k,c)
+      do b=b_start,b_end
+        call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1),    &
+            (k_end-k_start+1), matrix_B(l, d),                       &
+            T(a_start,k_start,b), size(T,1),                         &
+            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,       &
+            U(a_start,c_start,b), size(U,1) )
+      enddo
+
+      deallocate(T)
+
+    enddo
+
+    ! Collect the elements of U that are above the 1.d-15 threshold
+    idx = 0_8
+    do b=b_start,b_end
+      do c=c_start,c_end
+        do a=a_start,a_end
+          if (dabs(U(a,c,b)) < 1.d-15) then
+            cycle
+          endif
+          idx = idx+1_8
+          call bielec_integrals_index(a,b,c,d,key(idx))
+          value(idx) = U(a,c,b)
+        enddo
+      enddo
+    enddo
+
+    !$OMP CRITICAL
+    call map_append(map_c, key, value, idx)
+    call map_sort(map_c)
+    !$OMP END CRITICAL
+
+
+  enddo
+  !$OMP END DO
+
+  deallocate(key,value,U)
+  !$OMP END PARALLEL
+
+  call munmap(                                                       &
+      a_shape, 8, fd, c_pointer)
+
+end
diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f
new file mode 100644
index 00000000..cd9cb150
--- /dev/null
+++ b/plugins/FourIdx/four_index_sym.irp.f
@@ -0,0 +1,287 @@
+subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB,        &
+    i_start, j_start, k_start, l_start,                             &
+    i_end  , j_end  , k_end  , l_end  ,                             &
+    a_start, b_start, c_start, d_start,                             &
+    a_end  , b_end  , c_end  , d_end  )
+  implicit none
+  use map_module
+  use mmap_module
+  BEGIN_DOC
+! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
+! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
+! Loops run over *_start->*_end
+!
+! Only the i<=k elements of A are read, and only the elements of C with
+! b<=d and a<=min(b,c) are appended to map_c.
+  END_DOC
+  type(map_type), intent(in)     :: map_a
+  type(map_type), intent(inout)  :: map_c
+  integer, intent(in)            :: LDB
+  double precision, intent(in)   :: matrix_B(LDB,*)
+  integer, intent(in)            :: i_start, j_start, k_start, l_start
+  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
+  integer, intent(in)            :: a_start, b_start, c_start, d_start
+  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
+
+  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
+  double precision, allocatable  :: T2d(:,:), V2d(:,:)
+  integer                        :: i_max, j_max, k_max, l_max
+  integer                        :: i_min, j_min, k_min, l_min
+  integer                        :: i, j, k, l, ik, ll
+  integer                        :: a, b, c, d
+  double precision, external     :: get_ao_bielec_integral
+  integer*8                      :: ii
+  integer(key_kind)              :: idx
+  real(integral_kind)            :: tmp
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+  integer*8, allocatable         :: l_pointer(:)
+
+  ASSERT (k_start == i_start)
+  ASSERT (l_start == j_start)
+  ASSERT (a_start == c_start)
+  ASSERT (b_start == d_start)
+
+  i_min = min(i_start,a_start)
+  i_max = max(i_end  ,a_end  )
+  j_min = min(j_start,b_start)
+  j_max = max(j_end  ,b_end  )
+  k_min = min(k_start,c_start)
+  k_max = max(k_end  ,c_end  )
+  l_min = min(l_start,d_start)
+  l_max = max(l_end  ,d_end  )
+
+  ASSERT (0 < i_max)
+  ASSERT (0 < j_max)
+  ASSERT (0 < k_max)
+  ASSERT (0 < l_max)
+  ASSERT (LDB >= i_max)
+  ASSERT (LDB >= j_max)
+  ASSERT (LDB >= k_max)
+  ASSERT (LDB >= l_max)
+
+  ! Create a temporary memory-mapped file
+  integer                        :: fd
+  type(c_ptr)                    :: c_pointer
+  integer*8, pointer             :: a_array(:)
+  call mmap(trim(ezfio_filename)//'/work/four_idx',                  &
+      (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer)
+  call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /))
+
+  ! Pass 1: pack the non-zero A_{ijkl} (i<=k) as (ik, j, value) triplets in
+  ! a_array; l_pointer(l) is the first slot used by l, l_pointer(l+1) its end.
+  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )
+  ii = 1_8
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
+  do l=l_start,l_end
+    !$OMP SINGLE
+    l_pointer(l) = ii
+    !$OMP END SINGLE
+    do j=j_start,j_end
+      !$OMP DO SCHEDULE(static,1)
+      do k=k_start,k_end
+        do i=i_start,k
+          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
+          call bielec_integrals_index(i,j,k,l,idx)
+          call map_get(map_a,idx,value(ik))
+        enddo
+      enddo
+      !$OMP END DO
+
+      !$OMP SINGLE
+      ik=0
+      do k=k_start,k_end
+        do i=i_start,k
+          ik = ik+1
+          tmp=value(ik)
+          if (tmp /= 0.d0) then
+            a_array(ii) = ik
+            ii = ii+1_8
+            a_array(ii) = j
+            ii = ii+1_8
+            a_array(ii) = transfer(dble(tmp), 1_8)
+            ii = ii+1_8
+          endif
+        enddo
+      enddo
+      !$OMP END SINGLE
+    enddo
+  enddo
+  !$OMP SINGLE
+  l_pointer(l_end+1) = ii
+  !$OMP END SINGLE
+  !$OMP END PARALLEL
+  deallocate(value)
+
+!INPUT DATA
+!open(unit=10,file='INPUT',form='UNFORMATTED')
+!write(10) i_start, j_start, i_end, j_end
+!write(10) a_start, b_start, a_end, b_end
+!write(10) LDB, mo_tot_num
+!write(10) matrix_B(1:LDB,1:mo_tot_num)
+!idx=size(a_array)
+!write(10) idx
+!write(10) a_array
+!write(10) l_pointer
+!close(10)
+!open(unit=10,file='OUTPUT',form='FORMATTED')
+! END INPUT DATA
+
+
+  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd,          &
+  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
+  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
+  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
+  !$OMP  map_c,matrix_B,l_pointer)                               &
+  !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,               &
+  !$OMP  a,b,c,d,tmp,T2d,V2d,ii)
+  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
+  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
+
+
+
+  allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
+      V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end),     &
+      V(i_start:i_end, k_start:k_end),                               &
+      T(k_start:k_end, a_start:a_end))
+
+
+  !$OMP DO SCHEDULE(dynamic)
+  do d=d_start,d_end
+    U = 0.d0
+    do l=l_start,l_end
+      if (dabs(matrix_B(l,d)) < 1.d-10) then
+        cycle
+      endif
+
+      ! Unpack the triplets stored for l into the dense T2d(ik,j)
+      ii=l_pointer(l)
+      do j=j_start,j_end
+        ik=0
+        do k=k_start,k_end
+          do i=i_start,k
+            ik = ik+1
+            ! Test the end of the slab first: a_array(ii) is not read beyond it
+            if (ii >= l_pointer(l+1)) then
+              T2d(ik,j) = 0.d0
+            else if ( (ik /= a_array(ii)).or.(j /= a_array(ii+1_8)) ) then
+              T2d(ik,j) = 0.d0
+            else
+              T2d(ik,j) = transfer(a_array(ii+2_8), 1.d0)
+              ii=ii+3_8
+            endif
+          enddo
+        enddo
+      enddo
+      ! V2d(ik,b) = \sum_j T2d(ik,j) B(j,b)   for b_start <= b <= d
+      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
+          (d-b_start+1),                                             &
+          (j_end-j_start+1), 1.d0,                                   &
+          T2d(1,j_start), size(T2d,1),                               &
+          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,          &
+          V2d(1,b_start), size(V2d,1) )
+
+      do b=b_start,d
+        ! Expand column b of V2d into the i<=k triangle of V(i,k)
+        ik = 0
+        do k=k_start,k_end
+          do i=i_start,k
+            ik = ik+1
+            V(i,k) = V2d(ik,b)
+          enddo
+        enddo
+
+!       T = 0.d0
+!       do a=a_start,b
+!         do k=k_start,k_end
+!           do i=i_start,k
+!             T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a)
+!           enddo
+!           do i=k+1,i_end
+!             T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a)
+!           enddo
+!         enddo
+!       enddo
+        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),        &
+            1.d0,                                                    &
+            V(i_start,k_start), size(V,1),                           &
+            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,        &
+            T(k_start,a_start), size(T,1) )
+
+!       do c=c_start,b
+!         do a=a_start,c
+!           do k=k_start,k_end
+!             U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d)
+!           enddo
+!         enddo
+!       enddo
+        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),            &
+            (k_end-k_start+1), matrix_B(l, d),                       &
+            T(k_start,a_start), size(T,1),                           &
+            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,       &
+            U(a_start,c_start,b), size(U,1) )
+!       do c=b+1,c_end
+!         do a=a_start,b
+!           do k=k_start,k_end
+!             U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d)
+!           enddo
+!         enddo
+!       enddo
+        if (b < b_end) then
+          call DGEMM('T','N', (b-a_start+1), (c_end-b),              &
+              (k_end-k_start+1), matrix_B(l, d),                     &
+              T(k_start,a_start), size(T,1),                         &
+              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,         &
+              U(a_start,b+1,b), size(U,1) )
+        endif
+      enddo
+
+    enddo
+
+    idx = 0_8
+    do b=b_start,d
+      do c=c_start,c_end
+        do a=a_start,min(b,c)
+          if (dabs(U(a,c,b)) < 1.d-15) then
+            cycle
+          endif
+          idx = idx+1_8
+          call bielec_integrals_index(a,b,c,d,key(idx))
+          value(idx) = U(a,c,b)
+        enddo
+      enddo
+    enddo
+
+    !$OMP CRITICAL
+    call map_append(map_c, key, value, idx)
+    !$OMP END CRITICAL
+
+!WRITE OUTPUT
+! OMP CRITICAL
+!print *, d
+!do b=b_start,d
+! do c=c_start,c_end
+!   do a=a_start,min(b,c)
+!     if (dabs(U(a,c,b)) < 1.d-15) then
+!       cycle
+!     endif
+!     write(10,*) d,c,b,a,U(a,c,b)
+!   enddo
+! enddo
+!enddo
+! OMP END CRITICAL
+!END WRITE OUTPUT
+
+
+  enddo
+  !$OMP END DO
+
+  deallocate(key,value,V,T,U,T2d,V2d)
+  !$OMP END PARALLEL
+  call map_sort(map_c)
+
+  call munmap(                                                       &
+      (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer)
+  deallocate(l_pointer)
+
+end
diff --git a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES index 7ff203d4..d9a3a160 100644 --- a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES +++ b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Perturbation Selectors_full Generators_full ZMQ +Perturbation Selectors_full Generators_full ZMQ FourIdx diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index afeb08fd..62873c32 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -350,12 +350,12 @@ subroutine get_first_tooth(computed, first_teeth) end subroutine -BEGIN_PROVIDER [ integer, size_tbc ] +BEGIN_PROVIDER [ integer*8, size_tbc ] implicit none BEGIN_DOC ! Size of the tbc array END_DOC - size_tbc = (comb_teeth+1)*N_det_generators + fragment_count*fragment_first + size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first END_PROVIDER subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) @@ -408,7 +408,8 @@ end subroutine subroutine add_comb(comb, computed, tbc, stbc, ct) implicit none - integer, intent(in) :: stbc, ct + integer*8, intent(in) :: stbc + integer, intent(in) :: ct double precision, intent(in) :: comb logical, intent(inout) :: computed(N_det_generators) integer, intent(inout) :: tbc(0:stbc) diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index ceb7bd95..930eec2c 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -57,7 +57,6 @@ subroutine run_selection_slave(thread,iproc,energy) endif if(done .or.
ctask == size(task_id)) then - ASSERT (.not.(buf%N == 0 .and. ctask > 0)) do i=1, ctask call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index f404d069..3e58224a 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -419,37 +419,82 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d fullinteresting(0) = 0 do ii=1,preinteresting(0) - i = preinteresting(ii) - mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) - mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) - nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) - do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), preinteresting_det(j,1,ii)) - mobMask(j,2) = iand(negMask(j,2), preinteresting_det(j,2,ii)) - nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do + select case (N_int) + case (1) + mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + case (2) + mobMask(1:2,1) = iand(negMask(1:2,1), preinteresting_det(1:2,1,ii)) + mobMask(1:2,2) = iand(negMask(1:2,2), preinteresting_det(1:2,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + & + popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2)) + case (3) + mobMask(1:3,1) = iand(negMask(1:3,1), preinteresting_det(1:3,1,ii)) + mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii)) + nt = 0 + do j=3,1,-1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case (4) + mobMask(1:4,1) = iand(negMask(1:4,1), preinteresting_det(1:4,1,ii)) + mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii)) + nt = 0 + do j=4,1,-1 + if (mobMask(j,1) /= 0_bit_kind) 
then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case default + mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii)) + mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii)) + nt = 0 + do j=N_int,1,-1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + end select - if(nt <= 4) then - interesting(0) += 1 - interesting(interesting(0)) = i + if(nt <= 4) then + i = preinteresting(ii) + interesting(0) += 1 + interesting(interesting(0)) = i minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii) minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii) - do j=2,N_int + do j=2,N_int minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii) minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii) - enddo - if(nt <= 2) then - fullinteresting(0) += 1 - fullinteresting(fullinteresting(0)) = i + enddo + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii) fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii) - do j=2,N_int + do j=2,N_int fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii) fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii) - enddo - end if - end if - + enddo + end if + end if + end do do ii=1,prefullinteresting(0) @@ -458,12 +503,14 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) - do j=2,N_int + if (nt > 2) cycle + do j=N_int,2,-1 mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) 
mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + if (nt > 2) exit end do - + if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i
diff --git a/plugins/GPI2/broadcast.irp.f b/plugins/GPI2/broadcast.irp.f
new file mode 100644
index 00000000..e9f421d8
--- /dev/null
+++ b/plugins/GPI2/broadcast.irp.f
@@ -0,0 +1,269 @@
+subroutine broadcast_wf(energy)
+  implicit none
+  BEGIN_DOC
+  ! Broadcasts the wave function and the energies: the GASPI master publishes
+  ! them (broadcast_wf_put), the other ranks fetch them (broadcast_wf_get).
+  ! The four GASPI segments used for the transfer are then deleted.
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(inout) :: energy(N_states)
+  integer(gaspi_return_t)        :: res
+
+  if (is_gaspi_master) then
+    call broadcast_wf_put(energy)
+  else
+    call broadcast_wf_get(energy)
+  endif
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+
+  ! Release the segments created by broadcast_wf_put / broadcast_wf_get
+  integer(gaspi_segment_id_t)    :: seg_id
+  do seg_id=0,3
+    res = gaspi_segment_delete(seg_id)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_delete failed", seg_id
+      stop -1
+    end if
+  end do
+
+end
+
+
+
+
+
+subroutine broadcast_wf_put(energy)
+  implicit none
+  BEGIN_DOC
+  ! Initiates the broadcast of the wave function
+  ! Segment 0 : N_states, N_det, psi_det_size
+  ! Segment 1 : psi_coef
+  ! Segment 2 : psi_det
+  ! Segment 3 : energy
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(in)   :: energy(N_states)
+  integer(gaspi_segment_id_t)    :: seg_id
+  integer(gaspi_alloc_t)         :: seg_alloc_policy
+  integer(gaspi_size_t)          :: seg_size(0:3)
+  type(c_ptr)                    :: seg_ptr(0:3)
+  integer, pointer               :: params_int(:)       ! Segment 0
+  double precision, pointer      :: psi_coef_tmp(:,:)   ! Segment 1
+  integer(bit_kind), pointer     :: psi_det_tmp(:,:,:)  ! Segment 2
+  double precision, pointer      :: params_double(:)    ! Segment 3
+
+  integer(gaspi_return_t)        :: res
+
+
+  seg_alloc_policy = GASPI_MEM_UNINITIALIZED
+
+  seg_size(0) = 4 * 5
+  seg_id=0
+  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+      GASPI_BLOCK, seg_alloc_policy)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
+    stop -1
+  end if
+
+  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_segment_ptr failed", gaspi_rank
+    stop -1
+  end if
+
+  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))
+  params_int(1) = N_states
+  params_int(2) = N_det
+  params_int(3) = psi_det_size
+
+  ! Segment 0 is filled: broadcast_wf_get reads it after this barrier
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed", gaspi_rank
+    stop -1
+  end if
+
+  ! Byte sizes, evaluated in the kind of seg_size to avoid default-integer overflow
+  seg_size(1) = int(8,gaspi_size_t) * psi_det_size * N_states
+  seg_size(2) = int(bit_kind,gaspi_size_t) * psi_det_size * 2 * N_int
+  seg_size(3) = 8 * N_states
+
+  do seg_id=1, 3
+    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+        GASPI_BLOCK, seg_alloc_policy)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
+      stop -1
+    end if
+
+    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_ptr failed", gaspi_rank
+      stop -1
+    end if
+  end do
+
+  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
+  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
+  call c_f_pointer(seg_ptr(3), params_double, shape=(/ N_states /))
+
+  psi_coef_tmp = psi_coef
+  psi_det_tmp = psi_det
+  params_double = energy
+
+  ! Segments 1-3 are filled: broadcast_wf_get reads them after this barrier
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed", gaspi_rank
+    stop -1
+  end if
+
+end
+
+
+
+
+
+
+
+subroutine broadcast_wf_get(energy)
+  implicit none
+  BEGIN_DOC
+  ! Gets the broadcasted wave function
+  ! Reads segments 0-3 of rank 0 and copies them into N_states, N_det,
+  ! psi_det_size, psi_coef, psi_det and energy.
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(out)  :: energy(N_states)
+  integer(gaspi_segment_id_t)    :: seg_id
+  integer(gaspi_alloc_t)         :: seg_alloc_policy
+  integer(gaspi_size_t)          :: seg_size(0:3)
+  type(c_ptr)                    :: seg_ptr(0:3)
+  integer, pointer               :: params_int(:)       ! Segment 0
+  double precision, pointer      :: psi_coef_tmp(:,:)   ! Segment 1
+  integer(bit_kind), pointer     :: psi_det_tmp(:,:,:)  ! Segment 2
+  double precision, pointer      :: params_double(:)    ! Segment 3
+
+  integer(gaspi_return_t)        :: res
+
+
+  seg_alloc_policy = GASPI_MEM_UNINITIALIZED
+
+  seg_size(0) = 4 * 5
+  seg_id=0
+  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL,&
+      GASPI_BLOCK, seg_alloc_policy)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_create_segment failed"
+    stop -1
+  end if
+
+  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_segment_ptr failed"
+    stop -1
+  end if
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+  integer(gaspi_offset_t)        :: localOff, remoteOff
+  integer(gaspi_rank_t)          :: remoteRank
+  integer(gaspi_queue_id_t)      :: queue
+  localOff = 0
+  remoteOff = 0
+  remoteRank = 0
+  queue = 0
+  res = gaspi_read(seg_id, localOff, remoteRank,                     &
+      seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_read failed"
+    stop -1
+  end if
+
+  res = gaspi_wait(queue, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_wait failed"
+    stop -1
+  end if
+
+  ! Same 5-integer extent as the one declared in broadcast_wf_put
+  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))
+
+  N_states = params_int(1)
+  N_det = params_int(2)
+  psi_det_size = params_int(3)
+  TOUCH N_states N_det psi_det_size
+
+  ! Byte sizes, evaluated in the kind of seg_size to avoid default-integer overflow
+  seg_size(1) = int(8,gaspi_size_t) * psi_det_size * N_states
+  seg_size(2) = int(bit_kind,gaspi_size_t) * psi_det_size * 2 * N_int
+  seg_size(3) = 8 * N_states
+
+  do seg_id=1, 3
+    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+        GASPI_BLOCK, seg_alloc_policy)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_create_segment failed"
+      stop -1
+    end if
+
+    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_ptr failed"
+      stop -1
+    end if
+  end do
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+  do seg_id=1, 3
+    res = gaspi_read(seg_id, localOff, remoteRank,                   &
+        seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_read failed"
+      stop -1
+    end if
+    res = gaspi_wait(queue, GASPI_BLOCK)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_wait failed"
+      stop -1
+    end if
+  end do
+
+  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
+  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
+  call c_f_pointer(seg_ptr(3), params_double, shape=shape(energy))
+
+  psi_coef = psi_coef_tmp
+  psi_det = psi_det_tmp
+  energy = params_double
+
+end
+
+
+
+
diff --git a/plugins/Hartree_Fock/SCF_old.irp.f b/plugins/Hartree_Fock/SCF_old.irp.f new file mode 100644 index 00000000..03d9a91d --- /dev/null +++ b/plugins/Hartree_Fock/SCF_old.irp.f @@ -0,0 +1,61 @@ +program scf + BEGIN_DOC +! Produce `Hartree_Fock` MO orbital +! output: mo_basis.mo_tot_num mo_basis.mo_label mo_basis.ao_md5 mo_basis.mo_coef mo_basis.mo_occ +! output: hartree_fock.energy +!
optional: mo_basis.mo_coef + END_DOC + call create_guess + call orthonormalize_mos + call run +end + +subroutine create_guess + implicit none + BEGIN_DOC +! Create a MO guess if no MOs are present in the EZFIO directory + END_DOC + logical :: exists + PROVIDE ezfio_filename + call ezfio_has_mo_basis_mo_coef(exists) + if (.not.exists) then + if (mo_guess_type == "HCore") then + mo_coef = ao_ortho_lowdin_coef + TOUCH mo_coef + mo_label = 'Guess' + call mo_as_eigvectors_of_mo_matrix(mo_mono_elec_integral,size(mo_mono_elec_integral,1),size(mo_mono_elec_integral,2),mo_label) + SOFT_TOUCH mo_coef mo_label + else if (mo_guess_type == "Huckel") then + call huckel_guess + else + print *, 'Unrecognized MO guess type : '//mo_guess_type + stop 1 + endif + endif +end + +subroutine run + + BEGIN_DOC +! Run SCF calculation + END_DOC + + use bitmasks + implicit none + + double precision :: SCF_energy_before,SCF_energy_after,diag_H_mat_elem + double precision :: EHF + integer :: i_it, i, j, k + + EHF = HF_energy + + mo_label = "Canonical" + +! Choose SCF algorithm + + call damping_SCF ! Deprecated routine +! call Roothaan_Hall_SCF + +end + + diff --git a/plugins/QMC/densify_coefmatrix.irp.f b/plugins/QMC/densify_coefmatrix.irp.f new file mode 100644 index 00000000..2e1894b2 --- /dev/null +++ b/plugins/QMC/densify_coefmatrix.irp.f @@ -0,0 +1,8 @@ +program densify + implicit none + read_wf = .True. 
+ touch read_wf + call generate_all_alpha_beta_det_products() + call diagonalize_ci + call save_wavefunction +end diff --git a/plugins/QMC/truncate_wf_spin.irp.f b/plugins/QMC/truncate_wf_spin.irp.f index 5a5fe125..68e903c1 100644 --- a/plugins/QMC/truncate_wf_spin.irp.f +++ b/plugins/QMC/truncate_wf_spin.irp.f @@ -39,7 +39,8 @@ subroutine run call dsort(norm_sort(1),iorder(1),nab) - PROVIDE psi_bilinear_matrix_values nuclear_repulsion + PROVIDE psi_bilinear_matrix_values psi_bilinear_matrix_rows psi_bilinear_matrix_columns + PROVIDE nuclear_repulsion print *, '' do j=0,nab i = iorder(j) @@ -47,7 +48,9 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_columns(k) == -i) then - psi_bilinear_matrix_values(k,1) = 0.d0 + do l=1,N_states + psi_bilinear_matrix_values(k,l) = 0.d0 + enddo endif enddo !$OMP END PARALLEL DO @@ -55,7 +58,9 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_rows(k) == i) then - psi_bilinear_matrix_values(k,1) = 0.d0 + do l=1,N_states + psi_bilinear_matrix_values(k,l) = 0.d0 + enddo endif enddo !$OMP END PARALLEL DO @@ -64,9 +69,11 @@ subroutine run cycle endif - u_0 = psi_bilinear_matrix_values(1:N_det,1:N_states) - v_t = 0.d0 - s_t = 0.d0 + u_0(1:N_det,1:N_states) = psi_bilinear_matrix_values(1:N_det,1:N_states) + v_0(1:N_det,1:N_states) = 0.d0 + u_t(1:N_states,1:N_det) = 0.d0 + v_t(1:N_states,1:N_det) = 0.d0 + s_t(1:N_states,1:N_det) = 0.d0 call dtranspose( & u_0, & size(u_0, 1), & @@ -85,20 +92,21 @@ subroutine run double precision, external :: u_dot_u, u_dot_v do i=1,N_states - e_0(i) = u_dot_v(v_t(1,i),u_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) + e_0(i) = u_dot_v(u_0(1,i),v_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) + print *, 'E = ', e_0(i) + nuclear_repulsion enddo m = 0 do k=1,n_det - if (psi_bilinear_matrix_values(k,1) /= 0.d0) then + if (sum(psi_bilinear_matrix_values(k,1:N_states)) /= 0.d0) then m = m+1 endif enddo - E = E_0(1) + nuclear_repulsion - norm = 
u_dot_u(u_0(1,1),N_det) + do k=1,N_states + E = E_0(k) + nuclear_repulsion + enddo print *, 'Number of determinants:', m - print *, 'Energy', E exit enddo call wf_of_psi_bilinear_matrix(.True.) diff --git a/plugins/analyze_wf/analyze_wf.irp.f b/plugins/analyze_wf/analyze_wf.irp.f index 7d005a05..c37db55f 100644 --- a/plugins/analyze_wf/analyze_wf.irp.f +++ b/plugins/analyze_wf/analyze_wf.irp.f @@ -14,6 +14,17 @@ subroutine run integer :: class(0:mo_tot_num,5) double precision :: occupation(mo_tot_num) + write(*,'(A)') 'Energy of 1st determinant' + write(*,'(A)') '=========================' + write(*,'(A)') '' + write(*,*) 'Total', ref_bitmask_energy + nuclear_repulsion + write(*,*) 'Mono-electronic', mono_elec_ref_bitmask_energy + write(*,*) 'Kinetic', kinetic_ref_bitmask_energy + write(*,*) 'Electron-nucleus', nucl_elec_ref_bitmask_energy + write(*,*) 'Two-electron', bi_elec_ref_bitmask_energy + write(*,'(A)') '' + write(*,'(A)') '' + write(*,'(A)') 'MO Occupation' write(*,'(A)') '=============' write(*,'(A)') '' diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index 2a6ddb1b..d7f081cd 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -42,18 +42,18 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer, allocatable :: hp(:,:) - integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2 + integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, m, l, deg, ni, m2 integer :: n(2) integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn logical :: ok - double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al + double precision :: phase_ia, phase_Ik, phase_Jl, phase_Ji, phase_la, phase_ka, phase_tmp + double precision :: Hka, Hla, Ska, Sla, tmp double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) double precision :: contrib, contrib_s2, wall, iwall - double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) - integer, 
dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ, exc integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp - logical, external :: is_in_wavefunction, isInCassd, detEq + logical, external :: is_in_wavefunction integer,allocatable :: komon(:) logical :: komoned !double precision, external :: get_dij @@ -63,8 +63,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) - allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) - allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) + allocate (delta(N_states,0:N_det_non_ref, 2)) + allocate (delta_s2(N_states,0:N_det_non_ref, 2)) allocate(komon(0:N_det_non_ref)) allocate(hp(2,N_det_non_ref)) @@ -100,7 +100,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) k = det_cepa0_idx(linked(kk, i_I)) blok = blokMwen(kk, i_I) - call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,degree,phase_Ik,N_int) + call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,deg,phase_Ik,N_int) if(J /= i_I) then call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int) @@ -135,36 +135,10 @@ subroutine mrsc2_dressing_slave(thread,iproc) if(h_cache(J,i) == 0.d0) cycle if(h_cache(i_I,i) == 0.d0) cycle - - !ok = .false. - !do i_state=1, N_states - ! if(lambda_mrcc(i_state, i) /= 0d0) then - ! ok = .true. - ! exit - ! end if - !end do - !if(.not. ok) cycle -! - + komon(0) += 1 kn = komon(0) komon(kn) = i - - -! call get_excitation(psi_ref(1,1,J),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ji,N_int) -! if(I_i /= J) call get_excitation(psi_ref(1,1,I_i),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ii,N_int) -! 
if(I_i == J) phase_Ii = phase_Ji - - do i_state = 1,N_states - dkI = h_cache(J,i) * dij(i_I, i, i_state) - dleat(i_state, kn, 1) = dkI - dleat(i_state, kn, 2) = dkI - - dkI = s2_cache(J,i) * dij(i_I, i, i_state) - dleat_s2(i_state, kn, 1) = dkI - dleat_s2(i_state, kn, 2) = dkI - end do - end do komoned = .true. @@ -178,18 +152,20 @@ subroutine mrsc2_dressing_slave(thread,iproc) call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) if(.not. ok) cycle if(HP(1,i) + HP(1,k) <= 2 .and. HP(2,i) + HP(2,k) <= 2) then - cycle + if(is_in_wavefunction(det_tmp, N_int)) cycle end if - !if(isInCassd(det_tmp, N_int)) cycle - + + call i_h_j_phase_out(psi_non_ref(1,1,i), det_tmp, N_int, tmp, phase_ia,exc, deg) + call i_h_j_phase_out(psi_ref(1,1,i_I), psi_non_ref(1,1,k), N_int, tmp, phase_ik,exc, deg) + + call i_h_j_phase_out(psi_non_ref(1,1,l), det_tmp, N_int, Hla, phase_la,exc,deg) + call get_s2(psi_non_ref(1,1,l), det_tmp, N_int, Sla) + + do i_state = 1, N_states - !if(lambda_mrcc(i_state, i) == 0d0) cycle - - - !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al - contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) - contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) + contrib = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Hla * phase_ia * phase_ik + contrib_s2 = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Sla *phase_ia * phase_ik delta(i_state,ll,1) += contrib delta_s2(i_state,ll,1) += contrib_s2 if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then @@ -198,9 +174,12 @@ subroutine mrsc2_dressing_slave(thread,iproc) endif if(I_i == J) cycle - !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! 
* phase_al - contrib = dij(J, l, i_state) * dleat(i_state, m, 1) - contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) + call i_h_j_phase_out(psi_non_ref(1,1,k), det_tmp, N_int, Hka, phase_ka,exc,deg) + call get_s2(psi_non_ref(1,1,k), det_tmp, N_int, Ska) + call i_h_j_phase_out(psi_ref(1,1,J), psi_non_ref(1,1,l), N_int, tmp, phase_jl,exc, deg) + + contrib = dij(J, l, i_state) * dij(J, i, i_state) * Hka* phase_ia * phase_jl + contrib_s2 = dij(J, l, i_state) * dij(J, i, i_state) * Ska*phase_ia*phase_jl delta(i_state,kk,2) += contrib delta_s2(i_state,kk,2) += contrib_s2 if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then @@ -211,12 +190,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) end do ! while end do ! kk - - call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - -! end if - + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) enddo deallocate(delta) diff --git a/plugins/read_integral/print_integrals_ao.irp.f b/plugins/read_integral/print_integrals_ao.irp.f new file mode 100644 index 00000000..488c024d --- /dev/null +++ b/plugins/read_integral/print_integrals_ao.irp.f @@ -0,0 +1,108 @@ +program print_integrals + + PROVIDE ezfio_filename + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals('None') + call ezfio_set_integrals_bielec_disk_access_ao_integrals('None') + call run +end + +subroutine run + implicit none + + integer :: iunit + integer :: getunitandopen + + integer ::i,j,k,l + double precision :: integral + + iunit = getunitandopen('kinetic_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_kinetic_integral(i,j) + if (dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + + iunit = getunitandopen('overlap_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_overlap(i,j) + if 
(dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + + iunit = getunitandopen('nuclear_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_nucl_elec_integral(i,j) + if (dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + +! iunit = getunitandopen('pseudo_ao','w') +! do i=1,ao_num +! do j=1,ao_num +! write(iunit,*) i,j, ao_pseudo_integral(i,j) +! enddo +! enddo +! close(iunit) + + PROVIDE ao_bielec_integrals_in_map + iunit = getunitandopen('bielec_ao','w') + + integer*8 :: i8 + integer :: i_idx, n_elements_max, k1, n_elements + integer :: ii(8), jj(8), kk(8), ll(8) + double precision, external :: ao_bielec_integral + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + +! do i8=0_8,ao_integrals_map%map_size +! n_elements = n_elements_max +! call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) +! do k1=1,n_elements +! call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) +! if ( (kk(1)>ao_num).or. & +! (ii(1)>ao_num).or. & +! (jj(1)>ao_num).or. & +! (ll(1)>ao_num) ) then +! cycle +! endif +! k = kk(1) +! i = ii(1) +! l = ll(1) +! j = jj(1) +! integral = values(k1) +! write (iunit,'(4(I6,X),F20.15)') k,i,l,j, integral +! enddo +! 
enddo + + do i=1,ao_num + do k=1,ao_num + do j=1,ao_num + do l=1,ao_num + double precision, external :: get_ao_bielec_integral + integral = get_ao_bielec_integral(i,j,k,l,ao_integrals_map) + if (dabs(integral)>=1.e-15) then + write (iunit,'(4(I6),F20.15)') i,j,k,l, integral + endif + enddo + enddo + enddo + enddo + + close(iunit) +end diff --git a/plugins/read_integral/print_integrals_mo.irp.f b/plugins/read_integral/print_integrals_mo.irp.f index 133e34b8..18795249 100644 --- a/plugins/read_integral/print_integrals_mo.irp.f +++ b/plugins/read_integral/print_integrals_mo.irp.f @@ -49,7 +49,7 @@ program print_integrals double precision :: get_mo_bielec_integral integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) if (dabs(integral) > mo_integrals_threshold) then - write (iunit,'(4(I5,X),D22.15)') i,j,k,l, integral + write (iunit,'(4(I6,X),F20.15)') i,j,k,l, integral endif !end if enddo diff --git a/plugins/read_integral/read_integrals_ao.irp.f b/plugins/read_integral/read_integrals_ao.irp.f new file mode 100644 index 00000000..77f2213e --- /dev/null +++ b/plugins/read_integral/read_integrals_ao.irp.f @@ -0,0 +1,76 @@ +program read_integrals + + PROVIDE ezfio_filename + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("None") + call run +end + +subroutine run + use map_module + implicit none + + integer :: iunit + integer :: getunitandopen + + integer ::i,j,k,l + double precision :: integral + double precision, allocatable :: A(:,:) + + integer :: n_integrals + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + integer(key_kind) :: key + + allocate (A(ao_num,ao_num)) + A = 0.d0 + + iunit = getunitandopen('kinetic_ao','r') + do + read (iunit,*,end=10) i,j, integral + A(i,j) = integral + A(j,i) = integral + enddo + 10 continue + close(iunit) + call write_one_e_integrals('ao_kinetic_integral', A, size(A,1), size(A,2)) + + + A = 0.d0 + iunit = getunitandopen('nuclear_ao','r') + do + read 
(iunit,*,end=12) i,j, integral + A(i,j) = integral + A(j,i) = integral + enddo + 12 continue + close(iunit) + call write_one_e_integrals('ao_ne_integral', A, size(A,1), size(A,2)) + + call write_one_e_integrals('ao_pseudo_integral', ao_pseudo_integral,& + size(ao_pseudo_integral,1), size(ao_pseudo_integral,2)) + + + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("Read") + + allocate(buffer_i(ao_num**4), buffer_values(ao_num**4)) + + iunit = getunitandopen('bielec_ao','r') + n_integrals=0 + do + read (iunit,*,end=13) i,j,k,l, integral + n_integrals += 1 + call bielec_integrals_index(i, j, k, l, buffer_i(n_integrals) ) + buffer_values(n_integrals) = integral + enddo + 13 continue + close(iunit) + + call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values) + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_integrals_bielec_disk_access_ao_integrals('Read') + +end diff --git a/plugins/read_integral/read_integrals_mo.irp.f b/plugins/read_integral/read_integrals_mo.irp.f index e1ff5fe8..5376b2a2 100644 --- a/plugins/read_integral/read_integrals_mo.irp.f +++ b/plugins/read_integral/read_integrals_mo.irp.f @@ -1,5 +1,10 @@ program read_integrals - + BEGIN_DOC +! Reads the integrals from the following files: +! - kinetic_mo +! - nuclear_mo +! 
- bielec_mo + END_DOC PROVIDE ezfio_filename call ezfio_set_integrals_monoelec_disk_access_mo_one_integrals("None") call run diff --git a/scripts/compilation/qp_create_ninja.py b/scripts/compilation/qp_create_ninja.py index 56d79a4b..cb1ea89a 100755 --- a/scripts/compilation/qp_create_ninja.py +++ b/scripts/compilation/qp_create_ninja.py @@ -36,6 +36,7 @@ except ImportError: from qp_path import QP_ROOT, QP_SRC, QP_EZFIO LIB = "" # join(QP_ROOT, "lib", "rdtsc.o") +GPI_LIB = join(QP_ROOT, "lib64", "libGPI2.a") EZFIO_LIB = join(QP_ROOT, "lib", "libezfio_irp.a") ZMQ_LIB = join(QP_ROOT, "lib", "libf77zmq.a") + " " + join(QP_ROOT, "lib", "libzmq.a") + " -lstdc++ -lrt" ROOT_BUILD_NINJA = join(QP_ROOT, "config", "build.ninja") @@ -96,8 +97,7 @@ def ninja_create_env_variable(pwd_config_file): l_string.append(str_) lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB") - lib_gpi2 = get_compilation_option(pwd_config_file, "GPI2_LIB") - str_lib = " ".join([LIB, lib_lapack, lib_gpi2, EZFIO_LIB, ZMQ_LIB]) + str_lib = " ".join([LIB, lib_lapack, GPI_LIB, EZFIO_LIB, ZMQ_LIB]) l_string.append("LIB = {0} ".format(str_lib)) l_string.append("") @@ -266,7 +266,7 @@ def ninja_ezfio_rule(): install_lib_ezfio = join(QP_ROOT, 'install', 'EZFIO', "lib", "libezfio_irp.a") l_cmd = ["cd {0}".format(QP_EZFIO)] + l_flag - l_cmd += ["rm -f make.config ; ninja && ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)] + l_cmd += ["rm -f make.config ; ninja && rm -f {1} ; ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)] l_string = ["rule build_ezfio", " command = {0}".format(" ; ".join(l_cmd)), @@ -307,7 +307,7 @@ def ninja_symlink_rule(): """ Return the command to create for the symlink """ - return ["rule build_symlink", " command = ln -sf $in $out", ""] + return ["rule build_symlink", " command = rm -f $out ; ln -sf $in $out", ""] def ninja_symlink_build(path_module, l_symlink): diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 
2b57545b..24f2f947 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -205,10 +205,10 @@ subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop "davidson_pull_results failed to pull imin" rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop "davidson_pull_results failed to pull imax" sz = (imax-imin+1)*N_states_diag diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 0a2d5389..dd330644 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -139,7 +139,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' diff --git a/src/Davidson/print_energy.irp.f b/src/Davidson/print_energy.irp.f new file mode 100644 index 00000000..ae6f1da2 --- /dev/null +++ b/src/Davidson/print_energy.irp.f @@ -0,0 +1,22 @@ +program print_energy + implicit none + read_wf = .true. + touch read_wf + call routine +end + +subroutine routine + implicit none + integer :: i,j + double precision :: accu,hij + + print*, 'psi_energy = ',psi_energy + nuclear_repulsion + accu = 0.d0 +! do i = 1,N_det +! do j = 1,N_det +! call i_H_j(psi_det(1,1,j),psi_det(1,1,i),N_int,hij) +! accu += psi_coef(i,1) * psi_coef(j,1) * hij +! enddo +! enddo +! 
print*, 'accu = ',accu + nuclear_repulsion +end diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 26f981dc..ef396f9c 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -192,8 +192,8 @@ subroutine copy_H_apply_buffer_to_wf call normalize(psi_coef,N_det) SOFT_TOUCH N_det psi_det psi_coef - logical :: found_duplicates - !call remove_duplicates_in_psi_det(found_duplicates) +! logical :: found_duplicates +! call remove_duplicates_in_psi_det(found_duplicates) end subroutine remove_duplicates_in_psi_det(found_duplicates) diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 9a1d4ee1..d11e853c 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -435,62 +435,32 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) ! Save the wave function into the EZFIO file END_DOC use bitmasks + include 'constants.include.F' integer, intent(in) :: ndet,nstates,dim_psicoef integer(bit_kind), intent(in) :: psidet(N_int,2,ndet) double precision, intent(in) :: psicoef(dim_psicoef,nstates) integer*8, allocatable :: psi_det_save(:,:,:) double precision, allocatable :: psi_coef_save(:,:) - integer*8 :: det_8(100) - integer(bit_kind) :: det_bk((100*8)/bit_kind) - integer :: N_int2 - equivalence (det_8, det_bk) - integer :: i,k + integer :: i,j,k - PROVIDE progress_bar - call start_progress(7,'Saving wfunction',0.d0) - - progress_bar(1) = 1 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_int(N_int) - progress_bar(1) = 2 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_bit_kind(bit_kind) - progress_bar(1) = 3 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_det(ndet) - progress_bar(1) = 4 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_n_states(nstates) - progress_bar(1) = 5 - progress_value = dble(progress_bar(1)) call 
ezfio_set_determinants_mo_label(mo_label) - progress_bar(1) = 6 - progress_value = dble(progress_bar(1)) - - N_int2 = (N_int*bit_kind)/8 - allocate (psi_det_save(N_int2,2,ndet)) + allocate (psi_det_save(N_int,2,ndet)) do i=1,ndet + do j=1,2 do k=1,N_int - det_bk(k) = psidet(k,1,i) + psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8) enddo - do k=1,N_int2 - psi_det_save(k,1,i) = det_8(k) - enddo - do k=1,N_int - det_bk(k) = psidet(k,2,i) - enddo - do k=1,N_int2 - psi_det_save(k,2,i) = det_8(k) - enddo -! print*,psi_det_save + enddo enddo call ezfio_set_determinants_psi_det(psi_det_save) deallocate (psi_det_save) - progress_bar(1) = 7 - progress_value = dble(progress_bar(1)) allocate (psi_coef_save(ndet,nstates)) double precision :: accu_norm(nstates) accu_norm = 0.d0 @@ -511,7 +481,6 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) call ezfio_set_determinants_psi_coef(psi_coef_save) call write_int(output_determinants,ndet,'Saved determinants') - call stop_progress deallocate (psi_coef_save) end @@ -537,28 +506,12 @@ subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde integer :: i,k - PROVIDE progress_bar - call start_progress(7,'Saving wfunction',0.d0) - - progress_bar(1) = 1 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_int(N_int) - progress_bar(1) = 2 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_bit_kind(bit_kind) - progress_bar(1) = 3 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_det(ndetsave) - progress_bar(1) = 4 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_n_states(nstates) - progress_bar(1) = 5 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_mo_label(mo_label) - progress_bar(1) = 6 - progress_value = dble(progress_bar(1)) - N_int2 = (N_int*bit_kind)/8 allocate (psi_det_save(N_int2,2,ndetsave)) do i=1,ndetsave @@ -600,7 +553,6 @@ subroutine 
save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde call ezfio_set_determinants_psi_coef(psi_coef_save) call write_int(output_determinants,ndet,'Saved determinants') - call stop_progress deallocate (psi_coef_save) end diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index e3f5c0b1..eb128715 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -234,61 +234,66 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) cycle case(1) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 low = min(exc(1,1,ispin), exc(1,2,ispin)) - high = max(exc(1,1,ispin), exc(1,2,ispin)) - - ASSERT (low > 0) - j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) + + ASSERT (low >= 0) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) if (j==k) then - nperm = nperm + popcnt(iand(det1(j,ispin), & - iand( ibset(0_bit_kind,m-1)-1_bit_kind, & - ibclr(-1_bit_kind,n)+1_bit_kind ) )) -! TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, & -! ishft(1_bit_kind,m)-1_bit_kind))) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin), & - ibset(0_bit_kind,m-1)-1_bit_kind)) -! TODO ishft(1_bit_kind,m)-1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) -! 
TODO ishft(1_bit_kind,m)-1_bit_kind)) - endif + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + do i=j+1,k-1 nperm = nperm + popcnt(det1(i,ispin)) end do + endif case (2) - do i=1,2 - low = min(exc(i,1,ispin), exc(i,2,ispin)) - high = max(exc(i,1,ispin), exc(i,2,ispin)) - + do l=1,2 + high = max(exc(l,1,ispin), exc(l,2,ispin))-1 + low = min(exc(l,1,ispin), exc(l,2,ispin)) + ASSERT (low > 0) - j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) if (j==k) then - nperm = nperm + popcnt(iand(det1(j,ispin), & - iand( ibset(0_bit_kind,m-1)-1_bit_kind, & - ibclr(-1_bit_kind,n)+1_bit_kind ) )) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin), & - ibset(0_bit_kind,m-1)-1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) - endif - do l=j+1,k-1 - nperm = nperm + popcnt(det1(l,ispin)) + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) end do + endif enddo @@ -297,7 +302,7 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) b = max(exc(1,1,ispin), exc(1,2,ispin)) c = min(exc(2,1,ispin), exc(2,2,ispin)) d = max(exc(2,1,ispin), exc(2,2,ispin)) - if (c>a .and. cb) then + if ((a 0) - j = ishft(low-1,-bit_kind_shift)+1 ! 
Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 + low = min(exc(1,1,ispin), exc(1,2,ispin)) + + ASSERT (low >= 0) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + if (j==k) then - nperm = popcnt(iand(det1(j,ispin), & - iand(ibset(0_bit_kind,m-1)-1_bit_kind,ibclr(-1_bit_kind,n)+1_bit_kind))) -!TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, & -! ishft(1_bit_kind,m)-1_bit_kind))) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin),ibset(0_bit_kind,m-1)-1_bit_kind)) -!TODO nperm = popcnt(iand(det1(k,ispin), ishft(1_bit_kind,m)-1_bit_kind)) + & -! popcnt(iand(det1(j,ispin), not(ishft(1_bit_kind,n+1))+1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin),ibclr(-1_bit_kind,n)+1_bit_kind)) - endif + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + do i=j+1,k-1 nperm = nperm + popcnt(det1(i,ispin)) end do + endif + phase = phase_dble(iand(nperm,1)) return enddo enddo + end subroutine bitstring_to_list_ab( string, list, n_elements, Nint) @@ -428,7 +438,6 @@ subroutine bitstring_to_list_ab( string, list, n_elements, Nint) enddo end - subroutine bitstring_to_list_ab_old( string, list, n_elements, Nint) use bitmasks implicit none @@ -2030,6 +2039,112 @@ subroutine get_occ_from_key(key,occ,Nint) end +subroutine get_double_excitation_phase_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), 
intent(in) :: det2(Nint,2) + integer, intent(in) :: exc(0:2,2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + do ispin = 1,2 + select case (exc(0,1,ispin)) + case(0) + cycle + + case(1) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 + low = min(exc(1,1,ispin), exc(1,2,ispin)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) + end do + + endif + + case (2) + + do l=1,2 + high = max(exc(l,1,ispin), exc(l,2,ispin))-1 + low = min(exc(l,1,ispin), exc(l,2,ispin)) + + ASSERT (low > 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) + end do + + endif + + enddo + + a = min(exc(1,1,ispin), exc(1,2,ispin)) + b = max(exc(1,1,ispin), 
exc(1,2,ispin)) + c = min(exc(2,1,ispin), exc(2,2,ispin)) + d = max(exc(2,1,ispin), exc(2,2,ispin)) + if (c>a .and. cb) then + nperm = nperm + 1 + endif + exit + end select + + enddo + phase = phase_dble(iand(nperm,1)) +end + + + subroutine get_double_excitation_phase(det1,det2,exc,phase,Nint) use bitmasks implicit none @@ -2315,6 +2430,356 @@ subroutine decode_exc_spin(exc,h1,p1,h2,p2) end select end +subroutine get_excitation_degree_spin_new(key1,key2,degree,Nint) + use bitmasks + include 'Utils/constants.include.F' + implicit none + BEGIN_DOC + ! Returns the excitation degree between two determinants + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key1(Nint) + integer(bit_kind), intent(in) :: key2(Nint) + integer, intent(out) :: degree + + integer(bit_kind) :: xorvec(N_int_max) + integer :: l + + ASSERT (Nint > 0) + + select case (Nint) + + case (1) + xorvec(1) = xor( key1(1), key2(1)) + degree = popcnt(xorvec(1)) + + case (2) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + degree = popcnt(xorvec(1))+popcnt(xorvec(2)) + + case (3) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + xorvec(3) = xor( key1(3), key2(3)) + degree = sum(popcnt(xorvec(1:3))) + + case (4) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + xorvec(3) = xor( key1(3), key2(3)) + xorvec(4) = xor( key1(4), key2(4)) + degree = sum(popcnt(xorvec(1:4))) + + case default + do l=1,Nint + xorvec(l) = xor( key1(l), key2(l)) + enddo + degree = sum(popcnt(xorvec(1:Nint))) + + end select + + degree = ishft(degree,-1) + +end + + +subroutine get_excitation_spin_new(det1,det2,exc,degree,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! 
Returns the excitation operators between two determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + integer, intent(out) :: degree + double precision, intent(out) :: phase + ! exc(number,hole/particle) + ! ex : + ! exc(0,1) = number of holes + ! exc(0,2) = number of particles + ! exc(1,2) = first particle + ! exc(1,1) = first hole + + ASSERT (Nint > 0) + + !DIR$ FORCEINLINE + call get_excitation_degree_spin(det1,det2,degree,Nint) + select case (degree) + + case (3:) + degree = -1 + return + + case (2) + call get_double_excitation_spin(det1,det2,exc,phase,Nint) + return + + case (1) + call get_mono_excitation_spin(det1,det2,exc,phase,Nint) + return + + case(0) + return + + end select +end + +subroutine decode_exc_spin_new(exc,h1,p1,h2,p2) + use bitmasks + implicit none + BEGIN_DOC + ! Decodes the exc arrays returned by get_excitation. + ! h1,h2 : Holes + ! p1,p2 : Particles + END_DOC + integer, intent(in) :: exc(0:2,2) + integer, intent(out) :: h1,h2,p1,p2 + + select case (exc(0,1)) + case(2) + h1 = exc(1,1) + h2 = exc(2,1) + p1 = exc(1,2) + p2 = exc(2,2) + case(1) + h1 = exc(1,1) + h2 = 0 + p1 = exc(1,2) + p2 = 0 + case default + h1 = 0 + p1 = 0 + h2 = 0 + p2 = 0 + end select +end + + +subroutine get_double_excitation_spin_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the two excitation operators between two doubly excited spin-determinants + ! 
and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + exc(0,1) = 0 + exc(0,2) = 0 + + idx_particle = 0 + idx_hole = 0 + ishift = 1-bit_kind_size + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l) == det2(l)) then + cycle + endif + tmp = xor( det1(l), det2(l) ) + particle = iand(tmp, det2(l)) + hole = iand(tmp, det1(l)) + do while (particle /= 0_bit_kind) + tz = trailz(particle) + idx_particle = idx_particle + 1 + exc(0,2) = exc(0,2) + 1 + exc(idx_particle,2) = tz+ishift + particle = iand(particle,particle-1_bit_kind) + enddo + if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2 + exit + endif + do while (hole /= 0_bit_kind) + tz = trailz(hole) + idx_hole = idx_hole + 1 + exc(0,1) = exc(0,1) + 1 + exc(idx_hole,1) = tz+ishift + hole = iand(hole,hole-1_bit_kind) + enddo + if (iand(exc(0,1),exc(0,2))==2) then ! 
exc(0,1)==2 or exc(0,2)==2 + exit + endif + enddo + + select case (exc(0,1)) + + case(1) + + high = max(exc(1,1), exc(1,2))-1 + low = min(exc(1,1), exc(1,2)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + case (2) + + do l=1,2 + high = max(exc(l,1), exc(l,2))-1 + low = min(exc(l,1), exc(l,2)) + + ASSERT (low > 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + enddo + + a = min(exc(1,1), exc(1,2)) + b = max(exc(1,1), exc(1,2)) + c = min(exc(2,1), exc(2,2)) + d = max(exc(2,1), exc(2,2)) + if (c>a .and. cb) then + nperm = nperm + 1 + endif + end select + + phase = phase_dble(iand(nperm,1)) + +end + +subroutine get_mono_excitation_spin_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! 
Returns the excitation operator between two singly excited determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + exc(0,1) = 0 + exc(0,2) = 0 + + ishift = 1-bit_kind_size + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l) == det2(l)) then + cycle + endif + tmp = xor( det1(l), det2(l) ) + particle = iand(tmp, det2(l)) + hole = iand(tmp, det1(l)) + if (particle /= 0_bit_kind) then + tz = trailz(particle) + exc(0,2) = 1 + exc(1,2) = tz+ishift + endif + if (hole /= 0_bit_kind) then + tz = trailz(hole) + exc(0,1) = 1 + exc(1,1) = tz+ishift + endif + + if ( iand(exc(0,1),exc(0,2)) /= 1) then ! 
exc(0,1)/=1 and exc(0,2) /= 1 + cycle + endif + + high = max(exc(1,1), exc(1,2))-1 + low = min(exc(1,1), exc(1,2)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + phase = phase_dble(iand(nperm,1)) + return + + enddo +end subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint) use bitmasks diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index b6ca1cba..1d873af8 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -365,8 +365,9 @@ end do k=1,N_det i = psi_bilinear_matrix_rows(k) j = psi_bilinear_matrix_columns(k) + f = 0.d0 do l=1,N_states - f = psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l) + f += psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l) enddo det_alpha_norm(i) += f det_beta_norm(j) += f @@ -690,7 +691,7 @@ subroutine generate_all_alpha_beta_det_products integer, external :: get_index_in_psi_det_sorted_bit integer(bit_kind), allocatable :: tmp_det(:,:,:) logical, external :: is_in_wavefunction - integer, external :: omp_get_thread_num + PROVIDE H_apply_buffer_allocated !$OMP PARALLEL DEFAULT(NONE) SHARED(psi_coef_sorted_bit,N_det_beta_unique,& !$OMP N_det_alpha_unique, N_int, psi_det_alpha_unique, psi_det_beta_unique,& @@ -712,7 +713,7 @@ subroutine generate_all_alpha_beta_det_products enddo call fill_H_apply_buffer_no_selection(l-1, tmp_det, N_int, iproc) enddo - !$OMP END DO NOWAIT + !$OMP 
END DO deallocate(tmp_det) !$OMP END PARALLEL call copy_H_apply_buffer_to_wf diff --git a/src/Determinants/two_body_dm_map.irp.f b/src/Determinants/two_body_dm_map.irp.f index aa8f630b..2228b1b5 100644 --- a/src/Determinants/two_body_dm_map.irp.f +++ b/src/Determinants/two_body_dm_map.irp.f @@ -187,7 +187,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl) print*,'n_elements = ',n_elements call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(two_body_dm_ab_map) + call map_merge(two_body_dm_ab_map) deallocate(buffer_i,buffer_value) diff --git a/src/FourIdx/NEEDED_CHILDREN_MODULES b/src/FourIdx/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..96b2cfdc --- /dev/null +++ b/src/FourIdx/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +ZMQ diff --git a/src/FourIdx/README.rst b/src/FourIdx/README.rst new file mode 100644 index 00000000..6ea432c6 --- /dev/null +++ b/src/FourIdx/README.rst @@ -0,0 +1,6 @@ +======= +FourIdx +======= + +Four-index transformation. + diff --git a/src/FourIdx/four_index.irp.f b/src/FourIdx/four_index.irp.f new file mode 100644 index 00000000..0c30f55e --- /dev/null +++ b/src/FourIdx/four_index.irp.f @@ -0,0 +1,180 @@ +subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! 
Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + print *, d, l + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) + + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + enddo + + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & + (i_end-i_start+1), 1.d0, & + 
matrix_B(i_start,a_start), size(matrix_B,1), &
+            T(i_start,k_start,j_start), size(T,1), 0.d0, &
+            V(a_start,k_start,j_start), size(V, 1) )
+
+        deallocate(T)   ! the (i,k,j) scratch is no longer needed; T is re-used below with an (a,k,b) layout
+        allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) )   ! must hold every b slab: the DGEMM below writes (b_end-b_start+1) of them
+
+        call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), &
+            (b_end-b_start+1), &
+            (j_end-j_start+1), 1.d0, &
+            V(a_start,k_start,j_start), size(V,1)*size(V,2), &
+            matrix_B(j_start,b_start), size(matrix_B,1),0.d0, &
+            T(a_start,k_start,b_start), size(T,1)*size(T,2) )
+
+        deallocate(V)
+
+        do b=b_start,b_end   ! U(a,c,b) += B(l,d) * sum_k T(a,k,b) * B(k,c)
+          call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), &
+              (k_end-k_start+1), matrix_B(l, d), &
+              T(a_start,k_start,b), size(T,1), &
+              matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, &
+              U(a_start,c_start,b), size(U,1) )
+        enddo
+
+        deallocate(T)
+
+      enddo
+
+      idx = 0_8   ! number of (key,value) pairs collected for this d
+      do b=b_start,b_end
+        do c=c_start,c_end
+          do a=a_start,a_end
+            if (dabs(U(a,c,b)) < 1.d-15) then   ! drop negligible transformed integrals
+              cycle
+            endif
+            idx = idx+1_8
+            call bielec_integrals_index(a,b,c,d,key(idx))
+            value(idx) = U(a,c,b)
+          enddo
+        enddo
+      enddo
+
+      !$OMP CRITICAL
+      call map_append(map_c, key, value, idx)   ! map_c is shared between threads, hence the critical section
+      call map_sort(map_c)
+      !$OMP END CRITICAL
+
+
+    enddo
+    !$OMP END DO
+
+    deallocate(key,value)
+  !$OMP END PARALLEL
+
+  call munmap( &
+      (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer)
+
+end
diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f
new file mode 100644
index 00000000..d5929b51
--- /dev/null
+++ b/src/FourIdx/four_index_block.irp.f
@@ -0,0 +1,300 @@
+subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, &
+      i_start, j_start, k_start, l_start, &
+      i_end , j_end , k_end , l_end , &
+      a_start, b_start, c_start, d_start, &
+      a_end , b_end , c_end , d_end )
+  implicit none
+  use map_module
+  use mmap_module
+  BEGIN_DOC
+! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
+! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
+! 
Loops run over *_start->*_end
+  END_DOC
+  type(map_type), intent(in) :: map_a
+  type(map_type), intent(inout) :: map_c
+  integer, intent(in) :: LDB
+  double precision, intent(in) :: matrix_B(LDB,*)
+  integer, intent(in) :: i_start, j_start, k_start, l_start
+  integer, intent(in) :: i_end , j_end , k_end , l_end
+  integer, intent(in) :: a_start, b_start, c_start, d_start
+  integer, intent(in) :: a_end , b_end , c_end , d_end
+
+  double precision, allocatable :: T(:,:), U(:,:,:), V(:,:)
+  double precision, allocatable :: T2d(:,:), V2d(:,:)
+  integer :: i_max, j_max, k_max, l_max
+  integer :: i_min, j_min, k_min, l_min
+  integer :: i, j, k, l, ik, ll
+  integer :: l_start_block, l_end_block, l_block
+  integer :: a, b, c, d
+  double precision, external :: get_ao_bielec_integral
+  integer*8 :: ii
+  integer(key_kind) :: idx
+  real(integral_kind) :: tmp
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+  integer*8, allocatable :: l_pointer(:)
+
+  ASSERT (k_start == i_start)
+  ASSERT (l_start == j_start)
+  ASSERT (a_start == c_start)
+  ASSERT (b_start == d_start)
+
+  i_min = min(i_start,a_start)
+  i_max = max(i_end ,a_end )
+  j_min = min(j_start,b_start)
+  j_max = max(j_end ,b_end )
+  k_min = min(k_start,c_start)
+  k_max = max(k_end ,c_end )
+  l_min = min(l_start,d_start)
+  l_max = max(l_end ,d_end )
+
+  ASSERT (0 < i_max)
+  ASSERT (0 < j_max)
+  ASSERT (0 < k_max)
+  ASSERT (0 < l_max)
+  ASSERT (LDB >= i_max)
+  ASSERT (LDB >= j_max)
+  ASSERT (LDB >= k_max)
+  ASSERT (LDB >= l_max)
+
+  integer*4, allocatable :: a_array_ik(:)
+  integer*4, allocatable :: a_array_j(:)
+  double precision, allocatable :: a_array_value(:)
+
+  integer*8 :: new_size
+  new_size = max(1024_8, 5_8 * map_a % n_elements )
+
+  allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size))
+
+  integer :: ipass, npass   ! npass is clamped to >= 1 below because l_block is obtained by dividing by it
+  integer*8 :: tempspace
+
+  tempspace = (new_size * 16_8) / (1024_8 * 1024_8)   ! MiB taken by the three a_array_* buffers (4+4+8 bytes per entry)
+  npass = max(1_8, min(int(l_end-l_start,8),1_8 + tempspace / 2048_8))
! 2 GiB of scratch space + l_block = (l_end-l_start+1)/npass + + ipass = 0 + do l_start_block = l_start, l_end, l_block + ipass = ipass+1 + print *, 'Pass ', ipass + l_end_block = min(l_end, l_start_block+l_block-1) + + allocate(l_pointer(l_start_block:l_end_block+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start_block,l_end_block + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,16) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end_block+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + + !INPUT DATA + !open(unit=10,file='INPUT',form='UNFORMATTED') + !write(10) i_start, j_start, i_end, j_end + !write(10) a_start, b_start, a_end, b_end + !write(10) LDB, mo_tot_num + !write(10) matrix_B(1:LDB,1:mo_tot_num) + !idx=size(a_array) + !write(10) idx + !write(10) a_array + !write(10) l_pointer + !close(10) + !open(unit=10,file='OUTPUT',form='FORMATTED') + ! 
END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,& + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii,p,q) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start_block,l_end_block + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = a_array_value(ii) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + + ! T = 0.d0 + ! do a=a_start,b + ! do k=k_start,k_end + ! do i=i_start,k + ! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) + ! enddo + ! do i=k+1,i_end + ! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) + ! enddo + ! enddo + ! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + + ! do c=c_start,b + ! do a=a_start,c + ! do k=k_start,k_end + ! 
U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + ! do c=b+1,c_end + ! do a=a_start,b + ! do k=k_start,k_end + ! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + + integer :: p, q + do b=b_start,d + q = b+ishft(d*d-d,-1) + do c=c_start,c_end + p = a_start+ishft(c*c-c,-1) + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + if ((a==b).and.(p>q)) cycle + p = p+1 + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) +!print *, int(key(idx),4), int(a,2),int(b,2),int(c,2),int(d,2), p, q + value(idx) = U(a,c,b) + enddo + enddo + enddo + + + + + + + !$OMP CRITICAL + call map_update(map_c, key, value, idx,1.d-15) + !$OMP END CRITICAL + + !WRITE OUTPUT + ! OMP CRITICAL + !print *, d + !do b=b_start,d + ! do c=c_start,c_end + ! do a=a_start,min(b,c) + ! if (dabs(U(a,c,b)) < 1.d-15) then + ! cycle + ! endif + ! write(10,*) d,c,b,a,U(a,c,b) + ! enddo + ! enddo + !enddo + ! 
OMP END CRITICAL + !END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + enddo + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_slave.irp.f.todo b/src/FourIdx/four_index_slave.irp.f.todo new file mode 100644 index 00000000..47124823 --- /dev/null +++ b/src/FourIdx/four_index_slave.irp.f.todo @@ -0,0 +1,279 @@ +subroutine four_index_transform_slave(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, task_id, thread ) + implicit none + use f77_zmq + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + integer, intent(in) :: task_id, thread + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start 
== d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + integer*4, allocatable :: a_array_ik(:) + integer*2, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! 
END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + integer(ZMQ_PTR) :: zmq_socket_push + zmq_socket_push = new_zmq_push_socket(thread) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! 
do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call four_idx_push_results(zmq_socket_push, key, value, idx, task_id) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + call end_zmq_push_socket(zmq_socket_push,thread) + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f new file mode 100644 index 00000000..79c8d1d3 --- /dev/null +++ b/src/FourIdx/four_index_sym.irp.f @@ -0,0 +1,293 @@ +subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = 
max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + integer*4, pointer :: a_array_ik(:) + integer*2, pointer :: a_array_j(:) + double precision, pointer :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) + + print *, 'Transforming MO integrals' + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + 
a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + print *, d, '/', d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! 
do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_update(map_c, key, value, idx,1.d-15) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') + close(10,status='DELETE') + call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') + close(10,status='DELETE') + call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') + close(10,status='DELETE') + deallocate(l_pointer) + +end diff --git a/src/FourIdx/four_index_sym_mmap.irp.f b/src/FourIdx/four_index_sym_mmap.irp.f new file mode 100644 index 00000000..877daf30 --- /dev/null +++ b/src/FourIdx/four_index_sym_mmap.irp.f @@ -0,0 +1,292 @@ +subroutine four_index_transform_sym_mmap(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! 
Create a temporary memory-mapped file + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + integer*4, pointer :: a_array_ik(:) + integer*2, pointer :: a_array_j(:) + double precision, pointer :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) + + print *, 'Transforming MO integrals' + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array 
+!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + print *, d, '/', d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! 
do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_sort(map_c) + + call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') + close(10,status='DELETE') + call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') + close(10,status='DELETE') + call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') + close(10,status='DELETE') + deallocate(l_pointer) + +end diff --git a/src/FourIdx/four_index_zmq.irp.f.todo b/src/FourIdx/four_index_zmq.irp.f.todo new file mode 100644 index 00000000..b2f639a7 --- /dev/null +++ b/src/FourIdx/four_index_zmq.irp.f.todo @@ -0,0 +1,273 @@ +subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use f77_zmq + use map_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end
+!
+! The l range is split into blocks of l_block values; one task per block is
+! registered on the task server before the collector/worker threads start.
+  END_DOC
+  type(map_type), intent(in)     :: map_a
+  type(map_type), intent(inout)  :: map_c
+  integer, intent(in)            :: LDB
+  double precision, intent(in)   :: matrix_B(LDB,*)
+  integer, intent(in)            :: i_start, j_start, k_start, l_start
+  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
+  integer, intent(in)            :: a_start, b_start, c_start, d_start
+  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
+
+  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
+  double precision, allocatable  :: T2d(:,:), V2d(:,:)
+  integer                        :: i_max, j_max, k_max, l_max
+  integer                        :: i_min, j_min, k_min, l_min
+  integer                        :: i, j, k, l, ik, ll
+  integer                        :: l_start_block, l_end_block, l_block
+  integer                        :: a, b, c, d
+  double precision, external     :: get_ao_bielec_integral
+  integer*8                      :: ii
+  integer(key_kind)              :: idx
+  real(integral_kind)            :: tmp
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+  integer*8, allocatable         :: l_pointer(:)
+
+  ASSERT (k_start == i_start)
+  ASSERT (l_start == j_start)
+  ASSERT (a_start == c_start)
+  ASSERT (b_start == d_start)
+
+  i_min = min(i_start,a_start)
+  i_max = max(i_end  ,a_end  )
+  j_min = min(j_start,b_start)
+  j_max = max(j_end  ,b_end  )
+  k_min = min(k_start,c_start)
+  k_max = max(k_end  ,c_end  )
+  l_min = min(l_start,d_start)
+  l_max = max(l_end  ,d_end  )
+
+  ASSERT (0 < i_max)
+  ASSERT (0 < j_max)
+  ASSERT (0 < k_max)
+  ASSERT (0 < l_max)
+  ASSERT (LDB >= i_max)
+  ASSERT (LDB >= j_max)
+  ASSERT (LDB >= k_max)
+  ASSERT (LDB >= l_max)
+
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  call new_parallel_job(zmq_to_qp_run_socket,'four_idx')
+
+  integer*8                      :: new_size
+  new_size = max(1024_8, 5_8 * map_a % n_elements )
+
+  integer                        :: npass
+  integer*8                      :: tempspace
+
+  ! 14 bytes per stored element, expressed in MiB
+  tempspace = (new_size * 14_8) / (1024_8 * 1024_8)
+  ! One pass per 2 GiB of scratch space, at most one pass per l step and
+  ! never fewer than one (guards the division below when l_end == l_start).
+  npass = int( min(int(l_end-l_start,8), 1_8 + tempspace / 2048_8) )
+  npass = max(1, npass)
+  l_block = max(1, (l_end-l_start)/npass)
+
+  ! Create tasks
+  ! ============
+
+  ! Fixed-length buffer: an unallocated allocatable scalar cannot be the
+  ! target of an internal write.
+  character(len=64)              :: task
+
+  ! NOTE(review): each task carries only the l block bounds, whereas
+  ! four_idx_slave_work reads LDB and all sixteen bounds from the message --
+  ! confirm the intended task format.
+  do l_start_block = l_start, l_end, l_block
+    l_end_block = min(l_end, l_start_block+l_block-1)
+    write(task,'(I10,1X,I10)') l_start_block, l_end_block
+    call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
+  enddo
+
+  call zmq_set_running(zmq_to_qp_run_socket)
+
+  PROVIDE nproc
+
+  call omp_set_nested(.True.)
+  integer                        :: ithread
+  ! Thread 0 collects the results, the other thread runs the in-process worker
+  !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
+  ithread = omp_get_thread_num()
+  if (ithread==0) then
+    call four_idx_collector(zmq_to_qp_run_socket,map_c)
+  else
+    !TODO : Put strings of map_a and matrix_b on server and broadcast
+    ! NOTE(review): l_start_block / l_end_block hold their post-loop values
+    ! here -- confirm this is intended.
+    call four_index_transform_slave_inproc(map_a,map_c,matrix_B,LDB, &
+      i_start, j_start, k_start, l_start_block, &
+      i_end  , j_end  , k_end  , l_end_block  , &
+      a_start, b_start, c_start, d_start, &
+      a_end  , b_end  , c_end  , d_end, 1 )
+  endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, 'four_idx')
+
+
+end
+
+
+subroutine four_idx_slave_work(zmq_to_qp_run_socket, worker_id)
+  use f77_zmq
+  implicit none
+! Worker loop: fetches tasks from the task server until a task id of 0 is
+! returned, runs the transformation for each task and reports it as done.
+
+  integer(ZMQ_PTR),intent(in)    :: zmq_to_qp_run_socket
+  integer,intent(in)             :: worker_id
+  integer                        :: task_id
+  character*(512)                :: msg
+
+  integer                        :: LDB
+  integer                        :: i_start, j_start, k_start, l_start_block
+  integer                        :: i_end  , j_end  , k_end  , l_end_block
+  integer                        :: a_start, b_start, c_start, d_start
+  integer                        :: a_end  , b_end  , c_end  , d_end
+
+  !TODO : get map_a and matrix_B from server
+  ! NOTE(review): map_a, map_c and matrix_B are not declared in this routine
+  ! yet; it cannot compile until the TODO above is resolved.
+  do
+    call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
+    if(task_id == 0) exit
+    read (msg,*) LDB, &
+      i_start, j_start, k_start, l_start_block, &
+      i_end  , j_end  , k_end  , l_end_block  , &
+      a_start, b_start, c_start, d_start, &
+      a_end  , b_end  , c_end  , d_end
+
+    call four_index_transform_slave(map_a,map_c,matrix_B,LDB, &
+      i_start, j_start, k_start, l_start_block, &
+      i_end  , j_end  , k_end  , l_end_block  , &
+      a_start, b_start, c_start, d_start, &
+      a_end  , b_end  , c_end  , d_end, zmq_to_qp_run_socket, &
+      task_id)
+    call 
task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
+
+  enddo
+end
+
+
+BEGIN_PROVIDER [ integer, nthreads_four_idx ]
+  implicit none
+  BEGIN_DOC
+  ! Number of threads for 4-index transformation.
+  ! Defaults to nproc; overridden by the NTHREADS_FOUR_IDX environment
+  ! variable when it is set.
+  END_DOC
+  nthreads_four_idx = nproc
+  character*(32)                 :: env
+  call getenv('NTHREADS_FOUR_IDX',env)
+  if (trim(env) /= '') then
+    read(env,*) nthreads_four_idx
+  endif
+  ! Report the value provided here (was printing nthreads_davidson)
+  call write_int(6,nthreads_four_idx,'Number of threads for 4-index transformation')
+END_PROVIDER
+
+
+
+subroutine four_idx_collector(zmq_to_qp_run_socket,map_c)
+  use f77_zmq
+  use map_module
+  implicit none
+! Pulls result messages and merges them into map_c, deleting each finished
+! task on the task server, until zmq_delete_task reports no more tasks.
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  type(map_type), intent(inout)  :: map_c
+
+  integer                        :: more, task_id
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+  integer(ZMQ_PTR)               :: zmq_socket_pull
+
+
+  more = 1
+  zmq_socket_pull = new_zmq_pull_socket()
+
+  do while (more == 1)
+    call four_idx_pull_results(zmq_socket_pull, map_c, task_id)
+    call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
+  enddo
+
+  call end_zmq_pull_socket(zmq_socket_pull)
+
+end
+
+
+subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
+  use f77_zmq
+  use map_module
+  implicit none
+! Receives one result message (task_id, sze, keys, values) from
+! zmq_socket_pull and updates map_c with the received key/value pairs.
+  type(map_type), intent(inout)  :: map_c
+  integer(ZMQ_PTR), intent(inout) :: zmq_socket_pull
+
+  integer, intent(out)           :: task_id
+
+  integer                        :: rc, sze
+  integer*8                      :: rc8
+
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
+  if(rc /= 4) stop "four_idx_pull_results failed to pull task_id"
+
+  rc = f77_zmq_recv( zmq_socket_pull, sze, 4, 0)
+  if(rc /= 4) stop "four_idx_pull_results failed to pull sze"
+
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+
+  allocate(key(sze), value(sze))
+
+  rc8 = f77_zmq_recv8( zmq_socket_pull, key, key_kind*sze, 0)
+  if(rc8 /= key_kind*sze) stop "four_idx_pull_results failed to pull key"
+
+  rc8 = f77_zmq_recv8( zmq_socket_pull, value, integral_kind*sze, 0)
+  if(rc8 /= integral_kind*sze) stop "four_idx_pull_results failed to pull value"
+
+! 
Activate if zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  ! Acknowledge the message so the sender can proceed
+  rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
+  if (rc /= 4) then
+    print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
+    stop 'error'
+  endif
+IRP_ENDIF
+
+  call map_update(map_c, key, value, sze, 1.d-15) ! TODO : threshold
+
+  deallocate(key, value)
+end
+
+
+
+subroutine four_idx_push_results(zmq_socket_push, key, value, sze, task_id)
+  use f77_zmq
+  use map_module
+  implicit none
+! Sends one result message made of four parts (task_id, sze, keys, values)
+! on zmq_socket_push; mirrors the receive order of four_idx_pull_results.
+  integer, intent(in)            :: sze
+  integer(key_kind), intent(in)  :: key(sze)
+  real(integral_kind), intent(in) :: value(sze)
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  integer, intent(in)            :: task_id
+
+  ! sze is a dummy argument and must not be redeclared as a local;
+  ! idummy receives the acknowledgement in the non-ZMQ_PUSH branch.
+  integer                        :: rc, idummy
+  integer*8                      :: rc8
+
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop "four_idx_push_results failed to push task_id"
+
+  rc = f77_zmq_send( zmq_socket_push, sze, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop "four_idx_push_results failed to push sze"
+
+  rc8 = f77_zmq_send8( zmq_socket_push, key, key_kind*sze, ZMQ_SNDMORE)
+  if(rc8 /= key_kind*sze) stop "four_idx_push_results failed to push key"
+
+  rc8 = f77_zmq_send8( zmq_socket_push, value, integral_kind*sze, 0)
+  if(rc8 /= integral_kind*sze) stop "four_idx_push_results failed to push value"
+
+! Activate if zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  ! Wait for the 4-byte acknowledgement sent by four_idx_pull_results;
+  ! sending again here would break the request/reply alternation.
+  rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
+  if (rc /= 4) then
+    print *, irp_here, ' : f77_zmq_recv (zmq_socket_push,...'
+ stop 'error' + endif +IRP_ENDIF + +end + + diff --git a/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES b/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES index 152711f3..245e3014 100644 --- a/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES +++ b/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Pseudo Bitmask ZMQ +Pseudo Bitmask ZMQ FourIdx diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index 996f8464..3d3d2a9b 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -179,7 +179,6 @@ double precision function get_ao_bielec_integral(i,j,k,l,map) result(result) call bielec_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(map,idx,tmp) - tmp = tmp else ii = l-ao_integrals_cache_min ii = ior( ishft(ii,6), k-ao_integrals_cache_min) @@ -336,7 +335,7 @@ end ! Min and max values of the MOs for which the integrals are in the cache END_DOC mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min+127_8) + mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min_8+127_8) mo_integrals_cache_min = max(1,elec_alpha_num - 63) mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+127) diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 05eb8dff..2fc77219 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -117,7 +117,17 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] endif else - call add_integrals_to_map(full_ijkl_bitmask_4) +! 
call add_integrals_to_map(full_ijkl_bitmask_4) + + call four_index_transform_block(ao_integrals_map,mo_integrals_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + + integer*8 :: get_mo_map_size, mo_map_size + mo_map_size = get_mo_map_size() + + print*,'Molecular integrals provided' endif if (write_mo_integrals) then call ezfio_set_work_empty(.False.) @@ -146,7 +156,7 @@ subroutine set_integrals_jj_into_map enddo call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end subroutine set_integrals_exchange_jj_into_map @@ -167,7 +177,7 @@ subroutine set_integrals_exchange_jj_into_map enddo call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end @@ -458,7 +468,7 @@ subroutine add_integrals_to_map(mask_ijkl) real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -773,7 +783,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -1035,7 +1045,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) ! print*, 'Communicating the map' ! 
call communicate_mo_integrals() !IRP_ENDIF - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) diff --git a/src/Integrals_Monoelec/kin_ao_ints.irp.f b/src/Integrals_Monoelec/kin_ao_ints.irp.f index 6cb2aa49..d6d09fbc 100644 --- a/src/Integrals_Monoelec/kin_ao_ints.irp.f +++ b/src/Integrals_Monoelec/kin_ao_ints.irp.f @@ -1,6 +1,6 @@ - BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num_align,ao_num) ] + BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ] implicit none integer :: i,j,n,l double precision :: f @@ -45,8 +45,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num ao_deriv2_x(i,j)= 0.d0 ao_deriv2_y(i,j)= 0.d0 @@ -59,7 +57,6 @@ power_B(3) = ao_power( i, 3 ) do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) @@ -122,7 +119,7 @@ END_PROVIDER -BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] +BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num,ao_num)] implicit none BEGIN_DOC ! 
array of the priminitve basis kinetic integrals @@ -131,27 +128,23 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] integer :: i,j,k,l if (read_ao_one_integrals) then - call ezfio_get_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) - call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) + call read_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& + size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) print *, 'AO kinetic integrals read from disk' else !$OMP PARALLEL DO DEFAULT(NONE) & !$OMP PRIVATE(i,j) & - !$OMP SHARED(ao_num, ao_num_align, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z) + !$OMP SHARED(ao_num, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z) do j = 1, ao_num - !DEC$ VECTOR ALWAYS - !DEC$ VECTOR ALIGNED do i = 1, ao_num ao_kinetic_integral(i,j) = -0.5d0 * (ao_deriv2_x(i,j) + ao_deriv2_y(i,j) + ao_deriv2_z(i,j) ) enddo - do i = ao_num +1,ao_num_align - ao_kinetic_integral(i,j) = 0.d0 - enddo enddo !$OMP END PARALLEL DO endif if (write_ao_one_integrals) then - call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) + call write_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& + size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) print *, 'AO kinetic integrals written to disk' endif END_PROVIDER diff --git a/src/Integrals_Monoelec/pot_ao_ints.irp.f b/src/Integrals_Monoelec/pot_ao_ints.irp.f index 7116d2c7..22869c4c 100644 --- a/src/Integrals_Monoelec/pot_ao_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] +BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num,ao_num)] BEGIN_DOC ! 
interaction nuclear electron END_DOC @@ -11,7 +11,8 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult if (read_ao_one_integrals) then - call ezfio_get_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) + call read_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & + size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) print *, 'AO N-e integrals read from disk' else @@ -73,14 +74,15 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] !$OMP END PARALLEL endif if (write_ao_one_integrals) then - call ezfio_set_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) + call write_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & + size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) print *, 'AO N-e integrals written to disk' endif END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num_align,ao_num,nucl_num)] + BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num,ao_num,nucl_num)] BEGIN_DOC ! ao_nucl_elec_integral_per_atom(i,j,k) = - ! where Rk is the geometry of the kth atom diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f index de7f66d7..c7ea6938 100644 --- a/src/Utils/map_functions.irp.f +++ b/src/Utils/map_functions.irp.f @@ -46,8 +46,8 @@ subroutine map_save_to_disk(filename,map) enddo deallocate(map % map(i) % value) deallocate(map % map(i) % key) - map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) - map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :) enddo map % consolidated_idx (map % map_size + 2_8) = k map % consolidated = .True. 
@@ -82,7 +82,7 @@ subroutine map_load_from_disk(filename,map) call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) - map% n_elements = map % consolidated_idx (map % map_size+2_8)-1 + map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) @@ -96,11 +96,11 @@ subroutine map_load_from_disk(filename,map) do i=0_8, map % map_size deallocate(map % map(i) % value) deallocate(map % map(i) % key) - map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) - map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :) map % map(i) % sorted = .True. - n_elements = int( map % consolidated_idx (i+2) - k, 4) - k = map % consolidated_idx (i+2) + n_elements = int( map % consolidated_idx (i+2_8) - k, 4) + k = map % consolidated_idx (i+2_8) map % map(i) % map_size = n_elements map % map(i) % n_elements = n_elements ! Load memory from disk @@ -116,7 +116,7 @@ subroutine map_load_from_disk(filename,map) enddo enddo map % sorted = x>0 .or. l == 0_8 - map % n_elements = k-1 + map % n_elements = k-1_8 map % sorted = map % sorted .or. .True. map % consolidated = .True. diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index ac16f97e..29f7440c 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -13,7 +13,7 @@ module map_module ! cache_map using a binary search ! ! When using the map_update subroutine to build the map, -! the map_unique subroutine +! the map_merge subroutine ! 
should be called before getting data from the map. use omp_lib @@ -274,7 +274,7 @@ subroutine map_sort(map) end -subroutine cache_map_unique(map) +subroutine cache_map_merge(map) use map_module implicit none type (cache_map_type), intent(inout) :: map @@ -298,6 +298,28 @@ subroutine cache_map_unique(map) end +subroutine cache_map_unique(map) + use map_module + implicit none + type (cache_map_type), intent(inout) :: map + integer(cache_key_kind) :: prev_key + integer(cache_map_size_kind) :: i, j + + call cache_map_sort(map) + prev_key = -1_8 + j=0 + do i=1,map%n_elements + if (map%key(i) /= prev_key) then + j = j+1 + map%value(j) = map%value(i) + map%key(j) = map%key(i) + prev_key = map%key(i) + endif + enddo + map%n_elements = j + +end + subroutine cache_map_shrink(map,thr) use map_module implicit none @@ -338,6 +360,27 @@ subroutine map_unique(map) end +subroutine map_merge(map) + use map_module + implicit none + type (map_type), intent(inout) :: map + integer(map_size_kind) :: i + integer(map_size_kind) :: icount + + icount = 0_8 + !$OMP PARALLEL DO SCHEDULE(dynamic,1000) DEFAULT(SHARED) PRIVATE(i)& + !$OMP REDUCTION(+:icount) + do i=0_8,map%map_size + call omp_set_lock(map%map(i)%lock) + call cache_map_merge(map%map(i)) + call omp_unset_lock(map%map(i)%lock) + icount = icount + map%map(i)%n_elements + enddo + !$OMP END PARALLEL DO + map%n_elements = icount + +end + subroutine map_shrink(map,thr) use map_module implicit none @@ -402,7 +445,7 @@ subroutine map_update(map, key, value, sze, thr) else ! 
Assert that the map has a proper size if (local_map%n_elements == local_map%map_size) then - call cache_map_unique(local_map) + call cache_map_merge(local_map) call cache_map_reallocate(local_map, local_map%n_elements + local_map%n_elements) call cache_map_shrink(local_map,thr) endif diff --git a/src/Utils/transpose.irp.f b/src/Utils/transpose.irp.f index 32e502e9..ec33023d 100644 --- a/src/Utils/transpose.irp.f +++ b/src/Utils/transpose.irp.f @@ -47,6 +47,14 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2) double precision, intent(in) :: A(LDA,d2) double precision, intent(out) :: B(LDB,d1) + +! do j=1,d1 +! do i=1,d2 +! B(i,j ) = A(j ,i) +! enddo +! enddo +! return + integer :: i,j,k, mod_align if ( d2 < 32 ) then do j=1,d1 diff --git a/tests/bats/fci.bats b/tests/bats/fci.bats index 6512d60b..7a7bb09f 100644 --- a/tests/bats/fci.bats +++ b/tests/bats/fci.bats @@ -42,11 +42,12 @@ function run_FCI_ZMQ() { qp_set_mo_class h2o.ezfio -core "[1]" -act "[2-12]" -del "[13-24]" } @test "FCI H2O cc-pVDZ" { - run_FCI h2o.ezfio 2000 -76.1253758241716 -76.1258130146102 + run_FCI h2o.ezfio 2000 -76.1253757275131 -76.1258128174355 } + @test "FCI-ZMQ H2O cc-pVDZ" { run_FCI_ZMQ h2o.ezfio 2000 -76.1250552686394 -76.1258817228809 } diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 3ac452ad..405f91e6 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -3,10 +3,10 @@ LIST=" convert.bats hf.bats -pseudo.bats fci.bats cassd.bats mrcepa0.bats +pseudo.bats " #foboci.bats