diff --git a/install/scripts/install_gpi2.sh b/install/scripts/install_gpi2.sh
index 751f4ef8..87bdbb62 100755
--- a/install/scripts/install_gpi2.sh
+++ b/install/scripts/install_gpi2.sh
@@ -6,9 +6,9 @@ GPI_OPTIONS=--with-ethernet
 
 function _install()
 {
-  cd gpi2
+  cd _build/gpi2
   ./install.sh -p $QP_ROOT $GPI_OPTIONS
-  cp src/GASPI.f90 $QP_ROOT/src/plugins/GPI2/
+  cp src/GASPI.f90 $QP_ROOT/plugins/GPI2/
   return 0
 }
 
diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f
new file mode 100644
index 00000000..de5927bf
--- /dev/null
+++ b/plugins/FourIdx/four_idx.irp.f
@@ -0,0 +1,45 @@
+program FourIdx
+  use map_module
+  implicit none
+  BEGIN_DOC
+! Performs a four index transformation of the two-electron integrals
+! and prints every integral that differs by more than 0.1% from mo_integrals_map
+  END_DOC
+
+  type(map_type) :: test_map
+  integer(key_kind) :: key_max
+  integer(map_size_kind) :: sze
+
+  call bielec_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max)
+  sze = key_max
+  call map_init(test_map,sze)
+
+  call four_index_transform(ao_integrals_map,test_map, &
+      mo_coef, size(mo_coef,1), &
+      1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, &
+      1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num)
+
+  integer :: i,j,k,l
+  real(integral_kind) :: integral1, integral2
+
+  provide mo_bielec_integrals_in_map
+
+  do i=1,mo_tot_num
+    do j=1,mo_tot_num
+      do k=1,mo_tot_num
+        do l=1,mo_tot_num
+          call bielec_integrals_index(i,j,k,l,key_max)
+          call map_get(test_map,key_max,integral1)
+          call map_get(mo_integrals_map,key_max,integral2)
+          if (dabs(integral2) >=1.d-10 ) then   ! reference integrals below 1.d-10 are not compared
+            if (dabs(integral1 / integral2 -1.d0) > .001d0) then
+              print *, i,j,k,l
+              print *, integral1, integral2
+              print *, ''
+            endif
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+end
diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f
new file mode 100644
index 00000000..01197eae
--- /dev/null
+++ b/plugins/FourIdx/four_index.irp.f
@@ -0,0 +1,155 @@
+subroutine four_index_transform(map_a,map_c,matrix_B,LDB, &
+    i_start, j_start, k_start, l_start, &
+    i_end , j_end , k_end , l_end , &
+    a_start, b_start, c_start, d_start, &
+    a_end , b_end , c_end , d_end )
+  implicit none
+  use map_module
+  BEGIN_DOC
+! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
+! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
+! Loops run over *_start->*_end
+  END_DOC
+  type(map_type), intent(in) :: map_a
+  type(map_type), intent(inout) :: map_c
+  integer, intent(in) :: LDB
+  double precision, intent(in) :: matrix_B(LDB,*)
+  integer, intent(in) :: i_start, j_start, k_start, l_start
+  integer, intent(in) :: i_end , j_end , k_end , l_end
+  integer, intent(in) :: a_start, b_start, c_start, d_start
+  integer, intent(in) :: a_end , b_end , c_end , d_end
+
+  double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:)
+  integer :: i_max, j_max, k_max, l_max
+  integer :: i_min, j_min, k_min, l_min
+  integer :: i, j, k, l
+  integer :: a, b, c, d
+  double precision, external :: get_ao_bielec_integral
+  integer(key_kind) :: idx
+  real(integral_kind) :: tmp
+  integer(key_kind), allocatable :: key(:)
+  real(integral_kind), allocatable :: value(:)
+
+  ASSERT (k_start == i_start)
+  ASSERT (l_start == j_start)
+  ASSERT (a_start == c_start)
+  ASSERT (b_start == d_start)
+
+  i_min = min(i_start,a_start)
+  i_max = max(i_end ,a_end )
+  j_min = min(j_start,b_start)
+  j_max = max(j_end ,b_end )
+  k_min = min(k_start,c_start)
+  k_max = max(k_end ,c_end )
+  l_min = min(l_start,d_start)
+  l_max = max(l_end ,d_end )
+
+  ASSERT (0 < i_max)
+  ASSERT (0 < j_max)
+  ASSERT (0 < k_max)
+  ASSERT (0 < l_max)
+  ASSERT (LDB >= i_max)
+  ASSERT (LDB >= j_max)
+  ASSERT (LDB >= k_max)
+  ASSERT (LDB >= l_max)
+
+  !$OMP PARALLEL DEFAULT(PRIVATE) SHARED( &
+  !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
+  !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
+  !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
+  !$OMP map_a,map_c,matrix_B)
+  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
+  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
+
+  !$OMP DO SCHEDULE(static,1)
+  do d=d_start,d_end
+    U = 0.d0
+    do l=l_start,l_end
+      if (dabs(matrix_B(l,d)) < 1.d-10) then
+        cycle
+      endif
+      print *, d, l   ! NOTE(review): looks like a leftover progress trace - confirm it is wanted
+
+      allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), &
+                V(a_start:a_end, k_start:k_end, j_start:j_end) )
+
+      ! T(i,k,j) = A_{ijkl} for i <= k, read from map_a
+      do k=k_start,k_end
+        do j=j_start,j_end
+          do i=i_start,k
+            call bielec_integrals_index(i,j,k,l,idx)
+            call map_get(map_a,idx,tmp)
+            T(i, k,j) = tmp
+          enddo
+        enddo
+      enddo
+      ! Fill the i > k part using T(i,k,j) = T(k,i,j)
+      do j=j_start,j_end
+        do k=k_start,k_end
+          do i=k+1,i_end
+            T(i, k,j) = T(k, i,j)
+          enddo
+        enddo
+      enddo
+
+      ! V(a,k,j) = \sum_i B(i,a) T(i,k,j)
+      call DGEMM('T','N', (a_end-a_start+1), &
+          (k_end-k_start+1)*(j_end-j_start+1), &
+          (i_end-i_start+1), 1.d0, &
+          matrix_B(i_start,a_start), size(matrix_B,1), &
+          T(i_start,k_start,j_start), size(T,1), 0.d0, &
+          V(a_start,k_start,j_start), size(V, 1) )
+
+      deallocate(T)
+      allocate( T(a_start:a_end, k_start:k_end, b_start:d) )
+
+      ! T(a,k,b) = \sum_j V(a,k,j) B(j,b), for b_start <= b <= d
+      call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), &
+          (d-b_start+1), &
+          (j_end-j_start+1), 1.d0, &
+          V(a_start,k_start,j_start), size(V,1)*size(V,2), &
+          matrix_B(j_start,b_start), size(matrix_B,1),0.d0, &
+          T(a_start,k_start,b_start), size(T,1)*size(T,2) )
+
+      deallocate(V)
+
+      ! Accumulates B(l,d) \sum_k T(a,k,b) B(k,c) into U(a,c,b), for a_start <= a <= b
+      do b=b_start,d
+        call DGEMM('N','N', (b-a_start+1), (c_end-c_start+1), &
+            (k_end-k_start+1), matrix_B(l, d), &
+            T(a_start,k_start,b), size(T,1), &
+            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, &
+            U(a_start,c_start,b), size(U,1) )
+      enddo
+
+      deallocate(T)
+
+    enddo
+
+    ! Gather the elements with a <= min(b,c) and |U(a,c,b)| >= 1.d-15
+    idx = 0_8
+    do b=b_start,d
+      do c=c_start,c_end
+        do a=a_start,min(b,c)
+          if (dabs(U(a,c,b)) < 1.d-15) then
+            cycle
+          endif
+          idx = idx+1_8
+          call bielec_integrals_index(a,b,c,d,key(idx))
+          value(idx) = U(a,c,b)
+        enddo
+      enddo
+    enddo
+    !$OMP CRITICAL
+    call map_append(map_c, key, value, idx)
+    call map_sort(map_c)
+    call map_unique(map_c)
+    !$OMP END CRITICAL
+
+  enddo
+  !$OMP END DO
+
+  deallocate(key,value,U)
+  !$OMP END PARALLEL
+
+end
diff --git
a/plugins/GPI2/broadcast.irp.f b/plugins/GPI2/broadcast.irp.f
new file mode 100644
index 00000000..e9f421d8
--- /dev/null
+++ b/plugins/GPI2/broadcast.irp.f
@@ -0,0 +1,261 @@
+subroutine broadcast_wf(energy)
+  implicit none
+  BEGIN_DOC
+  ! Broadcasts the wave function (N_states, N_det, psi_det_size, psi_coef,
+  ! psi_det) and the energies: the rank for which is_gaspi_master is true
+  ! publishes them, the other ranks read them. Segments 0 to 3 are then deleted.
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(inout) :: energy(N_states)
+  integer(gaspi_return_t) :: res
+
+  if (is_gaspi_master) then
+    call broadcast_wf_put(energy)
+  else
+    call broadcast_wf_get(energy)
+  endif
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+
+  integer(gaspi_segment_id_t) :: seg_id
+  do seg_id=0,3
+    res = gaspi_segment_delete(seg_id)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_delete failed", seg_id
+      stop -1
+    end if
+  end do
+
+end
+
+
+
+
+
+subroutine broadcast_wf_put(energy)
+  implicit none
+  BEGIN_DOC
+  ! Initiates the broadcast of the wave function
+  ! Segment 0 holds N_states, N_det and psi_det_size; segments 1, 2 and 3
+  ! hold psi_coef, psi_det and the energies.
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(in) :: energy(N_states)
+  integer(gaspi_segment_id_t) :: seg_id
+  integer(gaspi_alloc_t) :: seg_alloc_policy
+  integer(gaspi_size_t) :: seg_size(0:3)
+  type(c_ptr) :: seg_ptr(0:3)
+  integer, pointer :: params_int(:) ! Segment 0
+  double precision, pointer :: psi_coef_tmp(:,:) ! Segment 1
+  integer(bit_kind), pointer :: psi_det_tmp(:,:,:) ! Segment 2
+  double precision, pointer :: params_double(:) ! Segment 3
+
+  integer(gaspi_return_t) :: res
+
+
+  seg_alloc_policy = GASPI_MEM_UNINITIALIZED
+
+  seg_size(0) = 4 * 5
+  seg_id=0
+  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+      GASPI_BLOCK, seg_alloc_policy)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
+    stop -1
+  end if
+
+  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_segment_ptr failed", gaspi_rank
+    stop -1
+  end if
+
+  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))
+  params_int(1) = N_states
+  params_int(2) = N_det
+  params_int(3) = psi_det_size
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed", gaspi_rank
+    stop -1
+  end if
+
+  seg_size(1) = 8 * psi_det_size * N_states
+  seg_size(2) = bit_kind * psi_det_size * 2 * N_int
+  seg_size(3) = 8 * N_states
+
+  do seg_id=1, 3
+    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+        GASPI_BLOCK, seg_alloc_policy)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
+      stop -1
+    end if
+
+    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_ptr failed", gaspi_rank
+      stop -1
+    end if
+  end do
+
+  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
+  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
+  call c_f_pointer(seg_ptr(3), params_double, shape=(/ N_states /))
+
+  psi_coef_tmp = psi_coef
+  psi_det_tmp = psi_det
+  params_double = energy
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed", gaspi_rank
+    stop -1
+  end if
+
+end
+
+
+
+
+
+
+
+subroutine broadcast_wf_get(energy)
+  implicit none
+  BEGIN_DOC
+  ! Gets the broadcasted wave function
+  ! Reads segments 0 to 3 from rank 0 and copies them into N_states, N_det,
+  ! psi_det_size, psi_coef, psi_det and energy.
+  END_DOC
+  use bitmasks
+  use GASPI
+  use ISO_C_BINDING
+
+  double precision, intent(out) :: energy(N_states)
+  integer(gaspi_segment_id_t) :: seg_id
+  integer(gaspi_alloc_t) :: seg_alloc_policy
+  integer(gaspi_size_t) :: seg_size(0:3)
+  type(c_ptr) :: seg_ptr(0:3)
+  integer, pointer :: params_int(:) ! Segment 0
+  double precision, pointer :: psi_coef_tmp(:,:) ! Segment 1
+  integer(bit_kind), pointer :: psi_det_tmp(:,:,:) ! Segment 2
+  double precision, pointer :: params_double(:) ! Segment 3
+
+  integer(gaspi_return_t) :: res
+
+
+  seg_alloc_policy = GASPI_MEM_UNINITIALIZED
+
+  seg_size(0) = 4 * 5
+  seg_id=0
+  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL,&
+      GASPI_BLOCK, seg_alloc_policy)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_create_segment failed"
+    stop -1
+  end if
+
+  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_segment_ptr failed"
+    stop -1
+  end if
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+  integer(gaspi_offset_t) :: localOff, remoteOff
+  integer(gaspi_rank_t) :: remoteRank
+  integer(gaspi_queue_id_t) :: queue
+  localOff = 0
+  remoteOff = 0   ! was never assigned before being passed to gaspi_read
+  remoteRank = 0
+  queue = 0
+  res = gaspi_read(seg_id, localOff, remoteRank, &
+      seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_read failed"
+    stop -1
+  end if
+
+  res = gaspi_wait(queue, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_wait failed"
+    stop -1
+  end if
+
+  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))   ! 5 integers, as in broadcast_wf_put; shape((/ 5 /)) would be (/ 1 /)
+
+  N_states = params_int(1)
+  N_det = params_int(2)
+  psi_det_size = params_int(3)
+  TOUCH N_states N_det psi_det_size
+
+  seg_size(1) = 8 * psi_det_size * N_states
+  seg_size(2) = bit_kind * psi_det_size * 2 * N_int
+  seg_size(3) = 8 * N_states
+
+  do seg_id=1, 3
+    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
+        GASPI_BLOCK, seg_alloc_policy)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_create_segment failed"
+      stop -1
+    end if
+
+    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_segment_ptr failed"
+      stop -1
+    end if
+  end do
+
+  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
+  if(res .ne. GASPI_SUCCESS) then
+    write(*,*) "gaspi_barrier failed"
+    stop -1
+  end if
+
+  do seg_id=1, 3
+    res = gaspi_read(seg_id, localOff, remoteRank, &
+        seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_read failed"
+      stop -1
+    end if
+    res = gaspi_wait(queue, GASPI_BLOCK)
+    if(res .ne. GASPI_SUCCESS) then
+      write(*,*) "gaspi_wait failed"
+      stop -1
+    end if
+  end do
+
+  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
+  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
+  call c_f_pointer(seg_ptr(3), params_double, shape=shape(energy))
+
+  psi_coef = psi_coef_tmp
+  psi_det = psi_det_tmp
+  energy = params_double
+
+end
+
+
+
+
diff --git a/plugins/read_integral/read_integrals_mo.irp.f b/plugins/read_integral/read_integrals_mo.irp.f
index e1ff5fe8..5376b2a2 100644
--- a/plugins/read_integral/read_integrals_mo.irp.f
+++ b/plugins/read_integral/read_integrals_mo.irp.f
@@ -1,5 +1,10 @@
 program read_integrals
-
+  BEGIN_DOC
+! Reads the integrals from the following files:
+! - kinetic_mo
+! - nuclear_mo
+!
- bielec_mo + END_DOC PROVIDE ezfio_filename call ezfio_set_integrals_monoelec_disk_access_mo_one_integrals("None") call run diff --git a/scripts/compilation/qp_create_ninja.py b/scripts/compilation/qp_create_ninja.py index 56d79a4b..1b7272b1 100755 --- a/scripts/compilation/qp_create_ninja.py +++ b/scripts/compilation/qp_create_ninja.py @@ -36,6 +36,7 @@ except ImportError: from qp_path import QP_ROOT, QP_SRC, QP_EZFIO LIB = "" # join(QP_ROOT, "lib", "rdtsc.o") +GPI_LIB = join(QP_ROOT, "lib64", "libGPI2.a") EZFIO_LIB = join(QP_ROOT, "lib", "libezfio_irp.a") ZMQ_LIB = join(QP_ROOT, "lib", "libf77zmq.a") + " " + join(QP_ROOT, "lib", "libzmq.a") + " -lstdc++ -lrt" ROOT_BUILD_NINJA = join(QP_ROOT, "config", "build.ninja") @@ -96,8 +97,7 @@ def ninja_create_env_variable(pwd_config_file): l_string.append(str_) lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB") - lib_gpi2 = get_compilation_option(pwd_config_file, "GPI2_LIB") - str_lib = " ".join([LIB, lib_lapack, lib_gpi2, EZFIO_LIB, ZMQ_LIB]) + str_lib = " ".join([LIB, lib_lapack, GPI_LIB, EZFIO_LIB, ZMQ_LIB]) l_string.append("LIB = {0} ".format(str_lib)) l_string.append("") diff --git a/src/Determinants/two_body_dm_map.irp.f b/src/Determinants/two_body_dm_map.irp.f index aa8f630b..2228b1b5 100644 --- a/src/Determinants/two_body_dm_map.irp.f +++ b/src/Determinants/two_body_dm_map.irp.f @@ -187,7 +187,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl) print*,'n_elements = ',n_elements call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(two_body_dm_ab_map) + call map_merge(two_body_dm_ab_map) deallocate(buffer_i,buffer_value) diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 05eb8dff..84cfd228 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -146,7 +146,7 @@ subroutine set_integrals_jj_into_map enddo call 
insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end subroutine set_integrals_exchange_jj_into_map @@ -167,7 +167,7 @@ subroutine set_integrals_exchange_jj_into_map enddo call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end @@ -458,7 +458,7 @@ subroutine add_integrals_to_map(mask_ijkl) real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -773,7 +773,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -1035,7 +1035,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) ! print*, 'Communicating the map' ! call communicate_mo_integrals() !IRP_ENDIF - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index ac16f97e..29f7440c 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -13,7 +13,7 @@ module map_module ! cache_map using a binary search ! ! When using the map_update subroutine to build the map, -! the map_unique subroutine +! the map_merge subroutine ! should be called before getting data from the map. 
use omp_lib @@ -274,7 +274,7 @@ subroutine map_sort(map) end -subroutine cache_map_unique(map) +subroutine cache_map_merge(map) use map_module implicit none type (cache_map_type), intent(inout) :: map @@ -298,6 +298,28 @@ subroutine cache_map_unique(map) end +subroutine cache_map_unique(map) ! Sorts the cache map, then keeps only the first entry of each run of equal keys + use map_module + implicit none + type (cache_map_type), intent(inout) :: map + integer(cache_key_kind) :: prev_key + integer(cache_map_size_kind) :: i, j + + call cache_map_sort(map) + prev_key = -1_8 ! sentinel; presumably no stored key equals -1 (TODO confirm) + j=0 + do i=1,map%n_elements + if (map%key(i) /= prev_key) then + j = j+1 + map%value(j) = map%value(i) + map%key(j) = map%key(i) + prev_key = map%key(i) + endif + enddo + map%n_elements = j ! number of entries kept + +end + subroutine cache_map_shrink(map,thr) use map_module implicit none @@ -338,6 +360,27 @@ subroutine map_unique(map) end +subroutine map_merge(map) ! Calls cache_map_merge on every cache map while holding its lock, then recomputes map%n_elements + use map_module + implicit none + type (map_type), intent(inout) :: map + integer(map_size_kind) :: i + integer(map_size_kind) :: icount + + icount = 0_8 + !$OMP PARALLEL DO SCHEDULE(dynamic,1000) DEFAULT(SHARED) PRIVATE(i)& + !$OMP REDUCTION(+:icount) + do i=0_8,map%map_size + call omp_set_lock(map%map(i)%lock) + call cache_map_merge(map%map(i)) + call omp_unset_lock(map%map(i)%lock) + icount = icount + map%map(i)%n_elements + enddo + !$OMP END PARALLEL DO + map%n_elements = icount ! total over all cache maps + +end + subroutine map_shrink(map,thr) use map_module implicit none @@ -402,7 +445,7 @@ subroutine map_update(map, key, value, sze, thr) else ! Assert that the map has a proper size if (local_map%n_elements == local_map%map_size) then - call cache_map_unique(local_map) + call cache_map_merge(local_map) call cache_map_reallocate(local_map, local_map%n_elements + local_map%n_elements) call cache_map_shrink(local_map,thr) endif