From 631ef5b54cbff02f82e9a339f65a961746194cab Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 15 Aug 2017 17:56:23 +0200 Subject: [PATCH 01/34] Fixed GPI2 --- install/scripts/install_gpi2.sh | 4 +- plugins/GPI2/broadcast.irp.f | 254 +++++++++++++++++++++++++ scripts/compilation/qp_create_ninja.py | 4 +- src/AO_Basis/aos.irp.f | 3 - src/Determinants/EZFIO.cfg | 2 +- src/Determinants/determinants.irp.f | 109 ----------- 6 files changed, 259 insertions(+), 117 deletions(-) create mode 100644 plugins/GPI2/broadcast.irp.f diff --git a/install/scripts/install_gpi2.sh b/install/scripts/install_gpi2.sh index 751f4ef8..87bdbb62 100755 --- a/install/scripts/install_gpi2.sh +++ b/install/scripts/install_gpi2.sh @@ -6,9 +6,9 @@ GPI_OPTIONS=--with-ethernet function _install() { - cd gpi2 + cd _build/gpi2 ./install.sh -p $QP_ROOT $GPI_OPTIONS - cp src/GASPI.f90 $QP_ROOT/src/plugins/GPI2/ + cp src/GASPI.f90 $QP_ROOT/plugins/GPI2/ return 0 } diff --git a/plugins/GPI2/broadcast.irp.f b/plugins/GPI2/broadcast.irp.f new file mode 100644 index 00000000..7ebb1408 --- /dev/null +++ b/plugins/GPI2/broadcast.irp.f @@ -0,0 +1,254 @@ +subroutine broadcast_wf(energy) + implicit none + BEGIN_DOC + ! Segment corresponding to the wave function. This is segment 0. + END_DOC + use bitmasks + use GASPI + use ISO_C_BINDING + + double precision, intent(inout) :: energy(N_states) + integer(gaspi_return_t) :: res + + if (is_gaspi_master) then + call broadcast_wf_put(energy) + else + call broadcast_wf_get(energy) + endif + + res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_barrier failed" + stop -1 + end if + + + integer(gaspi_segment_id_t) :: seg_id + do seg_id=0,3 + res = gaspi_segment_delete(seg_id) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_segment_delete failed", seg_id + stop -1 + end if + end do + +end + + + + + +subroutine broadcast_wf_put(energy) + implicit none + BEGIN_DOC + ! 
Segment corresponding to the wave function. This is segment 0. + END_DOC + use bitmasks + use GASPI + use ISO_C_BINDING + + double precision, intent(in) :: energy(N_states) + integer(gaspi_segment_id_t) :: seg_id + integer(gaspi_alloc_t) :: seg_alloc_policy + integer(gaspi_size_t) :: seg_size(0:3) + type(c_ptr) :: seg_ptr(0:3) + integer, pointer :: params_int(:) ! Segment 0 + double precision, pointer :: psi_coef_tmp(:,:) ! Segment 1 + integer(bit_kind), pointer :: psi_det_tmp(:,:,:) ! Segment 2 + double precision, pointer :: params_double(:) ! Segment 3 + + integer(gaspi_return_t) :: res + + + seg_alloc_policy = GASPI_MEM_UNINITIALIZED + + seg_size(0) = 4 * 5 + seg_id=0 + res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, & + GASPI_BLOCK, seg_alloc_policy) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_create_segment failed" + stop -1 + end if + + res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_segment_ptr failed" + stop -1 + end if + + call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /)) + params_int(1) = N_states + params_int(2) = N_det + params_int(3) = psi_det_size + + res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_barrier failed" + stop -1 + end if + + seg_size(1) = 8 * psi_det_size * N_states + seg_size(2) = bit_kind * psi_det_size * 2 * N_int + seg_size(3) = 8 * N_states + + do seg_id=1, 3 + res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, & + GASPI_BLOCK, seg_alloc_policy) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_create_segment failed" + stop -1 + end if + + res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) + if(res .ne. 
GASPI_SUCCESS) then + write(*,*) "gaspi_segment_ptr failed" + stop -1 + end if + end do + + call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef)) + call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det)) + call c_f_pointer(seg_ptr(3), params_double, shape=(/ N_states /)) + + psi_coef_tmp = psi_coef + psi_det_tmp = psi_det + params_double = energy + + res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_barrier failed" + stop -1 + end if + +end + + + + + + + +subroutine broadcast_wf_get(energy) + implicit none + BEGIN_DOC + ! Segment corresponding to the wave function. This is segment 0. + END_DOC + use bitmasks + use GASPI + use ISO_C_BINDING + + double precision, intent(out) :: energy(N_states) + integer(gaspi_segment_id_t) :: seg_id + integer(gaspi_alloc_t) :: seg_alloc_policy + integer(gaspi_size_t) :: seg_size(0:3) + type(c_ptr) :: seg_ptr(0:3) + integer, pointer :: params_int(:) ! Segment 0 + double precision, pointer :: psi_coef_tmp(:,:) ! Segment 1 + integer(bit_kind), pointer :: psi_det_tmp(:,:,:) ! Segment 2 + double precision, pointer :: params_double(:) ! Segment 3 + + integer(gaspi_return_t) :: res + + + seg_alloc_policy = GASPI_MEM_UNINITIALIZED + + seg_size(0) = 4 * 5 + seg_id=0 + res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL,& + GASPI_BLOCK, seg_alloc_policy) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_create_segment failed" + stop -1 + end if + + res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_segment_ptr failed" + stop -1 + end if + + res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) + if(res .ne. 
GASPI_SUCCESS) then + write(*,*) "gaspi_barrier failed" + stop -1 + end if + + integer(gaspi_offset_t) :: localOff, remoteOff + integer(gaspi_rank_t) :: remoteRank + integer(gaspi_queue_id_t) :: queue + localOff = 0 + remoteRank = 0 + queue = 0 + res = gaspi_read(seg_id, localOff, remoteRank, & + seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_read failed" + stop -1 + end if + + res = gaspi_wait(queue, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_wait failed" + stop -1 + end if + + call c_f_pointer(seg_ptr(0), params_int, shape=shape( (/ 5 /) )) + + N_states = params_int(1) + N_det = params_int(2) + psi_det_size = params_int(3) + TOUCH N_states N_det psi_det_size + + seg_size(1) = 8 * psi_det_size * N_states + seg_size(2) = bit_kind * psi_det_size * 2 * N_int + seg_size(3) = 8 * N_states + + do seg_id=1, 3 + res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, & + GASPI_BLOCK, seg_alloc_policy) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_create_segment failed" + stop -1 + end if + + res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_segment_ptr failed" + stop -1 + end if + end do + + res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_barrier failed" + stop -1 + end if + + do seg_id=1, 3 + res = gaspi_read(seg_id, localOff, remoteRank, & + seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK) + if(res .ne. GASPI_SUCCESS) then + write(*,*) "gaspi_read failed" + stop -1 + end if + res = gaspi_wait(queue, GASPI_BLOCK) + if(res .ne. 
GASPI_SUCCESS) then + write(*,*) "gaspi_wait failed" + stop -1 + end if + end do + + call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef)) + call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det)) + call c_f_pointer(seg_ptr(3), params_double, shape=shape(energy)) + + psi_coef = psi_coef_tmp + psi_det = psi_det_tmp + energy = params_double + +end + + + + diff --git a/scripts/compilation/qp_create_ninja.py b/scripts/compilation/qp_create_ninja.py index 56d79a4b..1b7272b1 100755 --- a/scripts/compilation/qp_create_ninja.py +++ b/scripts/compilation/qp_create_ninja.py @@ -36,6 +36,7 @@ except ImportError: from qp_path import QP_ROOT, QP_SRC, QP_EZFIO LIB = "" # join(QP_ROOT, "lib", "rdtsc.o") +GPI_LIB = join(QP_ROOT, "lib64", "libGPI2.a") EZFIO_LIB = join(QP_ROOT, "lib", "libezfio_irp.a") ZMQ_LIB = join(QP_ROOT, "lib", "libf77zmq.a") + " " + join(QP_ROOT, "lib", "libzmq.a") + " -lstdc++ -lrt" ROOT_BUILD_NINJA = join(QP_ROOT, "config", "build.ninja") @@ -96,8 +97,7 @@ def ninja_create_env_variable(pwd_config_file): l_string.append(str_) lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB") - lib_gpi2 = get_compilation_option(pwd_config_file, "GPI2_LIB") - str_lib = " ".join([LIB, lib_lapack, lib_gpi2, EZFIO_LIB, ZMQ_LIB]) + str_lib = " ".join([LIB, lib_lapack, GPI_LIB, EZFIO_LIB, ZMQ_LIB]) l_string.append("LIB = {0} ".format(str_lib)) l_string.append("") diff --git a/src/AO_Basis/aos.irp.f b/src/AO_Basis/aos.irp.f index f0f03fab..062ef296 100644 --- a/src/AO_Basis/aos.irp.f +++ b/src/AO_Basis/aos.irp.f @@ -10,13 +10,10 @@ BEGIN_PROVIDER [ integer, ao_num_align ] END_PROVIDER BEGIN_PROVIDER [ integer, ao_prim_num_max ] -&BEGIN_PROVIDER [ integer, ao_prim_num_max_align ] implicit none ao_prim_num_max = 0 PROVIDE ezfio_filename call ezfio_get_ao_basis_ao_prim_num_max(ao_prim_num_max) - integer :: align_double - ao_prim_num_max_align = align_double(ao_prim_num_max) END_PROVIDER BEGIN_PROVIDER [ double precision, ao_coef_normalized, 
(ao_num_align,ao_prim_num_max) ] diff --git a/src/Determinants/EZFIO.cfg b/src/Determinants/EZFIO.cfg index a68a61a5..9d0512f4 100644 --- a/src/Determinants/EZFIO.cfg +++ b/src/Determinants/EZFIO.cfg @@ -97,7 +97,7 @@ type: double precision size: (determinants.n_det) [expected_s2] -interface: ezfio,provider +interface: ezfio doc: Expected value of S^2 type: double precision diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index ad955b97..9a1d4ee1 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -133,115 +133,6 @@ BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,psi_det_size) ] END_PROVIDER - BEGIN_PROVIDER [ integer(bit_kind), psi_occ_pattern, (N_int,2,psi_det_size) ] -&BEGIN_PROVIDER [ integer, N_occ_pattern ] - implicit none - BEGIN_DOC - ! array of the occ_pattern present in the wf - ! psi_occ_pattern(:,1,j) = jth occ_pattern of the wave function : represent all the single occupation - ! psi_occ_pattern(:,2,j) = jth occ_pattern of the wave function : represent all the double occupation - END_DOC - integer :: i,j,k - - ! create - do i = 1, N_det - do k = 1, N_int - psi_occ_pattern(k,1,i) = ieor(psi_det(k,1,i),psi_det(k,2,i)) - psi_occ_pattern(k,2,i) = iand(psi_det(k,1,i),psi_det(k,2,i)) - enddo - enddo - - ! Sort - integer, allocatable :: iorder(:) - integer*8, allocatable :: bit_tmp(:) - integer*8, external :: occ_pattern_search_key - integer(bit_kind), allocatable :: tmp_array(:,:,:) - logical,allocatable :: duplicate(:) - - - allocate ( iorder(N_det), duplicate(N_det), bit_tmp(N_det), tmp_array(N_int,2,psi_det_size) ) - - do i=1,N_det - iorder(i) = i - !$DIR FORCEINLINE - bit_tmp(i) = occ_pattern_search_key(psi_occ_pattern(1,1,i),N_int) - enddo - call i8sort(bit_tmp,iorder,N_det) - !DIR$ IVDEP - do i=1,N_det - do k=1,N_int - tmp_array(k,1,i) = psi_occ_pattern(k,1,iorder(i)) - tmp_array(k,2,i) = psi_occ_pattern(k,2,iorder(i)) - enddo - duplicate(i) = .False. 
- enddo - - i=1 - integer (bit_kind) :: occ_pattern_tmp - do i=1,N_det - duplicate(i) = .False. - enddo - - do i=1,N_det-1 - if (duplicate(i)) then - cycle - endif - j = i+1 - do while (bit_tmp(j)==bit_tmp(i)) - if (duplicate(j)) then - j+=1 - cycle - endif - duplicate(j) = .True. - do k=1,N_int - if ( (tmp_array(k,1,i) /= tmp_array(k,1,j)) & - .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then - duplicate(j) = .False. - exit - endif - enddo - j+=1 - if (j>N_det) then - exit - endif - enddo - enddo - - N_occ_pattern=0 - do i=1,N_det - if (duplicate(i)) then - cycle - endif - N_occ_pattern += 1 - do k=1,N_int - psi_occ_pattern(k,1,N_occ_pattern) = tmp_array(k,1,i) - psi_occ_pattern(k,2,N_occ_pattern) = tmp_array(k,2,i) - enddo - enddo - - deallocate(iorder,duplicate,bit_tmp,tmp_array) -! !TODO DEBUG -! integer :: s -! do i=1,N_occ_pattern -! do j=i+1,N_occ_pattern -! s = 0 -! do k=1,N_int -! if((psi_occ_pattern(k,1,j) /= psi_occ_pattern(k,1,i)).or. & -! (psi_occ_pattern(k,2,j) /= psi_occ_pattern(k,2,i))) then -! s=1 -! exit -! endif -! enddo -! if ( s == 0 ) then -! print *, 'Error : occ ', j, 'already in wf' -! call debug_det(psi_occ_pattern(1,1,j),N_int) -! stop -! endif -! enddo -! enddo -! 
!TODO DEBUG -END_PROVIDER - BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ] implicit none From f5f5c13264cb39b73c05b70be153dc5a81692c00 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 15 Aug 2017 18:40:28 +0200 Subject: [PATCH 02/34] fixed travis --- .travis.yml | 6 ++---- plugins/GPI2/broadcast.irp.f | 16 ++++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index fe8de634..5126a44c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,11 +13,9 @@ addons: packages: - gfortran - gcc -# - liblapack-dev - - libatlas-dev + - libblas-dev + - liblapack-dev - graphviz -# - zlib1g-dev -# - libgmp3-dev cache: directories: diff --git a/plugins/GPI2/broadcast.irp.f b/plugins/GPI2/broadcast.irp.f index 7ebb1408..e9f421d8 100644 --- a/plugins/GPI2/broadcast.irp.f +++ b/plugins/GPI2/broadcast.irp.f @@ -41,7 +41,7 @@ end subroutine broadcast_wf_put(energy) implicit none BEGIN_DOC - ! Segment corresponding to the wave function. This is segment 0. + ! Initiates the broadcast of the wave function END_DOC use bitmasks use GASPI @@ -67,13 +67,13 @@ subroutine broadcast_wf_put(energy) res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, & GASPI_BLOCK, seg_alloc_policy) if(res .ne. GASPI_SUCCESS) then - write(*,*) "gaspi_create_segment failed" + write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id stop -1 end if res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) if(res .ne. GASPI_SUCCESS) then - write(*,*) "gaspi_segment_ptr failed" + write(*,*) "gaspi_segment_ptr failed", gaspi_rank stop -1 end if @@ -84,7 +84,7 @@ subroutine broadcast_wf_put(energy) res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) if(res .ne. 
GASPI_SUCCESS) then - write(*,*) "gaspi_barrier failed" + write(*,*) "gaspi_barrier failed", gaspi_rank stop -1 end if @@ -96,13 +96,13 @@ subroutine broadcast_wf_put(energy) res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, & GASPI_BLOCK, seg_alloc_policy) if(res .ne. GASPI_SUCCESS) then - write(*,*) "gaspi_create_segment failed" + write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id stop -1 end if res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id)) if(res .ne. GASPI_SUCCESS) then - write(*,*) "gaspi_segment_ptr failed" + write(*,*) "gaspi_segment_ptr failed", gaspi_rank stop -1 end if end do @@ -117,7 +117,7 @@ subroutine broadcast_wf_put(energy) res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK) if(res .ne. GASPI_SUCCESS) then - write(*,*) "gaspi_barrier failed" + write(*,*) "gaspi_barrier failed", gaspi_rank stop -1 end if @@ -132,7 +132,7 @@ end subroutine broadcast_wf_get(energy) implicit none BEGIN_DOC - ! Segment corresponding to the wave function. This is segment 0. + ! 
Gets the broadcasted wave function END_DOC use bitmasks use GASPI From f09bd45885557d10b70002bc861ba69fceb98fc6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 13:23:26 +0200 Subject: [PATCH 03/34] Optimization in selection --- plugins/Full_CI_ZMQ/selection.irp.f | 95 +++++++++++++++++++++-------- src/Davidson/print_energy.irp.f | 22 +++++++ 2 files changed, 93 insertions(+), 24 deletions(-) create mode 100644 src/Davidson/print_energy.irp.f diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index f404d069..d0dd6c40 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -419,37 +419,82 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d fullinteresting(0) = 0 do ii=1,preinteresting(0) - i = preinteresting(ii) - mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) - mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) - nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) - do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), preinteresting_det(j,1,ii)) - mobMask(j,2) = iand(negMask(j,2), preinteresting_det(j,2,ii)) - nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do + select case (N_int) + case (1) + mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + case (2) + mobMask(1:2,1) = iand(negMask(1:2,1), preinteresting_det(1:2,1,ii)) + mobMask(1:2,2) = iand(negMask(1:2,2), preinteresting_det(1:2,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + & + popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2)) + case (3) + mobMask(1:3,1) = iand(negMask(1:3,1), preinteresting_det(1:3,1,ii)) + mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii)) + nt = 0 + do j=3,1,-1 + if (mobMask(j,1) /= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) 
/= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case (4) + mobMask(1:4,1) = iand(negMask(1:4,1), preinteresting_det(1:4,1,ii)) + mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii)) + nt = 0 + do j=4,1,-1 + if (mobMask(j,1) /= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case (default) + mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii)) + mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii)) + nt = 0 + do j=N_int,1,-1 + if (mobMask(j,1) /= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bitkind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + end select - if(nt <= 4) then - interesting(0) += 1 - interesting(interesting(0)) = i + if(nt <= 4) then + i = preinteresting(ii) + interesting(0) += 1 + interesting(interesting(0)) = i minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii) minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii) - do j=2,N_int + do j=2,N_int minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii) minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii) - enddo - if(nt <= 2) then - fullinteresting(0) += 1 - fullinteresting(fullinteresting(0)) = i + enddo + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii) fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii) - do j=2,N_int + do j=2,N_int fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii) fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii) - enddo - end if - end if - + enddo + end if + end if + end do do ii=1,prefullinteresting(0) @@ -458,12 +503,14 @@ subroutine 
select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) - do j=2,N_int + if (nt > 2) cycle + do j=N_int,2,-1 mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + if (nt > 2) exit end do - + if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i diff --git a/src/Davidson/print_energy.irp.f b/src/Davidson/print_energy.irp.f new file mode 100644 index 00000000..ae6f1da2 --- /dev/null +++ b/src/Davidson/print_energy.irp.f @@ -0,0 +1,22 @@ +program print_energy + implicit none + read_wf = .true. + touch read_wf + call routine +end + +subroutine routine + implicit none + integer :: i,j + double precision :: accu,hij + + print*, 'psi_energy = ',psi_energy + nuclear_repulsion + accu = 0.d0 +! do i = 1,N_det +! do j = 1,N_det +! call i_H_j(psi_det(1,1,j),psi_det(1,1,i),N_int,hij) +! accu += psi_coef(i,1) * psi_coef(j,1) * hij +! enddo +! enddo +! 
print*, 'accu = ',accu + nuclear_repulsion +end From b157f952fbba3b7450aec8795a1634b7d64fea60 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 14:10:20 +0200 Subject: [PATCH 04/34] Optimization in selection --- plugins/Full_CI_ZMQ/selection.irp.f | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index d0dd6c40..3e58224a 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -434,11 +434,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii)) nt = 0 do j=3,1,-1 - if (mobMask(j,1) /= 0_bitkind) then + if (mobMask(j,1) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 1)) if (nt > 4) exit endif - if (mobMask(j,2) /= 0_bitkind) then + if (mobMask(j,2) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 2)) if (nt > 4) exit endif @@ -448,25 +448,25 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii)) nt = 0 do j=4,1,-1 - if (mobMask(j,1) /= 0_bitkind) then + if (mobMask(j,1) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 1)) if (nt > 4) exit endif - if (mobMask(j,2) /= 0_bitkind) then + if (mobMask(j,2) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 2)) if (nt > 4) exit endif end do - case (default) + case default mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii)) mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii)) nt = 0 do j=N_int,1,-1 - if (mobMask(j,1) /= 0_bitkind) then + if (mobMask(j,1) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 1)) if (nt > 4) exit endif - if (mobMask(j,2) /= 0_bitkind) then + if (mobMask(j,2) /= 0_bit_kind) then nt = nt+ popcnt(mobMask(j, 2)) if (nt > 4) exit endif From 6b6ca9e7b6d55c355fee9e55d59e03b0f7c81e6b Mon Sep 17 00:00:00 2001 
From: Anthony Scemama Date: Mon, 25 Sep 2017 17:58:23 +0200 Subject: [PATCH 05/34] print_integrals_ao --- plugins/Hartree_Fock/SCF_old.irp.f | 61 ++++++++++++ .../read_integral/print_integrals_ao.irp.f | 94 +++++++++++++++++++ plugins/read_integral/read_integrals_ao.irp.f | 75 +++++++++++++++ src/Integrals_Monoelec/kin_ao_ints.irp.f | 25 ++--- src/Integrals_Monoelec/pot_ao_ints.irp.f | 10 +- 5 files changed, 245 insertions(+), 20 deletions(-) create mode 100644 plugins/Hartree_Fock/SCF_old.irp.f create mode 100644 plugins/read_integral/print_integrals_ao.irp.f create mode 100644 plugins/read_integral/read_integrals_ao.irp.f diff --git a/plugins/Hartree_Fock/SCF_old.irp.f b/plugins/Hartree_Fock/SCF_old.irp.f new file mode 100644 index 00000000..03d9a91d --- /dev/null +++ b/plugins/Hartree_Fock/SCF_old.irp.f @@ -0,0 +1,61 @@ +program scf + BEGIN_DOC +! Produce `Hartree_Fock` MO orbital +! output: mo_basis.mo_tot_num mo_basis.mo_label mo_basis.ao_md5 mo_basis.mo_coef mo_basis.mo_occ +! output: hartree_fock.energy +! optional: mo_basis.mo_coef + END_DOC + call create_guess + call orthonormalize_mos + call run +end + +subroutine create_guess + implicit none + BEGIN_DOC +! Create a MO guess if no MOs are present in the EZFIO directory + END_DOC + logical :: exists + PROVIDE ezfio_filename + call ezfio_has_mo_basis_mo_coef(exists) + if (.not.exists) then + if (mo_guess_type == "HCore") then + mo_coef = ao_ortho_lowdin_coef + TOUCH mo_coef + mo_label = 'Guess' + call mo_as_eigvectors_of_mo_matrix(mo_mono_elec_integral,size(mo_mono_elec_integral,1),size(mo_mono_elec_integral,2),mo_label) + SOFT_TOUCH mo_coef mo_label + else if (mo_guess_type == "Huckel") then + call huckel_guess + else + print *, 'Unrecognized MO guess type : '//mo_guess_type + stop 1 + endif + endif +end + +subroutine run + + BEGIN_DOC +! 
Run SCF calculation + END_DOC + + use bitmasks + implicit none + + double precision :: SCF_energy_before,SCF_energy_after,diag_H_mat_elem + double precision :: EHF + integer :: i_it, i, j, k + + EHF = HF_energy + + mo_label = "Canonical" + +! Choose SCF algorithm + + call damping_SCF ! Deprecated routine +! call Roothaan_Hall_SCF + +end + + diff --git a/plugins/read_integral/print_integrals_ao.irp.f b/plugins/read_integral/print_integrals_ao.irp.f new file mode 100644 index 00000000..3f489ba8 --- /dev/null +++ b/plugins/read_integral/print_integrals_ao.irp.f @@ -0,0 +1,94 @@ +program print_integrals + + PROVIDE ezfio_filename + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals('None') + call ezfio_set_integrals_bielec_disk_access_ao_integrals('None') + call run +end + +subroutine run + implicit none + + integer :: iunit + integer :: getunitandopen + + integer ::i,j,k,l + double precision :: integral + + iunit = getunitandopen('kinetic_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_kinetic_integral(i,j) + if (dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + + iunit = getunitandopen('overlap_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_overlap(i,j) + if (dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + + iunit = getunitandopen('nuclear_ao','w') + do i=1,ao_num + do j=1,ao_num + integral = ao_nucl_elec_integral(i,j) + if (dabs(integral) > ao_integrals_threshold) then + write(iunit,*) i,j, integral + endif + enddo + enddo + close(iunit) + +! iunit = getunitandopen('pseudo_ao','w') +! do i=1,ao_num +! do j=1,ao_num +! write(iunit,*) i,j, ao_pseudo_integral(i,j) +! enddo +! enddo +! 
close(iunit) + + PROVIDE ao_bielec_integrals_in_map + iunit = getunitandopen('bielec_ao','w') + + integer*8 :: i8 + integer :: i_idx, n_elements_max, k1, n_elements + integer :: ii(8), jj(8), kk(8), ll(8) + double precision, external :: ao_bielec_integral + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + + do i8=0_8,ao_integrals_map%map_size + n_elements = n_elements_max + call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) + do k1=1,n_elements + call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + if ( (kk(1)>ao_num).or. & + (ii(1)>ao_num).or. & + (jj(1)>ao_num).or. & + (ll(1)>ao_num) ) then + cycle + endif + k = kk(1) + i = ii(1) + l = ll(1) + j = jj(1) + integral = values(k1) + write (iunit,'(4(I5,X),D22.15)') k,i,l,j, integral + enddo + enddo + + close(iunit) +end diff --git a/plugins/read_integral/read_integrals_ao.irp.f b/plugins/read_integral/read_integrals_ao.irp.f new file mode 100644 index 00000000..c323c7e1 --- /dev/null +++ b/plugins/read_integral/read_integrals_ao.irp.f @@ -0,0 +1,75 @@ +program read_integrals + + PROVIDE ezfio_filename + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("None") + call run +end + +subroutine run + use map_module + implicit none + + integer :: iunit + integer :: getunitandopen + + integer ::i,j,k,l + double precision :: integral + double precision, allocatable :: A(:,:) + + integer :: n_integrals + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + integer(key_kind) :: key + + allocate (A(ao_num,ao_num)) + A = 0.d0 + + iunit = getunitandopen('kinetic_ao','r') + do + read (iunit,*,end=10) i,j, integral + A(i,j) = integral + A(j,i) = integral + enddo + 10 continue + close(iunit) + call write_one_e_integrals('ao_kinetic_integral', A, size(A,1), size(A,2)) + + + A = 0.d0 + 
iunit = getunitandopen('nuclear_ao','r') + do + read (iunit,*,end=12) i,j, integral + A(i,j) = integral + A(j,i) = integral + enddo + 12 continue + close(iunit) + call write_one_e_integrals('ao_ne_integral', A, size(A,1), size(A,2)) + + call write_one_e_integrals('ao_pseudo_integral', ao_pseudo_integral,& + size(ao_pseudo_integral,1), size(ao_pseudo_integral,2)) + + + call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("Read") + + allocate(buffer_i(ao_num**4), buffer_values(ao_num**4)) + + iunit = getunitandopen('bielec_ao','r') + n_integrals=0 + do + read (iunit,*,end=13) i,j,k,l, integral + n_integrals += 1 + call bielec_integrals_index(i, j, k, l, buffer_i(n_integrals) ) + buffer_values(n_integrals) = integral + enddo + 13 continue + close(iunit) + + call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values) + + call map_sort(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_integrals_bielec_disk_access_ao_integrals('Read') + +end diff --git a/src/Integrals_Monoelec/kin_ao_ints.irp.f b/src/Integrals_Monoelec/kin_ao_ints.irp.f index 6cb2aa49..d6d09fbc 100644 --- a/src/Integrals_Monoelec/kin_ao_ints.irp.f +++ b/src/Integrals_Monoelec/kin_ao_ints.irp.f @@ -1,6 +1,6 @@ - BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num_align,ao_num) ] + BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ] implicit none integer :: i,j,n,l double precision :: f @@ -45,8 +45,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num ao_deriv2_x(i,j)= 0.d0 ao_deriv2_y(i,j)= 0.d0 @@ -59,7 +57,6 @@ power_B(3) = 
ao_power( i, 3 ) do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) @@ -122,7 +119,7 @@ END_PROVIDER -BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] +BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num,ao_num)] implicit none BEGIN_DOC ! array of the priminitve basis kinetic integrals @@ -131,27 +128,23 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] integer :: i,j,k,l if (read_ao_one_integrals) then - call ezfio_get_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) - call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) + call read_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& + size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) print *, 'AO kinetic integrals read from disk' else !$OMP PARALLEL DO DEFAULT(NONE) & !$OMP PRIVATE(i,j) & - !$OMP SHARED(ao_num, ao_num_align, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z) + !$OMP SHARED(ao_num, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z) do j = 1, ao_num - !DEC$ VECTOR ALWAYS - !DEC$ VECTOR ALIGNED do i = 1, ao_num ao_kinetic_integral(i,j) = -0.5d0 * (ao_deriv2_x(i,j) + ao_deriv2_y(i,j) + ao_deriv2_z(i,j) ) enddo - do i = ao_num +1,ao_num_align - ao_kinetic_integral(i,j) = 0.d0 - enddo enddo !$OMP END PARALLEL DO endif if (write_ao_one_integrals) then - call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) + call write_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& + size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) print *, 'AO kinetic integrals written to disk' endif END_PROVIDER diff --git a/src/Integrals_Monoelec/pot_ao_ints.irp.f b/src/Integrals_Monoelec/pot_ao_ints.irp.f index 7116d2c7..22869c4c 
100644 --- a/src/Integrals_Monoelec/pot_ao_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] +BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num,ao_num)] BEGIN_DOC ! interaction nuclear electron END_DOC @@ -11,7 +11,8 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult if (read_ao_one_integrals) then - call ezfio_get_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) + call read_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & + size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) print *, 'AO N-e integrals read from disk' else @@ -73,14 +74,15 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] !$OMP END PARALLEL endif if (write_ao_one_integrals) then - call ezfio_set_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) + call write_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & + size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) print *, 'AO N-e integrals written to disk' endif END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num_align,ao_num,nucl_num)] + BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num,ao_num,nucl_num)] BEGIN_DOC ! ao_nucl_elec_integral_per_atom(i,j,k) = - ! 
where Rk is the geometry of the kth atom From cb1227a9a98060a136728962b5e86e47119a314b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 20:23:50 +0200 Subject: [PATCH 06/34] OK --- plugins/FourIdx/four_idx.irp.f | 44 ++++++ plugins/FourIdx/four_index.irp.f | 147 +++++++++++++++++++++ src/Determinants/two_body_dm_map.irp.f | 2 +- src/Integrals_Bielec/mo_bi_integrals.irp.f | 10 +- src/Utils/map_module.f90 | 49 ++++++- 5 files changed, 243 insertions(+), 9 deletions(-) create mode 100644 plugins/FourIdx/four_idx.irp.f create mode 100644 plugins/FourIdx/four_index.irp.f diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f new file mode 100644 index 00000000..de5927bf --- /dev/null +++ b/plugins/FourIdx/four_idx.irp.f @@ -0,0 +1,44 @@ +program FourIdx + use map_module + implicit none + BEGIN_DOC +! Performs a four index transformation of the two-electron integrals + END_DOC + + type(map_type) :: test_map + integer(key_kind) :: key_max + integer(map_size_kind) :: sze + + call bielec_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max) + sze = key_max + call map_init(test_map,sze) + + call four_index_transform(ao_integrals_map,test_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + + integer :: i,j,k,l + real(integral_kind) :: integral1, integral2 + + provide mo_bielec_integrals_in_map + + do i=1,mo_tot_num + do j=1,mo_tot_num + do k=1,mo_tot_num + do l=1,mo_tot_num + call bielec_integrals_index(i,j,k,l,key_max) + call map_get(test_map,key_max,integral1) + call map_get(mo_integrals_map,key_max,integral2) + if (dabs(integral2) >=1.d-10 ) then + if (dabs(integral1 / integral2 -1.d0) > .001d0) then + print *, i,j,k,l + print *, integral1, integral2 + print *, '' + endif + endif + enddo + enddo + enddo + enddo +end diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f new file mode 100644 index 
00000000..eba99f2c --- /dev/null +++ b/plugins/FourIdx/four_index.irp.f @@ -0,0 +1,147 @@ +subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + allocate( T(i_min:i_max,j_min:j_max,k_min:k_max), & + U(i_min:i_max,j_min:j_max,k_min:k_max), & + V(i_min:i_max,j_min:j_max,k_min:k_max), & + key(i_max*j_max*k_max), & + value(i_max*j_max*k_max) ) + + do d=d_start,d_end + U = 0.d0 + print *, d + do 
l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + do k=k_start,k_end + do j=j_start,j_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + T(i,j,k) = tmp + enddo + enddo + enddo + + V = 0.d0 + do a=a_start,a_end + do k=k_start,k_end + do j=j_start,j_end + do i=i_start,i_end + V(j,k,a) = V(j,k,a) + T(i,j,k)*matrix_B(i,a) + enddo + enddo + enddo + enddo +! call DGEMM('T','N', (j_end-j_start+1),(k_end-k_start+1), & +! (i_end-i_start+1), 1.d0, & +! T, size(T,1)* + + T = 0.d0 + do b=b_start,b_end + do a=a_start,a_end + do k=k_start,k_end + do j=j_start,j_end + T(k,a,b) = T(k,a,b) + V(j,k,a)*matrix_B(j,b) + enddo + enddo + enddo + enddo + + V = 0.d0 + do c=c_start,c_end + do b=b_start,b_end + do a=a_start,a_end + do k=k_start,k_end + V(a,b,c) = V(a,b,c) + T(k,a,b)*matrix_B(k,c) + enddo + enddo + enddo + enddo + + do c=c_start,c_end + do b=b_start,b_end + do a=a_start,a_end +! do c=c_start,c_end +! do b=b_start,d +! do a=a_start,min(b,c) + U(a,b,c) = U(a,b,c) + V(a,b,c) * matrix_B(l,d) + enddo + enddo + enddo + + enddo + + idx = 0_8 + do c=c_start,c_end + do b=b_start,b_end + do a=a_start,a_end +! do c=c_start,c_end +! do b=b_start,d +! 
do a=a_start,min(b,c) + if (dabs(U(a,b,c)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,b,c) + enddo + enddo + enddo + call map_append(map_c, key, value, idx) + call map_sort(map_c) + call map_unique(map_c) + + enddo + +end diff --git a/src/Determinants/two_body_dm_map.irp.f b/src/Determinants/two_body_dm_map.irp.f index aa8f630b..2228b1b5 100644 --- a/src/Determinants/two_body_dm_map.irp.f +++ b/src/Determinants/two_body_dm_map.irp.f @@ -187,7 +187,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl) print*,'n_elements = ',n_elements call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(two_body_dm_ab_map) + call map_merge(two_body_dm_ab_map) deallocate(buffer_i,buffer_value) diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 05eb8dff..84cfd228 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -146,7 +146,7 @@ subroutine set_integrals_jj_into_map enddo call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end subroutine set_integrals_exchange_jj_into_map @@ -167,7 +167,7 @@ subroutine set_integrals_exchange_jj_into_map enddo call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) end @@ -458,7 +458,7 @@ subroutine add_integrals_to_map(mask_ijkl) real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -773,7 +773,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) 
real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) @@ -1035,7 +1035,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) ! print*, 'Communicating the map' ! call communicate_mo_integrals() !IRP_ENDIF - call map_unique(mo_integrals_map) + call map_merge(mo_integrals_map) call wall_time(wall_2) call cpu_time(cpu_2) diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index ac16f97e..29f7440c 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -13,7 +13,7 @@ module map_module ! cache_map using a binary search ! ! When using the map_update subroutine to build the map, -! the map_unique subroutine +! the map_merge subroutine ! should be called before getting data from the map. use omp_lib @@ -274,7 +274,7 @@ subroutine map_sort(map) end -subroutine cache_map_unique(map) +subroutine cache_map_merge(map) use map_module implicit none type (cache_map_type), intent(inout) :: map @@ -298,6 +298,28 @@ subroutine cache_map_unique(map) end +subroutine cache_map_unique(map) + use map_module + implicit none + type (cache_map_type), intent(inout) :: map + integer(cache_key_kind) :: prev_key + integer(cache_map_size_kind) :: i, j + + call cache_map_sort(map) + prev_key = -1_8 + j=0 + do i=1,map%n_elements + if (map%key(i) /= prev_key) then + j = j+1 + map%value(j) = map%value(i) + map%key(j) = map%key(i) + prev_key = map%key(i) + endif + enddo + map%n_elements = j + +end + subroutine cache_map_shrink(map,thr) use map_module implicit none @@ -338,6 +360,27 @@ subroutine map_unique(map) end +subroutine map_merge(map) + use map_module + implicit none + type (map_type), intent(inout) :: map + integer(map_size_kind) :: i + integer(map_size_kind) :: icount + + icount = 0_8 + !$OMP PARALLEL DO SCHEDULE(dynamic,1000) DEFAULT(SHARED) PRIVATE(i)& + !$OMP REDUCTION(+:icount) + do 
i=0_8,map%map_size + call omp_set_lock(map%map(i)%lock) + call cache_map_merge(map%map(i)) + call omp_unset_lock(map%map(i)%lock) + icount = icount + map%map(i)%n_elements + enddo + !$OMP END PARALLEL DO + map%n_elements = icount + +end + subroutine map_shrink(map,thr) use map_module implicit none @@ -402,7 +445,7 @@ subroutine map_update(map, key, value, sze, thr) else ! Assert that the map has a proper size if (local_map%n_elements == local_map%map_size) then - call cache_map_unique(local_map) + call cache_map_merge(local_map) call cache_map_reallocate(local_map, local_map%n_elements + local_map%n_elements) call cache_map_shrink(local_map,thr) endif From 987fc6598483f3224fc730665c025b26a996bd5b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 21:00:22 +0200 Subject: [PATCH 07/34] Introduced DGEMM --- plugins/FourIdx/four_index.irp.f | 125 +++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 40 deletions(-) diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index eba99f2c..e9e6e9c0 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -50,11 +50,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= k_max) ASSERT (LDB >= l_max) - allocate( T(i_min:i_max,j_min:j_max,k_min:k_max), & - U(i_min:i_max,j_min:j_max,k_min:k_max), & - V(i_min:i_max,j_min:j_max,k_min:k_max), & - key(i_max*j_max*k_max), & - value(i_max*j_max*k_max) ) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, b_start:b_end, c_start:c_end) ) do d=d_start,d_end U = 0.d0 @@ -63,62 +60,109 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end) ) + do k=k_start,k_end do j=j_start,j_end do i=i_start,i_end call bielec_integrals_index(i,j,k,l,idx) call map_get(map_a,idx,tmp) - T(i,j,k) = tmp + T(i, k,j) = tmp enddo enddo enddo - 
V = 0.d0 - do a=a_start,a_end - do k=k_start,k_end - do j=j_start,j_end - do i=i_start,i_end - V(j,k,a) = V(j,k,a) + T(i,j,k)*matrix_B(i,a) - enddo - enddo - enddo - enddo -! call DGEMM('T','N', (j_end-j_start+1),(k_end-k_start+1), & -! (i_end-i_start+1), 1.d0, & -! T, size(T,1)* + allocate( V(a_start:a_end, k_start:k_end, j_start:j_end) ) + +! V = 0.d0 +! do a=a_start,a_end +! do k=k_start,k_end +! do j=j_start,j_end +! do i=i_start,i_end +! V(a, k,j) = V(a, k,j) + T(i, k,j)*matrix_B(i, a) +! enddo +! enddo +! enddo +! enddo + call DGEMM('T','N', (a_end-a_start+1), (k_end-k_start+1)*(j_end-j_start+1),& + (i_end-i_start+1), 1.d0, & + matrix_B(i_start,a_start), size(matrix_B,1), & + T(i_start,k_start,j_start), size(T,1), 0.d0, & + V(a_start,k_start,j_start), size(V, 1) ) + + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) + +! V = 0.d0 +! do a=a_start,a_end +! do k=k_start,k_end +! do b=b_start,b_end +! do j=j_start,j_end +! V(a,k, b) = V(a,k, b) + T(a,k, j)*matrix_B(j, b) +! enddo +! enddo +! enddo +! enddo + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1),(b_end-b_start+1),& + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + + deallocate(V) + allocate( V(a_start:a_end, k_start:k_end, b_start:b_end) ) + V = T + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) - T = 0.d0 do b=b_start,b_end do a=a_start,a_end do k=k_start,k_end - do j=j_start,j_end - T(k,a,b) = T(k,a,b) + V(j,k,a)*matrix_B(j,b) - enddo + T(a, k,b) = V(a, k,b) enddo enddo enddo + deallocate(V) + allocate( V(a_start:a_end, b_start:b_end, c_start:c_end) ) + +! V = 0.d0 +! do b=b_start,b_end +! do c=c_start,c_end +! do a=a_start,a_end +! do k=k_start,k_end +! V(a,b,c) = V(a,b,c) + T(a,k ,b)*matrix_B(k, c) +! enddo +! enddo +! enddo +! 
enddo + V = 0.d0 - do c=c_start,c_end - do b=b_start,b_end - do a=a_start,a_end - do k=k_start,k_end - V(a,b,c) = V(a,b,c) + T(k,a,b)*matrix_B(k,c) - enddo - enddo - enddo + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), 1.d0, & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & + V(a_start,c_start,b), size(V,1) ) enddo - do c=c_start,c_end - do b=b_start,b_end - do a=a_start,a_end + + deallocate(T) + U = U + V*matrix_B(l, d) + +! do a=a_start,a_end +! do b=b_start,b_end +! do c=c_start,c_end ! do c=c_start,c_end ! do b=b_start,d ! do a=a_start,min(b,c) - U(a,b,c) = U(a,b,c) + V(a,b,c) * matrix_B(l,d) - enddo - enddo - enddo +! U(a,b,c) = U(a,b,c) + V(a,b,c) * matrix_B(l, d) +! enddo +! enddo +! enddo + + deallocate(V) enddo @@ -129,12 +173,12 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ! do c=c_start,c_end ! do b=b_start,d ! do a=a_start,min(b,c) - if (dabs(U(a,b,c)) < 1.d-15) then + if (dabs(U(a,c,b)) < 1.d-15) then cycle endif idx = idx+1_8 call bielec_integrals_index(a,b,c,d,key(idx)) - value(idx) = U(a,b,c) + value(idx) = U(a,c,b) enddo enddo enddo @@ -143,5 +187,6 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & call map_unique(map_c) enddo + deallocate(key,value) end From 63af3aa6a2b3125f087046a455accca4e9f1d9a0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 22:34:56 +0200 Subject: [PATCH 08/34] OpenMP --- plugins/FourIdx/four_index.irp.f | 100 +++++++++---------------------- 1 file changed, 29 insertions(+), 71 deletions(-) diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index e9e6e9c0..fcdad326 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -30,7 +30,10 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & integer(key_kind), allocatable :: key(:) real(integral_kind), allocatable :: value(:) - + ASSERT (k_start == i_start) + 
ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) i_min = min(i_start,a_start) i_max = max(i_end ,a_end ) @@ -50,42 +53,40 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= k_max) ASSERT (LDB >= l_max) + !$OMP PARALLEL DEFAULT(PRIVATE) SHARED( & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, b_start:b_end, c_start:c_end) ) + !$OMP DO do d=d_start,d_end U = 0.d0 print *, d - do l=l_start,l_end + do l=1,l_end if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif - allocate( T(i_start:i_end, k_start:k_end, j_start:j_end) ) + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) do k=k_start,k_end do j=j_start,j_end - do i=i_start,i_end + do i=i_start,k call bielec_integrals_index(i,j,k,l,idx) call map_get(map_a,idx,tmp) T(i, k,j) = tmp + T(k, i,j) = tmp enddo enddo enddo - allocate( V(a_start:a_end, k_start:k_end, j_start:j_end) ) -! V = 0.d0 -! do a=a_start,a_end -! do k=k_start,k_end -! do j=j_start,j_end -! do i=i_start,i_end -! V(a, k,j) = V(a, k,j) + T(i, k,j)*matrix_B(i, a) -! enddo -! enddo -! enddo -! enddo - call DGEMM('T','N', (a_end-a_start+1), (k_end-k_start+1)*(j_end-j_start+1),& + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & (i_end-i_start+1), 1.d0, & matrix_B(i_start,a_start), size(matrix_B,1), & T(i_start,k_start,j_start), size(T,1), 0.d0, & @@ -94,50 +95,16 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(T) allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) -! V = 0.d0 -! do a=a_start,a_end -! do k=k_start,k_end -! do b=b_start,b_end -! do j=j_start,j_end -! 
V(a,k, b) = V(a,k, b) + T(a,k, j)*matrix_B(j, b) -! enddo -! enddo -! enddo -! enddo - call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1),(b_end-b_start+1),& - (j_end-j_start+1), 1.d0, & - V(a_start,k_start,j_start), size(V,1)*size(V,2), & - matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & - T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & + (b_end-b_start+1), & + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) deallocate(V) - allocate( V(a_start:a_end, k_start:k_end, b_start:b_end) ) - V = T - deallocate(T) - allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) - do b=b_start,b_end - do a=a_start,a_end - do k=k_start,k_end - T(a, k,b) = V(a, k,b) - enddo - enddo - enddo - - deallocate(V) allocate( V(a_start:a_end, b_start:b_end, c_start:c_end) ) - -! V = 0.d0 -! do b=b_start,b_end -! do c=c_start,c_end -! do a=a_start,a_end -! do k=k_start,k_end -! V(a,b,c) = V(a,b,c) + T(a,k ,b)*matrix_B(k, c) -! enddo -! enddo -! enddo -! enddo - V = 0.d0 do b=b_start,b_end call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & @@ -147,22 +114,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & V(a_start,c_start,b), size(V,1) ) enddo - - deallocate(T) U = U + V*matrix_B(l, d) - -! do a=a_start,a_end -! do b=b_start,b_end -! do c=c_start,c_end -! do c=c_start,c_end -! do b=b_start,d -! do a=a_start,min(b,c) -! U(a,b,c) = U(a,b,c) + V(a,b,c) * matrix_B(l, d) -! enddo -! enddo -! 
enddo - - deallocate(V) + deallocate(T,V) enddo @@ -182,11 +135,16 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & enddo enddo enddo + !$OMP CRITICAL call map_append(map_c, key, value, idx) call map_sort(map_c) call map_unique(map_c) + !$OMP END CRITICAL enddo + !$OMP END DO + deallocate(key,value) + !$OMP END PARALLEL end From 42c7cf31b77b43aa673d7e752d0ef6640e160425 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2017 23:45:37 +0200 Subject: [PATCH 09/34] Optimized DGEMM --- plugins/FourIdx/four_index.irp.f | 45 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index fcdad326..01197eae 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -59,16 +59,16 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & !$OMP map_a,map_c,matrix_B) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) - allocate( U(a_start:a_end, b_start:b_end, c_start:c_end) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO + !$OMP DO SCHEDULE(static,1) do d=d_start,d_end U = 0.d0 - print *, d do l=1,l_end if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif + print *, d, l allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & V(a_start:a_end, k_start:k_end, j_start:j_end) ) @@ -79,11 +79,16 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & call bielec_integrals_index(i,j,k,l,idx) call map_get(map_a,idx,tmp) T(i, k,j) = tmp - T(k, i,j) = tmp enddo enddo enddo - + do j=j_start,j_end + do k=k_start,k_end + do i=k+1,i_end + T(i, k,j) = T(k, i,j) + enddo + enddo + enddo call DGEMM('T','N', (a_end-a_start+1), & (k_end-k_start+1)*(j_end-j_start+1), & @@ -93,10 +98,10 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & V(a_start,k_start,j_start), size(V, 1) ) deallocate(T) - allocate( T(a_start:a_end, 
k_start:k_end, b_start:b_end) ) + allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & - (b_end-b_start+1), & + (d-b_start+1), & (j_end-j_start+1), 1.d0, & V(a_start,k_start,j_start), size(V,1)*size(V,2), & matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & @@ -104,28 +109,22 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(V) - allocate( V(a_start:a_end, b_start:b_end, c_start:c_end) ) - V = 0.d0 - do b=b_start,b_end - call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & - (k_end-k_start+1), 1.d0, & - T(a_start,k_start,b), size(T,1), & - matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & - V(a_start,c_start,b), size(V,1) ) + do b=b_start,d + call DGEMM('N','N', (b-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) enddo - U = U + V*matrix_B(l, d) - deallocate(T,V) + deallocate(T) enddo idx = 0_8 - do c=c_start,c_end - do b=b_start,b_end - do a=a_start,a_end -! do c=c_start,c_end -! do b=b_start,d -! 
do a=a_start,min(b,c) + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) if (dabs(U(a,c,b)) < 1.d-15) then cycle endif From 9e791e52d2b700508b5df3a820a52fdd54f46f0a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 26 Sep 2017 14:54:18 +0200 Subject: [PATCH 10/34] read_integrals --- .../read_integral/print_integrals_ao.irp.f | 48 ++++++++++++------- .../read_integral/print_integrals_mo.irp.f | 2 +- plugins/read_integral/read_integrals_ao.irp.f | 1 + 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/plugins/read_integral/print_integrals_ao.irp.f b/plugins/read_integral/print_integrals_ao.irp.f index 3f489ba8..488c024d 100644 --- a/plugins/read_integral/print_integrals_ao.irp.f +++ b/plugins/read_integral/print_integrals_ao.irp.f @@ -70,23 +70,37 @@ subroutine run call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) allocate(keys(n_elements_max), values(n_elements_max)) - do i8=0_8,ao_integrals_map%map_size - n_elements = n_elements_max - call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) - do k1=1,n_elements - call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) - if ( (kk(1)>ao_num).or. & - (ii(1)>ao_num).or. & - (jj(1)>ao_num).or. & - (ll(1)>ao_num) ) then - cycle - endif - k = kk(1) - i = ii(1) - l = ll(1) - j = jj(1) - integral = values(k1) - write (iunit,'(4(I5,X),D22.15)') k,i,l,j, integral +! do i8=0_8,ao_integrals_map%map_size +! n_elements = n_elements_max +! call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) +! do k1=1,n_elements +! call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) +! if ( (kk(1)>ao_num).or. & +! (ii(1)>ao_num).or. & +! (jj(1)>ao_num).or. & +! (ll(1)>ao_num) ) then +! cycle +! endif +! k = kk(1) +! i = ii(1) +! l = ll(1) +! j = jj(1) +! integral = values(k1) +! write (iunit,'(4(I6,X),F20.15)') k,i,l,j, integral +! enddo +! 
enddo + + do i=1,ao_num + do k=1,ao_num + do j=1,ao_num + do l=1,ao_num + double precision, external :: get_ao_bielec_integral + integral = get_ao_bielec_integral(i,j,k,l,ao_integrals_map) + if (dabs(integral)>=1.e-15) then + write (iunit,'(4(I6),F20.15)') i,j,k,l, integral + endif + enddo + enddo enddo enddo diff --git a/plugins/read_integral/print_integrals_mo.irp.f b/plugins/read_integral/print_integrals_mo.irp.f index 133e34b8..18795249 100644 --- a/plugins/read_integral/print_integrals_mo.irp.f +++ b/plugins/read_integral/print_integrals_mo.irp.f @@ -49,7 +49,7 @@ program print_integrals double precision :: get_mo_bielec_integral integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) if (dabs(integral) > mo_integrals_threshold) then - write (iunit,'(4(I5,X),D22.15)') i,j,k,l, integral + write (iunit,'(4(I6,X),F20.15)') i,j,k,l, integral endif !end if enddo diff --git a/plugins/read_integral/read_integrals_ao.irp.f b/plugins/read_integral/read_integrals_ao.irp.f index c323c7e1..77f2213e 100644 --- a/plugins/read_integral/read_integrals_ao.irp.f +++ b/plugins/read_integral/read_integrals_ao.irp.f @@ -68,6 +68,7 @@ subroutine run call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values) call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) call ezfio_set_integrals_bielec_disk_access_ao_integrals('Read') From 2ac2853f5c410b7581b82054348ca768f2e3c0c4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 26 Sep 2017 15:17:45 +0200 Subject: [PATCH 11/34] Four index --- plugins/FourIdx/four_idx.irp.f | 5 +++++ plugins/FourIdx/four_index.irp.f | 11 ++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f index de5927bf..f3388b83 100644 --- a/plugins/FourIdx/four_idx.irp.f +++ b/plugins/FourIdx/four_idx.irp.f @@ -18,6 +18,11 @@ program FourIdx 1, 1, 1, 1, ao_num, ao_num, ao_num, 
ao_num, & 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) +! call four_index_transform(ao_integrals_map,test_map, & +! mo_coef, size(mo_coef,1), & +! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & +! 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + integer :: i,j,k,l real(integral_kind) :: integral1, integral2 diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index 01197eae..261b7d12 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -100,6 +100,7 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(T) allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) + ! a=a_start,b call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & (d-b_start+1), & (j_end-j_start+1), 1.d0, & @@ -122,9 +123,12 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & enddo idx = 0_8 - do b=b_start,d + do b=b_start,b_end do c=c_start,c_end - do a=a_start,min(b,c) + do a=a_start,a_end +! if (a>b) cycle +! if (a>c) cycle +! if (b>d) cycle if (dabs(U(a,c,b)) < 1.d-15) then cycle endif @@ -137,7 +141,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & !$OMP CRITICAL call map_append(map_c, key, value, idx) call map_sort(map_c) - call map_unique(map_c) +! call map_update(map_c, key, value, idx) +! 
call map_merge(map_c) !$OMP END CRITICAL enddo From ccd4e67357c3cfdb367b34c514bf0bc53c9aeaea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 27 Sep 2017 15:09:00 +0200 Subject: [PATCH 12/34] Symmetric transformation OK --- plugins/FourIdx/four_idx.irp.f | 10 +- plugins/FourIdx/four_index.irp.f | 82 ++++++++---- plugins/FourIdx/four_index_sym.irp.f | 191 +++++++++++++++++++++++++++ 3 files changed, 250 insertions(+), 33 deletions(-) create mode 100644 plugins/FourIdx/four_index_sym.irp.f diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f index f3388b83..5a874ee6 100644 --- a/plugins/FourIdx/four_idx.irp.f +++ b/plugins/FourIdx/four_idx.irp.f @@ -13,16 +13,16 @@ program FourIdx sze = key_max call map_init(test_map,sze) - call four_index_transform(ao_integrals_map,test_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - ! call four_index_transform(ao_integrals_map,test_map, & ! mo_coef, size(mo_coef,1), & ! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & ! 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + call four_index_transform_sym(ao_integrals_map,test_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + integer :: i,j,k,l real(integral_kind) :: integral1, integral2 diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index 261b7d12..8f6c02ed 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -5,6 +5,7 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & a_end , b_end , c_end , d_end ) implicit none use map_module + use mmap_module BEGIN_DOC ! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) ! 
C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} @@ -53,18 +54,53 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= k_max) ASSERT (LDB >= l_max) - !$OMP PARALLEL DEFAULT(PRIVATE) SHARED( & + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B) + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO SCHEDULE(static,1) + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do d=d_start,d_end U = 0.d0 - do l=1,l_end + do l=l_start,l_end if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif @@ -73,21 +109,12 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & V(a_start:a_end, k_start:k_end, 
j_start:j_end) ) - do k=k_start,k_end - do j=j_start,j_end - do i=i_start,k - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,tmp) - T(i, k,j) = tmp - enddo - enddo - enddo - do j=j_start,j_end - do k=k_start,k_end - do i=k+1,i_end - T(i, k,j) = T(k, i,j) - enddo - enddo + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) enddo call DGEMM('T','N', (a_end-a_start+1), & @@ -100,9 +127,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(T) allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) - ! a=a_start,b call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & - (d-b_start+1), & + (b_end-b_start+1), & (j_end-j_start+1), 1.d0, & V(a_start,k_start,j_start), size(V,1)*size(V,2), & matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & @@ -110,8 +136,8 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & deallocate(V) - do b=b_start,d - call DGEMM('N','N', (b-a_start+1), (c_end-c_start+1), & + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & (k_end-k_start+1), matrix_B(l, d), & T(a_start,k_start,b), size(T,1), & matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & @@ -126,9 +152,6 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & do b=b_start,b_end do c=c_start,c_end do a=a_start,a_end -! if (a>b) cycle -! if (a>c) cycle -! if (b>d) cycle if (dabs(U(a,c,b)) < 1.d-15) then cycle endif @@ -138,17 +161,20 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & enddo enddo enddo + !$OMP CRITICAL call map_append(map_c, key, value, idx) call map_sort(map_c) -! call map_update(map_c, key, value, idx) -! 
call map_merge(map_c) !$OMP END CRITICAL + enddo !$OMP END DO deallocate(key,value) !$OMP END PARALLEL + call munmap( & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + end diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f new file mode 100644 index 00000000..5fc152a8 --- /dev/null +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -0,0 +1,191 @@ +subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = 
max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)/2, l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,k + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + print *, d, l + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) + + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = 
a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + T(k, i,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + enddo + +! V = 0.d0 +! do a=a_start,a_end +! do k=k_start,k_end +! do j=j_start,j_end +! do i=i_start,i_end +! V(a,k,j) = V(a,k,j) + T(i,k,j)*matrix_B(i,a) +! enddo +! enddo +! enddo +! enddo + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & + (i_end-i_start+1), 1.d0, & + matrix_B(i_start,a_start), size(matrix_B,1), & + T(i_start,k_start,j_start), size(T,1), 0.d0, & + V(a_start,k_start,j_start), size(V, 1) ) + + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) + + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + + deallocate(V) + + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + enddo + + deallocate(T) + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + call map_sort(map_c) + !$OMP END CRITICAL + + + enddo + !$OMP END DO + + deallocate(key,value) + !$OMP END PARALLEL + + call munmap( & + (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + +end From 6965ff15700e629a28cfd2fadeb8893e88020888 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 27 Sep 2017 23:13:44 +0200 
Subject: [PATCH 13/34] Symmetry OK --- plugins/FourIdx/four_idx.irp.f | 4 + plugins/FourIdx/four_index.irp.f | 2 +- plugins/FourIdx/four_index_sym.irp.f | 118 +++++++++++++++++---------- 3 files changed, 82 insertions(+), 42 deletions(-) diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f index 5a874ee6..29061a46 100644 --- a/plugins/FourIdx/four_idx.irp.f +++ b/plugins/FourIdx/four_idx.irp.f @@ -18,10 +18,14 @@ program FourIdx ! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & ! 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + double precision :: t0,t1 + call wall_time(t0) call four_index_transform_sym(ao_integrals_map,test_map, & mo_coef, size(mo_coef,1), & 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + call wall_time(t1) + print *, 'Time: ', t1-t0, 's' integer :: i,j,k,l real(integral_kind) :: integral1, integral2 diff --git a/plugins/FourIdx/four_index.irp.f b/plugins/FourIdx/four_index.irp.f index 8f6c02ed..0c30f55e 100644 --- a/plugins/FourIdx/four_index.irp.f +++ b/plugins/FourIdx/four_index.irp.f @@ -140,7 +140,7 @@ subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & (k_end-k_start+1), matrix_B(l, d), & T(a_start,k_start,b), size(T,1), & - matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & U(a_start,c_start,b), size(U,1) ) enddo diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 5fc152a8..7b0b2c3a 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -20,10 +20,11 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & integer, intent(in) :: a_start, b_start, c_start, d_start integer, intent(in) :: a_end , b_end , c_end , d_end - double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + double precision, allocatable :: T(:,:), U(:,:,:), 
V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) integer :: i_max, j_max, k_max, l_max integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l + integer :: i, j, k, l, ik integer :: a, b, c, d double precision, external :: get_ao_bielec_integral integer(key_kind) :: idx @@ -68,8 +69,8 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & !$OMP map_a,map_c,matrix_B) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & - !$OMP a,b,c,d,tmp) + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik, & + !$OMP a,b,c,d,tmp,T2d,V2d) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -97,6 +98,12 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo !$OMP END DO + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end) ) + + !$OMP DO SCHEDULE(dynamic) do d=d_start,d_end U = 0.d0 @@ -105,58 +112,87 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & cycle endif print *, d, l - - allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & - V(a_start:a_end, k_start:k_end, j_start:j_end) ) - - T = 0.d0 + + T2d = 0.d0 do a=2,a_array(1,1,l-l_start+1) i = a_array(1,a,l-l_start+1) j = a_array(2,a,l-l_start+1) k = a_array(3,a,l-l_start+1) - T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) - T(k, i,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + ik = (i-i_start+1) + ishft( (k-k_start+1)*(k-k_start), -1) + T2d(ik,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) enddo -! V = 0.d0 -! do a=a_start,a_end -! do k=k_start,k_end +! V2d = 0.d0 +! do b=b_start,d ! do j=j_start,j_end -! do i=i_start,i_end -! V(a,k,j) = V(a,k,j) + T(i,k,j)*matrix_B(i,a) +! 
do ik=1, ishft( (i_end-i_start+1)*(i_end-i_start+2), -1) +! V2d(ik,b) = V2d(ik,b) + T2d(ik,j)*matrix_B(j,b) ! enddo ! enddo -! enddo ! enddo - call DGEMM('T','N', (a_end-a_start+1), & - (k_end-k_start+1)*(j_end-j_start+1), & - (i_end-i_start+1), 1.d0, & - matrix_B(i_start,a_start), size(matrix_B,1), & - T(i_start,k_start,j_start), size(T,1), 0.d0, & - V(a_start,k_start,j_start), size(V, 1) ) + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + - deallocate(T) - allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) ) + do b=b_start,d + V(:,:) = 0.d0 + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo - call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & - (d-b_start+1), & - (j_end-j_start+1), 1.d0, & - V(a_start,k_start,j_start), size(V,1)*size(V,2), & - matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & - T(a_start,k_start,b_start), size(T,1)*size(T,2) ) +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) - deallocate(V) - - do b=b_start,b_end - call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & - (k_end-k_start+1), matrix_B(l, d), & - T(a_start,k_start,b), size(T,1), & - matrix_B(k_start,k_start), size(matrix_B,1), 1.d0, & +! do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! 
enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif enddo - deallocate(T) - enddo idx = 0_8 @@ -182,7 +218,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo !$OMP END DO - deallocate(key,value) + deallocate(key,value,V,T) !$OMP END PARALLEL call munmap( & From 56609e4b3cffe2b8c8312112bba97e8b42aff553 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 28 Sep 2017 01:10:44 +0200 Subject: [PATCH 14/34] Almost as fast --- plugins/FourIdx/four_index_sym.irp.f | 74 +++++++++++++++------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 7b0b2c3a..597395a6 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -24,7 +24,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & double precision, allocatable :: T2d(:,:), V2d(:,:) integer :: i_max, j_max, k_max, l_max integer :: i_min, j_min, k_min, l_min - integer :: i, j, k, l, ik + integer :: i, j, k, l, ik, ll integer :: a, b, c, d double precision, external :: get_ao_bielec_integral integer(key_kind) :: idx @@ -58,18 +58,17 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! 
Create a temporary memory-mapped file integer :: fd type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:,:,:) + integer*8, pointer :: a_array(:,:) call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)/2, l_end-l_start+1 /)) - + (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ ((i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)*3_8)/2_8, l_end-l_start+1_8 /)) !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & !$OMP map_a,map_c,matrix_B) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik, & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & !$OMP a,b,c,d,tmp,T2d,V2d) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -78,30 +77,34 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP DO SCHEDULE(dynamic,4) do l=l_start,l_end a = 1 + ll = l-l_start+1 do j=j_start,j_end + ik=0 do k=k_start,k_end do i=i_start,k + ik = ik+1 call bielec_integrals_index(i,j,k,l,idx) call map_get(map_a,idx,tmp) if (tmp /= 0.d0) then a = a+1 - a_array(1,a,l-l_start+1) = i - a_array(2,a,l-l_start+1) = j - a_array(3,a,l-l_start+1) = k - a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + a_array(a,ll) = ik + a = a+1 + a_array(a,ll) = j + a = a+1 + a_array(a,ll) = transfer(dble(tmp), 1_8) endif enddo enddo enddo - a_array(1,1,l-l_start+1) = a - print *, l + a_array(a+1,ll) = 0 + a_array(1,ll) = a enddo !$OMP END DO allocate( 
T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & V(i_start:i_end, k_start:k_end), & - T(k_start:k_end, a_start:a_end) ) + T(k_start:k_end, a_start:a_end)) !$OMP DO SCHEDULE(dynamic) @@ -111,35 +114,38 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & if (dabs(matrix_B(l,d)) < 1.d-10) then cycle endif - print *, d, l - T2d = 0.d0 - do a=2,a_array(1,1,l-l_start+1) - i = a_array(1,a,l-l_start+1) - j = a_array(2,a,l-l_start+1) - k = a_array(3,a,l-l_start+1) - ik = (i-i_start+1) + ishft( (k-k_start+1)*(k-k_start), -1) - T2d(ik,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) - enddo - -! V2d = 0.d0 -! do b=b_start,d -! do j=j_start,j_end -! do ik=1, ishft( (i_end-i_start+1)*(i_end-i_start+2), -1) -! V2d(ik,b) = V2d(ik,b) + T2d(ik,j)*matrix_B(j,b) -! enddo -! enddo + ll = l-l_start+1 +! T2d = 0.d0 +! do a=2,a_array(1,ll),3 +! ik = a_array(a,ll) +! j = a_array(a+1,ll) +! T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) ! 
enddo + + a=2 + do j=j_start,j_end + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + if ( (ik /= a_array(a,ll)).or.(j /= a_array(a+1,ll)) ) then + T2d(ik,j) = 0.d0 + else + T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) + a=a+3 + endif + enddo + enddo + enddo call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& (d-b_start+1), & (j_end-j_start+1), 1.d0, & T2d(1,j_start), size(T2d,1), & matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & V2d(1,b_start), size(V2d,1) ) - do b=b_start,d - V(:,:) = 0.d0 ik = 0 do k=k_start,k_end do i=i_start,k @@ -211,7 +217,6 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP CRITICAL call map_append(map_c, key, value, idx) - call map_sort(map_c) !$OMP END CRITICAL @@ -220,8 +225,9 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL + call map_sort(map_c) call munmap( & - (/ 2_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, c_pointer) end From 9772a2b7d898e26807eec9f6ea9d0924ad4c1bba Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 28 Sep 2017 02:24:26 +0200 Subject: [PATCH 15/34] Four idx almost as fast n 4 cores --- plugins/FourIdx/four_index_sym.irp.f | 102 ++++++++++++++++----------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 597395a6..ffab74e5 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -31,6 +31,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & real(integral_kind) :: tmp integer(key_kind), allocatable :: key(:) real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) ASSERT (k_start == i_start) ASSERT (l_start == j_start) @@ -58,48 +59,71 @@ 
subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! Create a temporary memory-mapped file integer :: fd type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:,:) + integer*8, pointer :: a_array(:) call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ ((i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1)*3_8)/2_8, l_end-l_start+1_8 /)) + (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /)) + + allocate(l_pointer(l_start:l_end+1)) + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(static,137) +! do i=1,size(a_array) +! a_array(i) = 0.d0 +! enddo +! !$OMP END PARALLEL DO + + allocate( value((i_max*k_max)) ) + a = 1 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = a + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array(a) = ik + a = a+1 + a_array(a) = j + a = a+1 + a_array(a) = transfer(dble(tmp), 1_8) + a = a+1 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP END PARALLEL + l_pointer(l_end+1) = a + deallocate(value) !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B) & + !$OMP map_a,map_c,matrix_B,l_pointer) & !$OMP 
PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & !$OMP a,b,c,d,tmp,T2d,V2d) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) - !$OMP DO SCHEDULE(dynamic,4) - do l=l_start,l_end - a = 1 - ll = l-l_start+1 - do j=j_start,j_end - ik=0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map_a,idx,tmp) - if (tmp /= 0.d0) then - a = a+1 - a_array(a,ll) = ik - a = a+1 - a_array(a,ll) = j - a = a+1 - a_array(a,ll) = transfer(dble(tmp), 1_8) - endif - enddo - enddo - enddo - a_array(a+1,ll) = 0 - a_array(1,ll) = a - enddo - !$OMP END DO allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & @@ -115,24 +139,17 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & cycle endif - ll = l-l_start+1 -! T2d = 0.d0 -! do a=2,a_array(1,ll),3 -! ik = a_array(a,ll) -! j = a_array(a+1,ll) -! T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) -! 
enddo - - a=2 + a=l_pointer(l) do j=j_start,j_end ik=0 do k=k_start,k_end do i=i_start,k ik = ik+1 - if ( (ik /= a_array(a,ll)).or.(j /= a_array(a+1,ll)) ) then + if ( (ik /= a_array(a)).or.(j /= a_array(a+1)) & + .or.(a >= l_pointer(l+1)) ) then T2d(ik,j) = 0.d0 else - T2d(ik,j) = transfer(a_array(a+2,ll), 1.d0) + T2d(ik,j) = transfer(a_array(a+2), 1.d0) a=a+3 endif enddo @@ -228,6 +245,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & call map_sort(map_c) call munmap( & - (/ (int(i_end-i_start+1,8)*int(j_end-j_start+2,8)*int(k_end-k_start+1,8)),int(l_end-l_start+1,8) /), 16, fd, c_pointer) + (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer) + deallocate(l_pointer) end From 827e6933d4f64ab4d48f315bf1caebb8bb4ed19d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 2 Oct 2017 09:49:22 +0200 Subject: [PATCH 16/34] Commit --- plugins/FourIdx/four_index_sym.irp.f | 76 +++++++++++++------- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 +- 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index ffab74e5..e12d47ea 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -27,6 +27,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & integer :: i, j, k, l, ik, ll integer :: a, b, c, d double precision, external :: get_ao_bielec_integral + integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: tmp integer(key_kind), allocatable :: key(:) @@ -64,20 +65,12 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer) call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /)) - allocate(l_pointer(l_start:l_end+1)) - -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(static,137) -! do i=1,size(a_array) -! a_array(i) = 0.d0 -! enddo -! 
!$OMP END PARALLEL DO - - allocate( value((i_max*k_max)) ) - a = 1 - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx,ii) do l=l_start,l_end !$OMP SINGLE - l_pointer(l) = a + l_pointer(l) = ii !$OMP END SINGLE do j=j_start,j_end !$OMP DO SCHEDULE(static,1) @@ -97,12 +90,12 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ik = ik+1 tmp=value(ik) if (tmp /= 0.d0) then - a_array(a) = ik - a = a+1 - a_array(a) = j - a = a+1 - a_array(a) = transfer(dble(tmp), 1_8) - a = a+1 + a_array(ii) = ik + ii = ii+1_8 + a_array(ii) = j + ii = ii+1_8 + a_array(ii) = transfer(dble(tmp), 1_8) + ii = ii+1_8 endif enddo enddo @@ -110,16 +103,31 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo enddo !$OMP END PARALLEL - l_pointer(l_end+1) = a + l_pointer(l_end+1) = ii deallocate(value) +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! 
END INPUT DATA + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_a,map_c,matrix_B,l_pointer) & + !$OMP map_c,matrix_B,l_pointer) & !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & - !$OMP a,b,c,d,tmp,T2d,V2d) + !$OMP a,b,c,d,tmp,T2d,V2d,ii) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -139,18 +147,18 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & cycle endif - a=l_pointer(l) + ii=l_pointer(l) do j=j_start,j_end ik=0 do k=k_start,k_end do i=i_start,k ik = ik+1 - if ( (ik /= a_array(a)).or.(j /= a_array(a+1)) & - .or.(a >= l_pointer(l+1)) ) then + if ( (ik /= a_array(ii)).or.(j /= a_array(ii+1_8)) & + .or.(ii >= l_pointer(l+1)) ) then T2d(ik,j) = 0.d0 else - T2d(ik,j) = transfer(a_array(a+2), 1.d0) - a=a+3 + T2d(ik,j) = transfer(a_array(ii+2_8), 1.d0) + ii=ii+3_8 endif enddo enddo @@ -236,6 +244,22 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & call map_append(map_c, key, value, idx) !$OMP END CRITICAL +!!$OMP CRITICAL +!WRITE OUTPUT +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +!END WRITE OUTPUT +!!$OMP END CRITICAL + enddo !$OMP END DO diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index afeb08fd..f0a54214 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -350,12 +350,12 @@ subroutine get_first_tooth(computed, first_teeth) end subroutine -BEGIN_PROVIDER [ integer, size_tbc ] +BEGIN_PROVIDER [ integer*8, size_tbc ] implicit none BEGIN_DOC ! 
Size of the tbc array END_DOC - size_tbc = (comb_teeth+1)*N_det_generators + fragment_count*fragment_first + size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first END_PROVIDER subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) From 1c58249f5399a826051db75ed9853b46c8ee7043 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 2 Oct 2017 11:40:36 +0200 Subject: [PATCH 17/34] Integrated in main program --- plugins/FourIdx/four_idx.irp.f | 53 --------------------- plugins/FourIdx/four_index_sym.irp.f | 6 ++- plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES | 2 +- src/Integrals_Bielec/mo_bi_integrals.irp.f | 11 ++++- 4 files changed, 15 insertions(+), 57 deletions(-) delete mode 100644 plugins/FourIdx/four_idx.irp.f diff --git a/plugins/FourIdx/four_idx.irp.f b/plugins/FourIdx/four_idx.irp.f deleted file mode 100644 index 29061a46..00000000 --- a/plugins/FourIdx/four_idx.irp.f +++ /dev/null @@ -1,53 +0,0 @@ -program FourIdx - use map_module - implicit none - BEGIN_DOC -! Performs a four index transformation of the two-electron integrals - END_DOC - - type(map_type) :: test_map - integer(key_kind) :: key_max - integer(map_size_kind) :: sze - - call bielec_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max) - sze = key_max - call map_init(test_map,sze) - -! call four_index_transform(ao_integrals_map,test_map, & -! mo_coef, size(mo_coef,1), & -! 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & -! 
1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - - double precision :: t0,t1 - call wall_time(t0) - call four_index_transform_sym(ao_integrals_map,test_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - call wall_time(t1) - print *, 'Time: ', t1-t0, 's' - - integer :: i,j,k,l - real(integral_kind) :: integral1, integral2 - - provide mo_bielec_integrals_in_map - - do i=1,mo_tot_num - do j=1,mo_tot_num - do k=1,mo_tot_num - do l=1,mo_tot_num - call bielec_integrals_index(i,j,k,l,key_max) - call map_get(test_map,key_max,integral1) - call map_get(mo_integrals_map,key_max,integral2) - if (dabs(integral2) >=1.d-10 ) then - if (dabs(integral1 / integral2 -1.d0) > .001d0) then - print *, i,j,k,l - print *, integral1, integral2 - print *, '' - endif - endif - enddo - enddo - enddo - enddo -end diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index e12d47ea..3debd349 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -67,7 +67,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) ii = 1_8 - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx,ii) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) do l=l_start,l_end !$OMP SINGLE l_pointer(l) = ii @@ -102,8 +102,10 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP END SINGLE enddo enddo - !$OMP END PARALLEL + !$OMP SINGLE l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL deallocate(value) !INPUT DATA diff --git a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES index 7ff203d4..d9a3a160 100644 --- a/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES +++ b/plugins/Full_CI_ZMQ/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Perturbation Selectors_full Generators_full ZMQ +Perturbation 
Selectors_full Generators_full ZMQ FourIdx diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 84cfd228..22799923 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -117,7 +117,16 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] endif else - call add_integrals_to_map(full_ijkl_bitmask_4) +! call add_integrals_to_map(full_ijkl_bitmask_4) + call four_index_transform_sym(ao_integrals_map,mo_integrals_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + + integer*8 :: get_mo_map_size, mo_map_size + mo_map_size = get_mo_map_size() + + print*,'Molecular integrals provided' endif if (write_mo_integrals) then call ezfio_set_work_empty(.False.) From f8ee845825e2516c4c457ab9ba70e1479a7ece92 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 Oct 2017 10:37:10 +0200 Subject: [PATCH 18/34] Fixed Slater's Rules --- plugins/FourIdx/four_index_sym.irp.f | 4 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 3 +- src/Determinants/slater_rules.irp.f | 581 +++++++++++++++++-- src/Integrals_Bielec/NEEDED_CHILDREN_MODULES | 2 +- 4 files changed, 528 insertions(+), 62 deletions(-) diff --git a/plugins/FourIdx/four_index_sym.irp.f b/plugins/FourIdx/four_index_sym.irp.f index 3debd349..cd9cb150 100644 --- a/plugins/FourIdx/four_index_sym.irp.f +++ b/plugins/FourIdx/four_index_sym.irp.f @@ -246,8 +246,8 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & call map_append(map_c, key, value, idx) !$OMP END CRITICAL -!!$OMP CRITICAL !WRITE OUTPUT +! OMP CRITICAL !print *, d !do b=b_start,d ! do c=c_start,c_end @@ -259,8 +259,8 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! enddo ! enddo !enddo +! 
OMP END CRITICAL !END WRITE OUTPUT -!!$OMP END CRITICAL enddo diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index f0a54214..62873c32 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -408,7 +408,8 @@ end subroutine subroutine add_comb(comb, computed, tbc, stbc, ct) implicit none - integer, intent(in) :: stbc, ct + integer*8, intent(in) :: stbc + integer, intent(in) :: ct double precision, intent(in) :: comb logical, intent(inout) :: computed(N_det_generators) integer, intent(inout) :: tbc(0:stbc) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index e3f5c0b1..eb128715 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -234,61 +234,66 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) cycle case(1) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 low = min(exc(1,1,ispin), exc(1,2,ispin)) - high = max(exc(1,1,ispin), exc(1,2,ispin)) - - ASSERT (low > 0) - j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) + + ASSERT (low >= 0) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) if (j==k) then - nperm = nperm + popcnt(iand(det1(j,ispin), & - iand( ibset(0_bit_kind,m-1)-1_bit_kind, & - ibclr(-1_bit_kind,n)+1_bit_kind ) )) -! TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, & -! ishft(1_bit_kind,m)-1_bit_kind))) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin), & - ibset(0_bit_kind,m-1)-1_bit_kind)) -! 
TODO ishft(1_bit_kind,m)-1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) -! TODO ishft(1_bit_kind,m)-1_bit_kind)) - endif + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + do i=j+1,k-1 nperm = nperm + popcnt(det1(i,ispin)) end do + endif case (2) - do i=1,2 - low = min(exc(i,1,ispin), exc(i,2,ispin)) - high = max(exc(i,1,ispin), exc(i,2,ispin)) - + do l=1,2 + high = max(exc(l,1,ispin), exc(l,2,ispin))-1 + low = min(exc(l,1,ispin), exc(l,2,ispin)) + ASSERT (low > 0) - j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) if (j==k) then - nperm = nperm + popcnt(iand(det1(j,ispin), & - iand( ibset(0_bit_kind,m-1)-1_bit_kind, & - ibclr(-1_bit_kind,n)+1_bit_kind ) )) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin), & - ibset(0_bit_kind,m-1)-1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) - endif - do l=j+1,k-1 - nperm = nperm + popcnt(det1(l,ispin)) + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) end do + endif enddo @@ -297,7 +302,7 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) b = max(exc(1,1,ispin), exc(1,2,ispin)) c = 
min(exc(2,1,ispin), exc(2,2,ispin)) d = max(exc(2,1,ispin), exc(2,2,ispin)) - if (c>a .and. cb) then + if ((a 0) - j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) - n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 + low = min(exc(1,1,ispin), exc(1,2,ispin)) + + ASSERT (low >= 0) ASSERT (high > 0) - k = ishft(high-1,-bit_kind_shift)+1 - m = iand(high-1,bit_kind_size-1)+1 + + k = ishft(high,-bit_kind_shift)+1 + j = ishft(low,-bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + if (j==k) then - nperm = popcnt(iand(det1(j,ispin), & - iand(ibset(0_bit_kind,m-1)-1_bit_kind,ibclr(-1_bit_kind,n)+1_bit_kind))) -!TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, & -! ishft(1_bit_kind,m)-1_bit_kind))) + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) else - nperm = nperm + popcnt(iand(det1(k,ispin),ibset(0_bit_kind,m-1)-1_bit_kind)) -!TODO nperm = popcnt(iand(det1(k,ispin), ishft(1_bit_kind,m)-1_bit_kind)) + & -! 
popcnt(iand(det1(j,ispin), not(ishft(1_bit_kind,n+1))+1_bit_kind)) - if (n < bit_kind_size) then - nperm = nperm + popcnt(iand(det1(j,ispin),ibclr(-1_bit_kind,n)+1_bit_kind)) - endif + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + do i=j+1,k-1 nperm = nperm + popcnt(det1(i,ispin)) end do + endif + phase = phase_dble(iand(nperm,1)) return enddo enddo + end subroutine bitstring_to_list_ab( string, list, n_elements, Nint) @@ -428,7 +438,6 @@ subroutine bitstring_to_list_ab( string, list, n_elements, Nint) enddo end - subroutine bitstring_to_list_ab_old( string, list, n_elements, Nint) use bitmasks implicit none @@ -2030,6 +2039,112 @@ subroutine get_occ_from_key(key,occ,Nint) end +subroutine get_double_excitation_phase_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(in) :: det2(Nint,2) + integer, intent(in) :: exc(0:2,2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + do ispin = 1,2 + select case (exc(0,1,ispin)) + case(0) + cycle + + case(1) + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 + low = min(exc(1,1,ispin), exc(1,2,ispin)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + 
iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) + end do + + endif + + case (2) + + do l=1,2 + high = max(exc(l,1,ispin), exc(l,2,ispin))-1 + low = min(exc(l,1,ispin), exc(l,2,ispin)) + + ASSERT (low > 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j,ispin), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j,ispin), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k,ispin), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i,ispin)) + end do + + endif + + enddo + + a = min(exc(1,1,ispin), exc(1,2,ispin)) + b = max(exc(1,1,ispin), exc(1,2,ispin)) + c = min(exc(2,1,ispin), exc(2,2,ispin)) + d = max(exc(2,1,ispin), exc(2,2,ispin)) + if (c>a .and. cb) then + nperm = nperm + 1 + endif + exit + end select + + enddo + phase = phase_dble(iand(nperm,1)) +end + + + subroutine get_double_excitation_phase(det1,det2,exc,phase,Nint) use bitmasks implicit none @@ -2315,6 +2430,356 @@ subroutine decode_exc_spin(exc,h1,p1,h2,p2) end select end +subroutine get_excitation_degree_spin_new(key1,key2,degree,Nint) + use bitmasks + include 'Utils/constants.include.F' + implicit none + BEGIN_DOC + ! 
Returns the excitation degree between two determinants + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key1(Nint) + integer(bit_kind), intent(in) :: key2(Nint) + integer, intent(out) :: degree + + integer(bit_kind) :: xorvec(N_int_max) + integer :: l + + ASSERT (Nint > 0) + + select case (Nint) + + case (1) + xorvec(1) = xor( key1(1), key2(1)) + degree = popcnt(xorvec(1)) + + case (2) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + degree = popcnt(xorvec(1))+popcnt(xorvec(2)) + + case (3) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + xorvec(3) = xor( key1(3), key2(3)) + degree = sum(popcnt(xorvec(1:3))) + + case (4) + xorvec(1) = xor( key1(1), key2(1)) + xorvec(2) = xor( key1(2), key2(2)) + xorvec(3) = xor( key1(3), key2(3)) + xorvec(4) = xor( key1(4), key2(4)) + degree = sum(popcnt(xorvec(1:4))) + + case default + do l=1,Nint + xorvec(l) = xor( key1(l), key2(l)) + enddo + degree = sum(popcnt(xorvec(1:Nint))) + + end select + + degree = ishft(degree,-1) + +end + + +subroutine get_excitation_spin_new(det1,det2,exc,degree,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the excitation operators between two determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + integer, intent(out) :: degree + double precision, intent(out) :: phase + ! exc(number,hole/particle) + ! ex : + ! exc(0,1) = number of holes + ! exc(0,2) = number of particles + ! exc(1,2) = first particle + ! 
exc(1,1) = first hole + + ASSERT (Nint > 0) + + !DIR$ FORCEINLINE + call get_excitation_degree_spin(det1,det2,degree,Nint) + select case (degree) + + case (3:) + degree = -1 + return + + case (2) + call get_double_excitation_spin(det1,det2,exc,phase,Nint) + return + + case (1) + call get_mono_excitation_spin(det1,det2,exc,phase,Nint) + return + + case(0) + return + + end select +end + +subroutine decode_exc_spin_new(exc,h1,p1,h2,p2) + use bitmasks + implicit none + BEGIN_DOC + ! Decodes the exc arrays returned by get_excitation. + ! h1,h2 : Holes + ! p1,p2 : Particles + END_DOC + integer, intent(in) :: exc(0:2,2) + integer, intent(out) :: h1,h2,p1,p2 + + select case (exc(0,1)) + case(2) + h1 = exc(1,1) + h2 = exc(2,1) + p1 = exc(1,2) + p2 = exc(2,2) + case(1) + h1 = exc(1,1) + h2 = 0 + p1 = exc(1,2) + p2 = 0 + case default + h1 = 0 + p1 = 0 + h2 = 0 + p2 = 0 + end select +end + + +subroutine get_double_excitation_spin_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the two excitation operators between two doubly excited spin-determinants + ! 
and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + exc(0,1) = 0 + exc(0,2) = 0 + + idx_particle = 0 + idx_hole = 0 + ishift = 1-bit_kind_size + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l) == det2(l)) then + cycle + endif + tmp = xor( det1(l), det2(l) ) + particle = iand(tmp, det2(l)) + hole = iand(tmp, det1(l)) + do while (particle /= 0_bit_kind) + tz = trailz(particle) + idx_particle = idx_particle + 1 + exc(0,2) = exc(0,2) + 1 + exc(idx_particle,2) = tz+ishift + particle = iand(particle,particle-1_bit_kind) + enddo + if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2 + exit + endif + do while (hole /= 0_bit_kind) + tz = trailz(hole) + idx_hole = idx_hole + 1 + exc(0,1) = exc(0,1) + 1 + exc(idx_hole,1) = tz+ishift + hole = iand(hole,hole-1_bit_kind) + enddo + if (iand(exc(0,1),exc(0,2))==2) then ! 
exc(0,1)==2 or exc(0,2)==2 + exit + endif + enddo + + select case (exc(0,1)) + + case(1) + + high = max(exc(1,1), exc(1,2))-1 + low = min(exc(1,1), exc(1,2)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + case (2) + + do l=1,2 + high = max(exc(l,1), exc(l,2))-1 + low = min(exc(l,1), exc(l,2)) + + ASSERT (low > 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + enddo + + a = min(exc(1,1), exc(1,2)) + b = max(exc(1,1), exc(1,2)) + c = min(exc(2,1), exc(2,2)) + d = max(exc(2,1), exc(2,2)) + if (c>a .and. cb) then + nperm = nperm + 1 + endif + end select + + phase = phase_dble(iand(nperm,1)) + +end + +subroutine get_mono_excitation_spin_new(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! 
Returns the excitation operator between two singly excited determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint) + integer(bit_kind), intent(in) :: det2(Nint) + integer, intent(out) :: exc(0:2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + + ASSERT (Nint > 0) + nperm = 0 + exc(0,1) = 0 + exc(0,2) = 0 + + ishift = 1-bit_kind_size + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l) == det2(l)) then + cycle + endif + tmp = xor( det1(l), det2(l) ) + particle = iand(tmp, det2(l)) + hole = iand(tmp, det1(l)) + if (particle /= 0_bit_kind) then + tz = trailz(particle) + exc(0,2) = 1 + exc(1,2) = tz+ishift + endif + if (hole /= 0_bit_kind) then + tz = trailz(hole) + exc(0,1) = 1 + exc(1,1) = tz+ishift + endif + + if ( iand(exc(0,1),exc(0,2)) /= 1) then ! 
exc(0,1)/=1 and exc(0,2) /= 1 + cycle + endif + + high = max(exc(1,1), exc(1,2))-1 + low = min(exc(1,1), exc(1,2)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = ishft(high,-bit_kind_shift) + j = ishft(low,-bit_kind_shift) + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(det1(j), & + iand( ishft(1_bit_kind,m)-1_bit_kind, & + not(ishft(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(det1(j), & + iand(not(0_bit_kind), & + (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(det1(k), & + (ishft(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(det1(i)) + end do + + endif + + phase = phase_dble(iand(nperm,1)) + return + + enddo +end subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint) use bitmasks diff --git a/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES b/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES index 152711f3..245e3014 100644 --- a/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES +++ b/src/Integrals_Bielec/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Pseudo Bitmask ZMQ +Pseudo Bitmask ZMQ FourIdx From 36bbd61c143fe85ac31f4c9f25f58d08c1967d15 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 Oct 2017 18:52:09 +0200 Subject: [PATCH 19/34] Forgot files in FourIdx --- src/FourIdx/NEEDED_CHILDREN_MODULES | 1 + src/FourIdx/README.rst | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 src/FourIdx/NEEDED_CHILDREN_MODULES create mode 100644 src/FourIdx/README.rst diff --git a/src/FourIdx/NEEDED_CHILDREN_MODULES b/src/FourIdx/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..96b2cfdc --- /dev/null +++ b/src/FourIdx/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +ZMQ diff --git a/src/FourIdx/README.rst b/src/FourIdx/README.rst new file mode 100644 index 00000000..6ea432c6 --- /dev/null +++ b/src/FourIdx/README.rst @@ -0,0 +1,6 @@ +======= +FourIdx +======= + +Four-index transformation. 
+ From 090525748ab3947e5d6b6070006f4144b5f653d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 Oct 2017 19:08:39 +0200 Subject: [PATCH 20/34] Four index file missing --- src/FourIdx/four_index_sym.irp.f | 277 +++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 src/FourIdx/four_index_sym.irp.f diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f new file mode 100644 index 00000000..cd9cb150 --- /dev/null +++ b/src/FourIdx/four_index_sym.irp.f @@ -0,0 +1,277 @@ +subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + 
ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /)) + + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array(ii) = ik + ii = ii+1_8 + a_array(ii) = j + ii = ii+1_8 + a_array(ii) = transfer(dble(tmp), 1_8) + ii = ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! 
END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + if ( (ik /= a_array(ii)).or.(j /= a_array(ii+1_8)) & + .or.(ii >= l_pointer(l+1)) ) then + T2d(ik,j) = 0.d0 + else + T2d(ik,j) = transfer(a_array(ii+2_8), 1.d0) + ii=ii+3_8 + endif + enddo + enddo + enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! 
U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_sort(map_c) + + call munmap( & + (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer) + deallocate(l_pointer) + +end From b5750ed87b11a66ce20109d012c2b683d470ac93 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 6 Oct 2017 13:05:57 +0200 Subject: [PATCH 21/34] not very efficient but working mrsc2 --- plugins/mrcepa0/dressing_slave.irp.f | 81 ++++++++++------------------ 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index 2a6ddb1b..d7f081cd 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -42,18 +42,18 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer, allocatable :: hp(:,:) - integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2 + integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, m, l, deg, ni, m2 integer :: n(2) integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn logical :: ok - double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al + double precision :: phase_ia, phase_Ik, phase_Jl, phase_Ji, phase_la, phase_ka, phase_tmp + double precision :: Hka, Hla, Ska, Sla, tmp double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) double precision :: contrib, contrib_s2, wall, iwall - double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) - integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ, exc integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp - logical, external :: is_in_wavefunction, isInCassd, detEq + logical, external :: is_in_wavefunction integer,allocatable :: komon(:) logical :: komoned !double precision, external :: get_dij @@ -63,8 +63,8 @@ subroutine 
mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) - allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) - allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) + allocate (delta(N_states,0:N_det_non_ref, 2)) + allocate (delta_s2(N_states,0:N_det_non_ref, 2)) allocate(komon(0:N_det_non_ref)) allocate(hp(2,N_det_non_ref)) @@ -100,7 +100,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) k = det_cepa0_idx(linked(kk, i_I)) blok = blokMwen(kk, i_I) - call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,degree,phase_Ik,N_int) + call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,deg,phase_Ik,N_int) if(J /= i_I) then call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int) @@ -135,36 +135,10 @@ subroutine mrsc2_dressing_slave(thread,iproc) if(h_cache(J,i) == 0.d0) cycle if(h_cache(i_I,i) == 0.d0) cycle - - !ok = .false. - !do i_state=1, N_states - ! if(lambda_mrcc(i_state, i) /= 0d0) then - ! ok = .true. - ! exit - ! end if - !end do - !if(.not. ok) cycle -! - + komon(0) += 1 kn = komon(0) komon(kn) = i - - -! call get_excitation(psi_ref(1,1,J),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ji,N_int) -! if(I_i /= J) call get_excitation(psi_ref(1,1,I_i),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ii,N_int) -! if(I_i == J) phase_Ii = phase_Ji - - do i_state = 1,N_states - dkI = h_cache(J,i) * dij(i_I, i, i_state) - dleat(i_state, kn, 1) = dkI - dleat(i_state, kn, 2) = dkI - - dkI = s2_cache(J,i) * dij(i_I, i, i_state) - dleat_s2(i_state, kn, 1) = dkI - dleat_s2(i_state, kn, 2) = dkI - end do - end do komoned = .true. @@ -178,18 +152,20 @@ subroutine mrsc2_dressing_slave(thread,iproc) call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) if(.not. ok) cycle if(HP(1,i) + HP(1,k) <= 2 .and. 
HP(2,i) + HP(2,k) <= 2) then - cycle + if(is_in_wavefunction(det_tmp, N_int)) cycle end if - !if(isInCassd(det_tmp, N_int)) cycle - + + call i_h_j_phase_out(psi_non_ref(1,1,i), det_tmp, N_int, tmp, phase_ia,exc, deg) + call i_h_j_phase_out(psi_ref(1,1,i_I), psi_non_ref(1,1,k), N_int, tmp, phase_ik,exc, deg) + + call i_h_j_phase_out(psi_non_ref(1,1,l), det_tmp, N_int, Hla, phase_la,exc,deg) + call get_s2(psi_non_ref(1,1,l), det_tmp, N_int, Sla) + + do i_state = 1, N_states - !if(lambda_mrcc(i_state, i) == 0d0) cycle - - - !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al - contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) - contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) + contrib = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Hla * phase_ia * phase_ik + contrib_s2 = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Sla *phase_ia * phase_ik delta(i_state,ll,1) += contrib delta_s2(i_state,ll,1) += contrib_s2 if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then @@ -198,9 +174,12 @@ subroutine mrsc2_dressing_slave(thread,iproc) endif if(I_i == J) cycle - !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al - contrib = dij(J, l, i_state) * dleat(i_state, m, 1) - contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) + call i_h_j_phase_out(psi_non_ref(1,1,k), det_tmp, N_int, Hka, phase_ka,exc,deg) + call get_s2(psi_non_ref(1,1,k), det_tmp, N_int, Ska) + call i_h_j_phase_out(psi_ref(1,1,J), psi_non_ref(1,1,l), N_int, tmp, phase_jl,exc, deg) + + contrib = dij(J, l, i_state) * dij(J, i, i_state) * Hka* phase_ia * phase_jl + contrib_s2 = dij(J, l, i_state) * dij(J, i, i_state) * Ska*phase_ia*phase_jl delta(i_state,kk,2) += contrib delta_s2(i_state,kk,2) += contrib_s2 if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then @@ -211,12 +190,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) end do ! while end do ! 
kk - - call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - -! end if - + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) enddo deallocate(delta) From 7ac793cc52b25aacad7694246286927ea7c4bcc5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 6 Oct 2017 15:41:44 +0200 Subject: [PATCH 22/34] Working on truncate_wf --- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 2 +- plugins/QMC/truncate_wf_spin.irp.f | 15 ++++---- src/Davidson/diagonalization_hs2.irp.f | 2 +- src/Determinants/determinants.irp.f | 35 ------------------- src/Determinants/spindeterminants.irp.f | 3 +- 5 files changed, 12 insertions(+), 45 deletions(-) diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index ceb7bd95..8684eb0f 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -57,7 +57,7 @@ subroutine run_selection_slave(thread,iproc,energy) endif if(done .or. ctask == size(task_id)) then - ASSERT (.not.(buf%N == 0 .and. 
ctask > 0)) + ASSERT (buf%N /= 0) do i=1, ctask call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do diff --git a/plugins/QMC/truncate_wf_spin.irp.f b/plugins/QMC/truncate_wf_spin.irp.f index 5a5fe125..b0769efd 100644 --- a/plugins/QMC/truncate_wf_spin.irp.f +++ b/plugins/QMC/truncate_wf_spin.irp.f @@ -47,7 +47,7 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_columns(k) == -i) then - psi_bilinear_matrix_values(k,1) = 0.d0 + psi_bilinear_matrix_values(k,1:N_states) = 0.d0 endif enddo !$OMP END PARALLEL DO @@ -55,7 +55,7 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_rows(k) == i) then - psi_bilinear_matrix_values(k,1) = 0.d0 + psi_bilinear_matrix_values(k,1:N_states) = 0.d0 endif enddo !$OMP END PARALLEL DO @@ -85,20 +85,21 @@ subroutine run double precision, external :: u_dot_u, u_dot_v do i=1,N_states - e_0(i) = u_dot_v(v_t(1,i),u_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) + e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) + print *, 'E = ', e_0(i) enddo m = 0 do k=1,n_det - if (psi_bilinear_matrix_values(k,1) /= 0.d0) then + if (sum(psi_bilinear_matrix_values(k,1:N_states)) /= 0.d0) then m = m+1 endif enddo - E = E_0(1) + nuclear_repulsion - norm = u_dot_u(u_0(1,1),N_det) + do k=1,N_states + E = E_0(k) + nuclear_repulsion + enddo print *, 'Number of determinants:', m - print *, 'Energy', E exit enddo call wf_of_psi_bilinear_matrix(.True.) 
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 0a2d5389..dd330644 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -139,7 +139,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 9a1d4ee1..dd272014 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -447,28 +447,12 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) integer :: i,k - PROVIDE progress_bar - call start_progress(7,'Saving wfunction',0.d0) - - progress_bar(1) = 1 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_int(N_int) - progress_bar(1) = 2 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_bit_kind(bit_kind) - progress_bar(1) = 3 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_det(ndet) - progress_bar(1) = 4 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_n_states(nstates) - progress_bar(1) = 5 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_mo_label(mo_label) - progress_bar(1) = 6 - progress_value = dble(progress_bar(1)) - N_int2 = (N_int*bit_kind)/8 allocate (psi_det_save(N_int2,2,ndet)) do i=1,ndet @@ -484,13 +468,10 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) do k=1,N_int2 psi_det_save(k,2,i) = det_8(k) enddo -! 
print*,psi_det_save enddo call ezfio_set_determinants_psi_det(psi_det_save) deallocate (psi_det_save) - progress_bar(1) = 7 - progress_value = dble(progress_bar(1)) allocate (psi_coef_save(ndet,nstates)) double precision :: accu_norm(nstates) accu_norm = 0.d0 @@ -537,28 +518,12 @@ subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde integer :: i,k - PROVIDE progress_bar - call start_progress(7,'Saving wfunction',0.d0) - - progress_bar(1) = 1 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_int(N_int) - progress_bar(1) = 2 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_bit_kind(bit_kind) - progress_bar(1) = 3 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_N_det(ndetsave) - progress_bar(1) = 4 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_n_states(nstates) - progress_bar(1) = 5 - progress_value = dble(progress_bar(1)) call ezfio_set_determinants_mo_label(mo_label) - progress_bar(1) = 6 - progress_value = dble(progress_bar(1)) - N_int2 = (N_int*bit_kind)/8 allocate (psi_det_save(N_int2,2,ndetsave)) do i=1,ndetsave diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index b6ca1cba..75c2ee31 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -365,8 +365,9 @@ end do k=1,N_det i = psi_bilinear_matrix_rows(k) j = psi_bilinear_matrix_columns(k) + f = 0.d0 do l=1,N_states - f = psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l) + f += psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l) enddo det_alpha_norm(i) += f det_beta_norm(j) += f From 026383b429001c15658e02e72e59e932ecefd6fe Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 9 Oct 2017 09:46:46 +0200 Subject: [PATCH 23/34] Fixed symlinks --- scripts/compilation/qp_create_ninja.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/scripts/compilation/qp_create_ninja.py b/scripts/compilation/qp_create_ninja.py index 1b7272b1..cb1ea89a 100755 --- a/scripts/compilation/qp_create_ninja.py +++ b/scripts/compilation/qp_create_ninja.py @@ -266,7 +266,7 @@ def ninja_ezfio_rule(): install_lib_ezfio = join(QP_ROOT, 'install', 'EZFIO', "lib", "libezfio_irp.a") l_cmd = ["cd {0}".format(QP_EZFIO)] + l_flag - l_cmd += ["rm -f make.config ; ninja && ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)] + l_cmd += ["rm -f make.config ; ninja && rm -f {1} ; ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)] l_string = ["rule build_ezfio", " command = {0}".format(" ; ".join(l_cmd)), @@ -307,7 +307,7 @@ def ninja_symlink_rule(): """ Return the command to create for the symlink """ - return ["rule build_symlink", " command = ln -sf $in $out", ""] + return ["rule build_symlink", " command = rm -f $out ; ln -sf $in $out", ""] def ninja_symlink_build(path_module, l_symlink): From 06fc8cd8e17d591566f4d3bb5a5affc9ca7fc999 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 9 Oct 2017 14:14:27 +0200 Subject: [PATCH 24/34] Working on truncate_wf --- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 1 - plugins/QMC/truncate_wf_spin.irp.f | 23 +++++++++++------ src/Determinants/determinants.irp.f | 25 +++++-------------- src/Utils/transpose.irp.f | 8 ++++++ 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 8684eb0f..930eec2c 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -57,7 +57,6 @@ subroutine run_selection_slave(thread,iproc,energy) endif if(done .or. 
ctask == size(task_id)) then - ASSERT (buf%N /= 0) do i=1, ctask call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do diff --git a/plugins/QMC/truncate_wf_spin.irp.f b/plugins/QMC/truncate_wf_spin.irp.f index b0769efd..68e903c1 100644 --- a/plugins/QMC/truncate_wf_spin.irp.f +++ b/plugins/QMC/truncate_wf_spin.irp.f @@ -39,7 +39,8 @@ subroutine run call dsort(norm_sort(1),iorder(1),nab) - PROVIDE psi_bilinear_matrix_values nuclear_repulsion + PROVIDE psi_bilinear_matrix_values psi_bilinear_matrix_rows psi_bilinear_matrix_columns + PROVIDE nuclear_repulsion print *, '' do j=0,nab i = iorder(j) @@ -47,7 +48,9 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_columns(k) == -i) then - psi_bilinear_matrix_values(k,1:N_states) = 0.d0 + do l=1,N_states + psi_bilinear_matrix_values(k,l) = 0.d0 + enddo endif enddo !$OMP END PARALLEL DO @@ -55,7 +58,9 @@ subroutine run !$OMP PARALLEL DO PRIVATE(k) do k=1,n_det if (psi_bilinear_matrix_rows(k) == i) then - psi_bilinear_matrix_values(k,1:N_states) = 0.d0 + do l=1,N_states + psi_bilinear_matrix_values(k,l) = 0.d0 + enddo endif enddo !$OMP END PARALLEL DO @@ -64,9 +69,11 @@ subroutine run cycle endif - u_0 = psi_bilinear_matrix_values(1:N_det,1:N_states) - v_t = 0.d0 - s_t = 0.d0 + u_0(1:N_det,1:N_states) = psi_bilinear_matrix_values(1:N_det,1:N_states) + v_0(1:N_det,1:N_states) = 0.d0 + u_t(1:N_states,1:N_det) = 0.d0 + v_t(1:N_states,1:N_det) = 0.d0 + s_t(1:N_states,1:N_det) = 0.d0 call dtranspose( & u_0, & size(u_0, 1), & @@ -85,8 +92,8 @@ subroutine run double precision, external :: u_dot_u, u_dot_v do i=1,N_states - e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) - print *, 'E = ', e_0(i) + e_0(i) = u_dot_v(u_0(1,i),v_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) + print *, 'E = ', e_0(i) + nuclear_repulsion enddo m = 0 diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index dd272014..d11e853c 100644 --- 
a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -435,17 +435,14 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) ! Save the wave function into the EZFIO file END_DOC use bitmasks + include 'constants.include.F' integer, intent(in) :: ndet,nstates,dim_psicoef integer(bit_kind), intent(in) :: psidet(N_int,2,ndet) double precision, intent(in) :: psicoef(dim_psicoef,nstates) integer*8, allocatable :: psi_det_save(:,:,:) double precision, allocatable :: psi_coef_save(:,:) - integer*8 :: det_8(100) - integer(bit_kind) :: det_bk((100*8)/bit_kind) - integer :: N_int2 - equivalence (det_8, det_bk) - integer :: i,k + integer :: i,j,k call ezfio_set_determinants_N_int(N_int) call ezfio_set_determinants_bit_kind(bit_kind) @@ -453,21 +450,13 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) call ezfio_set_determinants_n_states(nstates) call ezfio_set_determinants_mo_label(mo_label) - N_int2 = (N_int*bit_kind)/8 - allocate (psi_det_save(N_int2,2,ndet)) + allocate (psi_det_save(N_int,2,ndet)) do i=1,ndet + do j=1,2 do k=1,N_int - det_bk(k) = psidet(k,1,i) - enddo - do k=1,N_int2 - psi_det_save(k,1,i) = det_8(k) - enddo - do k=1,N_int - det_bk(k) = psidet(k,2,i) - enddo - do k=1,N_int2 - psi_det_save(k,2,i) = det_8(k) + psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8) enddo + enddo enddo call ezfio_set_determinants_psi_det(psi_det_save) deallocate (psi_det_save) @@ -492,7 +481,6 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) call ezfio_set_determinants_psi_coef(psi_coef_save) call write_int(output_determinants,ndet,'Saved determinants') - call stop_progress deallocate (psi_coef_save) end @@ -565,7 +553,6 @@ subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde call ezfio_set_determinants_psi_coef(psi_coef_save) call write_int(output_determinants,ndet,'Saved determinants') - call stop_progress deallocate (psi_coef_save) end 
diff --git a/src/Utils/transpose.irp.f b/src/Utils/transpose.irp.f index 32e502e9..ec33023d 100644 --- a/src/Utils/transpose.irp.f +++ b/src/Utils/transpose.irp.f @@ -47,6 +47,14 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2) double precision, intent(in) :: A(LDA,d2) double precision, intent(out) :: B(LDB,d1) + +! do j=1,d1 +! do i=1,d2 +! B(i,j ) = A(j ,i) +! enddo +! enddo +! return + integer :: i,j,k, mod_align if ( d2 < 32 ) then do j=1,d1 From 24c4dddc2fd3e347a8308f2c9bcbb9b92708a75c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 9 Oct 2017 15:29:58 +0200 Subject: [PATCH 25/34] Added densify_coefmatrix.irp.f --- plugins/QMC/densify_coefmatrix.irp.f | 18 ++++++++++++++++++ src/Determinants/spindeterminants.irp.f | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 plugins/QMC/densify_coefmatrix.irp.f diff --git a/plugins/QMC/densify_coefmatrix.irp.f b/plugins/QMC/densify_coefmatrix.irp.f new file mode 100644 index 00000000..aca29944 --- /dev/null +++ b/plugins/QMC/densify_coefmatrix.irp.f @@ -0,0 +1,18 @@ +program densify + implicit none + read_wf = .True. + touch read_wf + call generate_all_alpha_beta_det_products() + +! call wf_of_psi_bilinear_matrix(.False.) +! integer :: i, istate +! do istate=1,N_states +! do i=1,N_det +! if (psi_coef(i,istate) == 0.d0) then +! psi_coef(i,istate) = 1.d-6 +! endif +! enddo +! 
enddo + call diagonalize_ci + call save_wavefunction +end diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 75c2ee31..1d873af8 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -691,7 +691,7 @@ subroutine generate_all_alpha_beta_det_products integer, external :: get_index_in_psi_det_sorted_bit integer(bit_kind), allocatable :: tmp_det(:,:,:) logical, external :: is_in_wavefunction - integer, external :: omp_get_thread_num + PROVIDE H_apply_buffer_allocated !$OMP PARALLEL DEFAULT(NONE) SHARED(psi_coef_sorted_bit,N_det_beta_unique,& !$OMP N_det_alpha_unique, N_int, psi_det_alpha_unique, psi_det_beta_unique,& @@ -713,7 +713,7 @@ subroutine generate_all_alpha_beta_det_products enddo call fill_H_apply_buffer_no_selection(l-1, tmp_det, N_int, iproc) enddo - !$OMP END DO NOWAIT + !$OMP END DO deallocate(tmp_det) !$OMP END PARALLEL call copy_H_apply_buffer_to_wf From 561ce296d2715179ea57e9e16672d45b95ebaadc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Oct 2017 17:16:46 +0200 Subject: [PATCH 26/34] Faster four idx transformation --- plugins/QMC/densify_coefmatrix.irp.f | 10 ---- src/FourIdx/four_index_sym.irp.f | 69 +++++++++++++++++----------- src/Utils/map_functions.irp.f | 16 +++---- 3 files changed, 50 insertions(+), 45 deletions(-) diff --git a/plugins/QMC/densify_coefmatrix.irp.f b/plugins/QMC/densify_coefmatrix.irp.f index aca29944..2e1894b2 100644 --- a/plugins/QMC/densify_coefmatrix.irp.f +++ b/plugins/QMC/densify_coefmatrix.irp.f @@ -3,16 +3,6 @@ program densify read_wf = .True. touch read_wf call generate_all_alpha_beta_det_products() - -! call wf_of_psi_bilinear_matrix(.False.) -! integer :: i, istate -! do istate=1,N_states -! do i=1,N_det -! if (psi_coef(i,istate) == 0.d0) then -! psi_coef(i,istate) = 1.d-6 -! endif -! enddo -! 
enddo call diagonalize_ci call save_wavefunction end diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f index cd9cb150..14a095ca 100644 --- a/src/FourIdx/four_index_sym.irp.f +++ b/src/FourIdx/four_index_sym.irp.f @@ -58,13 +58,25 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= l_max) ! Create a temporary memory-mapped file - integer :: fd - type(c_ptr) :: c_pointer - integer*8, pointer :: a_array(:) - call mmap(trim(ezfio_filename)//'/work/four_idx', & - (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer) - call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /)) + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + integer*4, pointer :: a_array_ik(:) + integer*2, pointer :: a_array_j(:) + double precision, pointer :: a_array_value(:) + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) + + print *, 'Transforming MO integrals' allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) ii = 1_8 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) @@ -90,12 +102,10 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ik = ik+1 tmp=value(ik) if (tmp /= 0.d0) then - a_array(ii) = ik - ii = ii+1_8 - a_array(ii) = j - ii = ii+1_8 - a_array(ii) = transfer(dble(tmp), 1_8) - ii = ii+1_8 + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 endif enddo enddo @@ -103,6 +113,9 @@ subroutine 
four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo enddo !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 l_pointer(l_end+1) = ii !$OMP END SINGLE !$OMP END PARALLEL @@ -123,7 +136,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ! END INPUT DATA - !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & @@ -143,6 +156,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP DO SCHEDULE(dynamic) do d=d_start,d_end + print *, d, '/', d_end U = 0.d0 do l=l_start,l_end if (dabs(matrix_B(l,d)) < 1.d-10) then @@ -151,18 +165,12 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ii=l_pointer(l) do j=j_start,j_end - ik=0 - do k=k_start,k_end - do i=i_start,k - ik = ik+1 - if ( (ik /= a_array(ii)).or.(j /= a_array(ii+1_8)) & - .or.(ii >= l_pointer(l+1)) ) then - T2d(ik,j) = 0.d0 - else - T2d(ik,j) = transfer(a_array(ii+2_8), 1.d0) - ii=ii+3_8 - endif - enddo + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 enddo enddo call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& @@ -270,8 +278,15 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & !$OMP END PARALLEL call map_sort(map_c) - call munmap( & - (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer) + call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') + close(10,status='DELETE') + call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') + close(10,status='DELETE') + call munmap( (/ new_size /), 
8, fd(3), c_pointer(3)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') + close(10,status='DELETE') deallocate(l_pointer) end diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f index de7f66d7..c7ea6938 100644 --- a/src/Utils/map_functions.irp.f +++ b/src/Utils/map_functions.irp.f @@ -46,8 +46,8 @@ subroutine map_save_to_disk(filename,map) enddo deallocate(map % map(i) % value) deallocate(map % map(i) % key) - map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) - map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :) enddo map % consolidated_idx (map % map_size + 2_8) = k map % consolidated = .True. @@ -82,7 +82,7 @@ subroutine map_load_from_disk(filename,map) call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) - map% n_elements = map % consolidated_idx (map % map_size+2_8)-1 + map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) @@ -96,11 +96,11 @@ subroutine map_load_from_disk(filename,map) do i=0_8, map % map_size deallocate(map % map(i) % value) deallocate(map % map(i) % key) - map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) - map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :) map % map(i) % sorted = .True. 
- n_elements = int( map % consolidated_idx (i+2) - k, 4) - k = map % consolidated_idx (i+2) + n_elements = int( map % consolidated_idx (i+2_8) - k, 4) + k = map % consolidated_idx (i+2_8) map % map(i) % map_size = n_elements map % map(i) % n_elements = n_elements ! Load memory from disk @@ -116,7 +116,7 @@ subroutine map_load_from_disk(filename,map) enddo enddo map % sorted = x>0 .or. l == 0_8 - map % n_elements = k-1 + map % n_elements = k-1_8 map % sorted = map % sorted .or. .True. map % consolidated = .True. From a50663a77a08bc709ca3bd458e2bea5a087e5f99 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Oct 2017 21:10:53 +0200 Subject: [PATCH 27/34] Working on four_idx_zmq --- src/Davidson/davidson_parallel.irp.f | 4 +- src/Determinants/H_apply.irp.f | 2 +- src/FourIdx/four_index.irp.f | 180 +++++++++++++ src/FourIdx/four_index_block.irp.f | 288 ++++++++++++++++++++ src/FourIdx/four_index_slave.irp.f.todo | 279 ++++++++++++++++++++ src/FourIdx/four_index_sym.irp.f | 5 +- src/FourIdx/four_index_sym_mmap.irp.f | 292 +++++++++++++++++++++ src/FourIdx/four_index_zmq.irp.f.todo | 273 +++++++++++++++++++ src/Integrals_Bielec/mo_bi_integrals.irp.f | 11 +- 9 files changed, 1324 insertions(+), 10 deletions(-) create mode 100644 src/FourIdx/four_index.irp.f create mode 100644 src/FourIdx/four_index_block.irp.f create mode 100644 src/FourIdx/four_index_slave.irp.f.todo create mode 100644 src/FourIdx/four_index_sym_mmap.irp.f create mode 100644 src/FourIdx/four_index_zmq.irp.f.todo diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 2b57545b..24f2f947 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -205,10 +205,10 @@ subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + 
if(rc /= 4) stop "davidson_pull_results failed to pull imin" rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop "davidson_pull_results failed to pull imax" sz = (imax-imin+1)*N_states_diag diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 26f981dc..1d7a5bd8 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -193,7 +193,7 @@ subroutine copy_H_apply_buffer_to_wf SOFT_TOUCH N_det psi_det psi_coef logical :: found_duplicates - !call remove_duplicates_in_psi_det(found_duplicates) + call remove_duplicates_in_psi_det(found_duplicates) end subroutine remove_duplicates_in_psi_det(found_duplicates) diff --git a/src/FourIdx/four_index.irp.f b/src/FourIdx/four_index.irp.f new file mode 100644 index 00000000..0c30f55e --- /dev/null +++ b/src/FourIdx/four_index.irp.f @@ -0,0 +1,180 @@ +subroutine four_index_transform(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:,:), U(:,:,:), V(:,:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! 
Create a temporary memory-mapped file + integer :: fd + type(c_ptr) :: c_pointer + integer*8, pointer :: a_array(:,:,:) + call mmap(trim(ezfio_filename)//'/work/four_idx', & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, .False., c_pointer) + call c_f_pointer(c_pointer, a_array, (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1), l_end-l_start+1 /)) + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_a,map_c,matrix_B) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, & + !$OMP a,b,c,d,tmp) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + !$OMP DO SCHEDULE(dynamic,4) + do l=l_start,l_end + a = 1 + do j=j_start,j_end + do k=k_start,k_end + do i=i_start,i_end + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,tmp) + if (tmp /= 0.d0) then + a = a+1 + a_array(1,a,l-l_start+1) = i + a_array(2,a,l-l_start+1) = j + a_array(3,a,l-l_start+1) = k + a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8) + endif + enddo + enddo + enddo + a_array(1,1,l-l_start+1) = a + print *, l + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + print *, d, l + + allocate( T(i_start:i_end, k_start:k_end, j_start:j_end), & + V(a_start:a_end, k_start:k_end, j_start:j_end) ) + + T = 0.d0 + do a=2,a_array(1,1,l-l_start+1) + i = a_array(1,a,l-l_start+1) + j = a_array(2,a,l-l_start+1) + k = a_array(3,a,l-l_start+1) + T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0) + enddo + + call DGEMM('T','N', (a_end-a_start+1), & + (k_end-k_start+1)*(j_end-j_start+1), & + (i_end-i_start+1), 1.d0, & + 
matrix_B(i_start,a_start), size(matrix_B,1), & + T(i_start,k_start,j_start), size(T,1), 0.d0, & + V(a_start,k_start,j_start), size(V, 1) ) + + deallocate(T) + allocate( T(a_start:a_end, k_start:k_end, b_start:d) ) + + call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1), & + (b_end-b_start+1), & + (j_end-j_start+1), 1.d0, & + V(a_start,k_start,j_start), size(V,1)*size(V,2), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + T(a_start,k_start,b_start), size(T,1)*size(T,2) ) + + deallocate(V) + + do b=b_start,b_end + call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(a_start,k_start,b), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + enddo + + deallocate(T) + + enddo + + idx = 0_8 + do b=b_start,b_end + do c=c_start,c_end + do a=a_start,a_end + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + call map_sort(map_c) + !$OMP END CRITICAL + + + enddo + !$OMP END DO + + deallocate(key,value) + !$OMP END PARALLEL + + call munmap( & + (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),int(k_end-k_start+1,8), int(l_end-l_start+1,8) /), 8, fd, c_pointer) + +end diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f new file mode 100644 index 00000000..dce5fcc9 --- /dev/null +++ b/src/FourIdx/four_index_block.irp.f @@ -0,0 +1,288 @@ +subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: l_start_block, l_end_block, l_block + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + integer*4, allocatable :: a_array_ik(:) + integer*2, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + integer :: ipass, npass + integer*8 :: tempspace + + tempspace = (new_size * 14_8) / (1024_8 * 1024_8) + npass = min(l_end-l_start,1 + tempspace / 2048) ! 
2 GiB of scratch space + l_block = (l_end-l_start)/npass + + ipass = 0 + do l_start_block = l_start, l_end, l_block + ipass = ipass+1 + print *, 'Pass ', ipass + l_end_block = min(l_end, l_start_block+l_block-1) + + allocate(l_pointer(l_start_block:l_end_block+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start_block,l_end_block + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end_block+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + + !INPUT DATA + !open(unit=10,file='INPUT',form='UNFORMATTED') + !write(10) i_start, j_start, i_end, j_end + !write(10) a_start, b_start, a_end, b_end + !write(10) LDB, mo_tot_num + !write(10) matrix_B(1:LDB,1:mo_tot_num) + !idx=size(a_array) + !write(10) idx + !write(10) a_array + !write(10) l_pointer + !close(10) + !open(unit=10,file='OUTPUT',form='FORMATTED') + ! 
END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start_block,l_end_block + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + + ! T = 0.d0 + ! do a=a_start,b + ! do k=k_start,k_end + ! do i=i_start,k + ! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) + ! enddo + ! do i=k+1,i_end + ! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) + ! enddo + ! enddo + ! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + + ! do c=c_start,b + ! do a=a_start,c + ! do k=k_start,k_end + ! 
U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) + ! do c=b+1,c_end + ! do a=a_start,b + ! do k=k_start,k_end + ! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) + ! enddo + ! enddo + ! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_update(map_c, key, value, idx,1.d-15) + !$OMP END CRITICAL + + !WRITE OUTPUT + ! OMP CRITICAL + !print *, d + !do b=b_start,d + ! do c=c_start,c_end + ! do a=a_start,min(b,c) + ! if (dabs(U(a,c,b)) < 1.d-15) then + ! cycle + ! endif + ! write(10,*) d,c,b,a,U(a,c,b) + ! enddo + ! enddo + !enddo + ! 
OMP END CRITICAL + !END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + enddo + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_slave.irp.f.todo b/src/FourIdx/four_index_slave.irp.f.todo new file mode 100644 index 00000000..47124823 --- /dev/null +++ b/src/FourIdx/four_index_slave.irp.f.todo @@ -0,0 +1,279 @@ +subroutine four_index_transform_slave(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, task_id, thread ) + implicit none + use f77_zmq + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + integer, intent(in) :: task_id, thread + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start 
== d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + integer*4, allocatable :: a_array_ik(:) + integer*2, allocatable :: a_array_j(:) + double precision, allocatable :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size)) + + + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array +!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! 
END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + integer(ZMQ_PTR) :: zmq_socket_push + zmq_socket_push = new_zmq_push_socket(thread) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! do c=c_start,b +! do a=a_start,c +! 
do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call four_idx_push_results(zmq_socket_push, key, value, idx, task_id) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + call end_zmq_push_socket(zmq_socket_push,thread) + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_merge(map_c) + + deallocate(l_pointer) + deallocate(a_array_ik,a_array_j,a_array_value) + +end diff --git a/src/FourIdx/four_index_sym.irp.f b/src/FourIdx/four_index_sym.irp.f index 14a095ca..79c8d1d3 100644 --- a/src/FourIdx/four_index_sym.irp.f +++ b/src/FourIdx/four_index_sym.irp.f @@ -173,6 +173,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & ii = ii + 1_8 enddo enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& (d-b_start+1), & (j_end-j_start+1), 1.d0, & @@ -251,7 +252,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & enddo !$OMP CRITICAL - call map_append(map_c, key, value, idx) + call map_update(map_c, key, value, idx,1.d-15) !$OMP END CRITICAL !WRITE OUTPUT @@ -276,7 +277,7 @@ subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL - call map_sort(map_c) + call map_merge(map_c) call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') diff --git a/src/FourIdx/four_index_sym_mmap.irp.f b/src/FourIdx/four_index_sym_mmap.irp.f new file mode 100644 index 00000000..877daf30 --- /dev/null +++ b/src/FourIdx/four_index_sym_mmap.irp.f @@ -0,0 +1,292 @@ +subroutine four_index_transform_sym_mmap(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use map_module + use mmap_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + ! 
Create a temporary memory-mapped file + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + integer*4, pointer :: a_array_ik(:) + integer*2, pointer :: a_array_j(:) + double precision, pointer :: a_array_value(:) + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /)) + + call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /)) + + print *, 'Transforming MO integrals' + allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) ) + ii = 1_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx) + do l=l_start,l_end + !$OMP SINGLE + l_pointer(l) = ii + !$OMP END SINGLE + do j=j_start,j_end + !$OMP DO SCHEDULE(static,1) + do k=k_start,k_end + do i=i_start,k + ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map_a,idx,value(ik)) + enddo + enddo + !$OMP END DO + + !$OMP SINGLE + ik=0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + tmp=value(ik) + if (tmp /= 0.d0) then + a_array_ik(ii) = ik + a_array_j(ii) = j + a_array_value(ii) = tmp + ii=ii+1_8 + endif + enddo + enddo + !$OMP END SINGLE + enddo + enddo + !$OMP SINGLE + a_array_ik(ii) = 0 + a_array_j(ii) = 0 + a_array_value(ii) = 0.d0 + l_pointer(l_end+1) = ii + !$OMP END SINGLE + !$OMP END PARALLEL + deallocate(value) + +!INPUT DATA +!open(unit=10,file='INPUT',form='UNFORMATTED') +!write(10) i_start, j_start, i_end, j_end +!write(10) a_start, b_start, a_end, b_end +!write(10) LDB, mo_tot_num +!write(10) matrix_B(1:LDB,1:mo_tot_num) +!idx=size(a_array) +!write(10) idx +!write(10) a_array 
+!write(10) l_pointer +!close(10) +!open(unit=10,file='OUTPUT',form='FORMATTED') +! END INPUT DATA + + + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, & + !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& + !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,& + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP a,b,c,d,tmp,T2d,V2d,ii) + allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) + allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) + + + + allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), & + V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), & + V(i_start:i_end, k_start:k_end), & + T(k_start:k_end, a_start:a_end)) + + + !$OMP DO SCHEDULE(dynamic) + do d=d_start,d_end + print *, d, '/', d_end + U = 0.d0 + do l=l_start,l_end + if (dabs(matrix_B(l,d)) < 1.d-10) then + cycle + endif + + ii=l_pointer(l) + do j=j_start,j_end + !DIR$ VECTOR NONTEMPORAL + T2d(:,j) = 0.d0 + !DIR$ IVDEP + do while (j == a_array_j(ii)) + T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + ii = ii + 1_8 + enddo + enddo + call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),& + (d-b_start+1), & + (j_end-j_start+1), 1.d0, & + T2d(1,j_start), size(T2d,1), & + matrix_B(j_start,b_start), size(matrix_B,1),0.d0, & + V2d(1,b_start), size(V2d,1) ) + + do b=b_start,d + ik = 0 + do k=k_start,k_end + do i=i_start,k + ik = ik+1 + V(i,k) = V2d(ik,b) + enddo + enddo + +! T = 0.d0 +! do a=a_start,b +! do k=k_start,k_end +! do i=i_start,k +! T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a) +! enddo +! do i=k+1,i_end +! T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a) +! enddo +! enddo +! enddo + call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1), & + 1.d0, & + V(i_start,k_start), size(V,1), & + matrix_B(i_start,a_start), size(matrix_B,1),0.d0, & + T(k_start,a_start), size(T,1) ) + +! 
do c=c_start,b +! do a=a_start,c +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + call DGEMM('T','N', (b-a_start+1), (b-c_start+1), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,c_start), size(matrix_B,1), 1.d0, & + U(a_start,c_start,b), size(U,1) ) +! do c=b+1,c_end +! do a=a_start,b +! do k=k_start,k_end +! U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d) +! enddo +! enddo +! enddo + if (b < b_end) then + call DGEMM('T','N', (b-a_start+1), (c_end-b), & + (k_end-k_start+1), matrix_B(l, d), & + T(k_start,a_start), size(T,1), & + matrix_B(k_start,b+1), size(matrix_B,1), 1.d0, & + U(a_start,b+1,b), size(U,1) ) + endif + enddo + + enddo + + idx = 0_8 + do b=b_start,d + do c=c_start,c_end + do a=a_start,min(b,c) + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + enddo + + !$OMP CRITICAL + call map_append(map_c, key, value, idx) + !$OMP END CRITICAL + +!WRITE OUTPUT +! OMP CRITICAL +!print *, d +!do b=b_start,d +! do c=c_start,c_end +! do a=a_start,min(b,c) +! if (dabs(U(a,c,b)) < 1.d-15) then +! cycle +! endif +! write(10,*) d,c,b,a,U(a,c,b) +! enddo +! enddo +!enddo +! 
OMP END CRITICAL +!END WRITE OUTPUT + + + enddo + !$OMP END DO + + deallocate(key,value,V,T) + !$OMP END PARALLEL + call map_sort(map_c) + + call munmap( (/ new_size /), 4, fd(1), c_pointer(1)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik') + close(10,status='DELETE') + call munmap( (/ new_size /), 2, fd(2), c_pointer(2)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j') + close(10,status='DELETE') + call munmap( (/ new_size /), 8, fd(3), c_pointer(3)) + open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value') + close(10,status='DELETE') + deallocate(l_pointer) + +end diff --git a/src/FourIdx/four_index_zmq.irp.f.todo b/src/FourIdx/four_index_zmq.irp.f.todo new file mode 100644 index 00000000..b2f639a7 --- /dev/null +++ b/src/FourIdx/four_index_zmq.irp.f.todo @@ -0,0 +1,273 @@ +subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start, & + i_end , j_end , k_end , l_end , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end ) + implicit none + use f77_zmq + use map_module + BEGIN_DOC +! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM) +! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld} +! 
Loops run over *_start->*_end + END_DOC + type(map_type), intent(in) :: map_a + type(map_type), intent(inout) :: map_c + integer, intent(in) :: LDB + double precision, intent(in) :: matrix_B(LDB,*) + integer, intent(in) :: i_start, j_start, k_start, l_start + integer, intent(in) :: i_end , j_end , k_end , l_end + integer, intent(in) :: a_start, b_start, c_start, d_start + integer, intent(in) :: a_end , b_end , c_end , d_end + + double precision, allocatable :: T(:,:), U(:,:,:), V(:,:) + double precision, allocatable :: T2d(:,:), V2d(:,:) + integer :: i_max, j_max, k_max, l_max + integer :: i_min, j_min, k_min, l_min + integer :: i, j, k, l, ik, ll + integer :: l_start_block, l_end_block, l_block + integer :: a, b, c, d + double precision, external :: get_ao_bielec_integral + integer*8 :: ii + integer(key_kind) :: idx + real(integral_kind) :: tmp + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + integer*8, allocatable :: l_pointer(:) + + ASSERT (k_start == i_start) + ASSERT (l_start == j_start) + ASSERT (a_start == c_start) + ASSERT (b_start == d_start) + + i_min = min(i_start,a_start) + i_max = max(i_end ,a_end ) + j_min = min(j_start,b_start) + j_max = max(j_end ,b_end ) + k_min = min(k_start,c_start) + k_max = max(k_end ,c_end ) + l_min = min(l_start,d_start) + l_max = max(l_end ,d_end ) + + ASSERT (0 < i_max) + ASSERT (0 < j_max) + ASSERT (0 < k_max) + ASSERT (0 < l_max) + ASSERT (LDB >= i_max) + ASSERT (LDB >= j_max) + ASSERT (LDB >= k_max) + ASSERT (LDB >= l_max) + + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + call new_parallel_job(zmq_to_qp_run_socket,'four_idx') + + integer*8 :: new_size + new_size = max(1024_8, 5_8 * map_a % n_elements ) + + integer :: npass + integer*8 :: tempspace + + tempspace = (new_size * 14_8) / (1024_8 * 1024_8) + npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space + l_block = (l_end-l_start)/npass + + ! Create tasks + ! 
============ + + character(len=64) :: task + + do l_start_block = l_start, l_end, l_block + l_end_block = min(l_end, l_start_block+l_block-1) + write(task,'(I10,1X,I10)') l_start_block, l_end_block + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) + enddo + + call zmq_set_running(zmq_to_qp_run_socket) + + PROVIDE nproc + + call omp_set_nested(.True.) + integer :: ithread + !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread==0) then + call four_idx_collector(zmq_to_qp_run_socket,map_c) + else + !TODO : Put strings of map_a and matrix_b on server and broadcast + call four_index_transform_slave_inproc(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, 1 ) + endif + !$OMP END PARALLEL + + call end_parallel_job(zmq_to_qp_run_socket, 'four_idx') + + +end + + +subroutine four_idx_slave_work(zmq_to_qp_run_socket, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer,intent(in) :: worker_id + integer :: task_id + character*(512) :: msg + + integer :: i_start, j_start, k_start, l_start_block + integer :: i_end , j_end , k_end , l_end_block + integer :: a_start, b_start, c_start, d_start + integer :: a_end , b_end , c_end , d_end + + !TODO : get map_a and matrix_B from server + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) + if(task_id == 0) exit + read (msg,*) LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end + + call four_index_transform_slave(map_a,map_c,matrix_B,LDB, & + i_start, j_start, k_start, l_start_block, & + i_end , j_end , k_end , l_end_block , & + a_start, b_start, c_start, d_start, & + a_end , b_end , c_end , d_end, zmq_to_qp_run_socket, & + task_id) + call
task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + + enddo +end + + +BEGIN_PROVIDER [ integer, nthreads_four_idx ] + implicit none + BEGIN_DOC + ! Number of threads for 4-index transformation + END_DOC + nthreads_four_idx = nproc + character*(32) :: env + call getenv('NTHREADS_FOUR_IDX',env) + if (trim(env) /= '') then + read(env,*) nthreads_four_idx + endif + call write_int(6,nthreads_four_idx,'Number of threads for 4-index transformation') +END_PROVIDER + + + +subroutine four_idx_collector(zmq_to_qp_run_socket,map_c) + use f77_zmq + use map_module + implicit none + type(map_type), intent(inout) :: map_c + + integer :: more, task_id + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull, zmq_to_qp_run_socket + + + more = 1 + zmq_socket_pull = new_zmq_pull_socket() + + do while (more == 1) + call four_idx_pull_results(zmq_socket_pull, map_c, task_id) + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + enddo + + call end_zmq_pull_socket(zmq_socket_pull) + +end + + +subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id) + use f77_zmq + use map_module + implicit none + type(map_type), intent(inout) :: map_c + integer(ZMQ_PTR), intent(inout) :: zmq_socket_pull + + integer, intent(out) :: task_id + + integer :: rc, sze + integer*8 :: rc8 + + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "four_idx_pull_results failed to pull task_id" + + rc = f77_zmq_recv( zmq_socket_pull, sze, 4, 0) + if(rc /= 4) stop "four_idx_pull_results failed to pull sze" + + integer(key_kind), allocatable :: key(:) + real(integral_kind), allocatable :: value(:) + + allocate(key(sze), value(sze)) + + rc8 = f77_zmq_recv8( zmq_socket_pull, key, key_kind*sze, 0) + if(rc8 /= key_kind*sze) stop "four_idx_pull_results failed to pull key" + + rc8 = f77_zmq_recv8( zmq_socket_pull, value, integral_kind*sze, 0) + if(rc8 /= integral_kind*sze) stop "four_idx_pull_results failed to pull value" + +!
Activate if zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop 'error' + endif +IRP_ENDIF + + call map_update(map_c, key, value, sze, 1.d-15) ! TODO : threshold + + deallocate(key, value) +end + + + +subroutine four_idx_push_results(zmq_socket_push, key, value, sze, task_id) + use f77_zmq + use map_module + implicit none + integer, intent(in) :: sze + integer(key_kind), intent(in) :: key(sze) + real(integral_kind), intent(in) :: value(sze) + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + integer, intent(in) :: task_id + + integer :: rc + integer*8 :: rc8 + + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "four_idx_push_results failed to push task_id" + + rc = f77_zmq_send( zmq_socket_push, sze, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "four_idx_push_results failed to push sze" + + rc8 = f77_zmq_send8( zmq_socket_push, key, key_kind*sze, ZMQ_SNDMORE) + if(rc8 /= key_kind*sze) stop "four_idx_push_results failed to push key" + + rc8 = f77_zmq_send8( zmq_socket_push, value, integral_kind*sze, 0) + if(rc8 /= integral_kind*sze) stop "four_idx_push_results failed to push value" + +! Activate if zmq_socket_push is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_push, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_push,...' + stop 'error' + endif +IRP_ENDIF + +end + + diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 22799923..2fc77219 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -118,11 +118,12 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] else !
call add_integrals_to_map(full_ijkl_bitmask_4) - call four_index_transform_sym(ao_integrals_map,mo_integrals_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) - + + call four_index_transform_block(ao_integrals_map,mo_integrals_map, & + mo_coef, size(mo_coef,1), & + 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & + 1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) + integer*8 :: get_mo_map_size, mo_map_size mo_map_size = get_mo_map_size() From 27b8b4d7360ef600557dda1010058373345f2e55 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 14 Oct 2017 01:25:43 +0200 Subject: [PATCH 28/34] Commented Remove_duplicates --- src/Determinants/H_apply.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 1d7a5bd8..ef396f9c 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -192,8 +192,8 @@ subroutine copy_H_apply_buffer_to_wf call normalize(psi_coef,N_det) SOFT_TOUCH N_det psi_det psi_coef - logical :: found_duplicates - call remove_duplicates_in_psi_det(found_duplicates) +! logical :: found_duplicates +! 
call remove_duplicates_in_psi_det(found_duplicates) end subroutine remove_duplicates_in_psi_det(found_duplicates) From c09713b4b41ead6c476f18525a605f7183648407 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2017 15:06:24 +0200 Subject: [PATCH 29/34] Minor changes --- plugins/analyze_wf/analyze_wf.irp.f | 11 +++++++++++ src/FourIdx/four_index_block.irp.f | 17 +++++++++-------- tests/run_tests.sh | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/plugins/analyze_wf/analyze_wf.irp.f b/plugins/analyze_wf/analyze_wf.irp.f index 7d005a05..c37db55f 100644 --- a/plugins/analyze_wf/analyze_wf.irp.f +++ b/plugins/analyze_wf/analyze_wf.irp.f @@ -14,6 +14,17 @@ subroutine run integer :: class(0:mo_tot_num,5) double precision :: occupation(mo_tot_num) + write(*,'(A)') 'Energy of 1st determinant' + write(*,'(A)') '=========================' + write(*,'(A)') '' + write(*,*) 'Total', ref_bitmask_energy + nuclear_repulsion + write(*,*) 'Mono-electronic', mono_elec_ref_bitmask_energy + write(*,*) 'Kinetic', kinetic_ref_bitmask_energy + write(*,*) 'Electron-nucleus', nucl_elec_ref_bitmask_energy + write(*,*) 'Two-electron', bi_elec_ref_bitmask_energy + write(*,'(A)') '' + write(*,'(A)') '' + write(*,'(A)') 'MO Occupation' write(*,'(A)') '=============' write(*,'(A)') '' diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f index dce5fcc9..7a72e184 100644 --- a/src/FourIdx/four_index_block.irp.f +++ b/src/FourIdx/four_index_block.irp.f @@ -59,7 +59,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & ASSERT (LDB >= l_max) integer*4, allocatable :: a_array_ik(:) - integer*2, allocatable :: a_array_j(:) + integer*4, allocatable :: a_array_j(:) double precision, allocatable :: a_array_value(:) integer*8 :: new_size @@ -70,8 +70,9 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & integer :: ipass, npass integer*8 :: tempspace - tempspace = (new_size * 14_8) / (1024_8 * 1024_8) + 
tempspace = (new_size * 16_8) / (1024_8 * 1024_8) npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space + npass = 1 l_block = (l_end-l_start)/npass ipass = 0 @@ -88,7 +89,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & l_pointer(l) = ii !$OMP END SINGLE do j=j_start,j_end - !$OMP DO SCHEDULE(static,1) + !$OMP DO SCHEDULE(static,16) do k=k_start,k_end do i=i_start,k ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 ) @@ -139,12 +140,12 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & ! END INPUT DATA - !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, & + !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,& !$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,& !$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,& - !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & - !$OMP map_c,matrix_B,l_pointer) & - !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & + !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & + !$OMP map_c,matrix_B,l_pointer) & + !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & !$OMP a,b,c,d,tmp,T2d,V2d,ii) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -171,7 +172,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & T2d(:,j) = 0.d0 !DIR$ IVDEP do while (j == a_array_j(ii)) - T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0) + T2d(a_array_ik(ii),j) = a_array_value(ii) ii = ii + 1_8 enddo enddo diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 3ac452ad..405f91e6 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -3,10 +3,10 @@ LIST=" convert.bats hf.bats -pseudo.bats fci.bats cassd.bats mrcepa0.bats +pseudo.bats " #foboci.bats From 0ea730e6f0a67bcae9b912e588be87af47c9f37d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2017 17:55:00 +0200 Subject: [PATCH 30/34] 
Fixed four_index --- src/FourIdx/four_index_block.irp.f | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f index 7a72e184..f5956ca4 100644 --- a/src/FourIdx/four_index_block.irp.f +++ b/src/FourIdx/four_index_block.irp.f @@ -72,8 +72,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & tempspace = (new_size * 16_8) / (1024_8 * 1024_8) npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space - npass = 1 - l_block = (l_end-l_start)/npass + l_block = (l_end-l_start+1)/npass ipass = 0 do l_start_block = l_start, l_end, l_block @@ -244,6 +243,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & do b=b_start,d do c=c_start,c_end do a=a_start,min(b,c) + if (a==b) cycle if (dabs(U(a,c,b)) < 1.d-15) then cycle endif @@ -254,8 +254,21 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & enddo enddo + do b=b_start,d + a=b + do c=c_start,d + if (dabs(U(a,c,b)) < 1.d-15) then + cycle + endif + idx = idx+1_8 + call bielec_integrals_index(a,b,c,d,key(idx)) + value(idx) = U(a,c,b) + enddo + enddo + !$OMP CRITICAL call map_update(map_c, key, value, idx,1.d-15) +! call map_append(map_c, key, value, idx) !$OMP END CRITICAL !WRITE OUTPUT @@ -281,6 +294,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL call map_merge(map_c) +! 
call map_sort(map_c) deallocate(l_pointer) enddo From 2685cc15ebb2ff7fd0d827e6c4dfd02553ca1a7e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2017 18:09:10 +0200 Subject: [PATCH 31/34] Strong typing --- src/FourIdx/four_index_block.irp.f | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f index f5956ca4..830e41da 100644 --- a/src/FourIdx/four_index_block.irp.f +++ b/src/FourIdx/four_index_block.irp.f @@ -71,7 +71,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & integer*8 :: tempspace tempspace = (new_size * 16_8) / (1024_8 * 1024_8) - npass = min(l_end-l_start,1 + tempspace / 2048) ! 2 GiB of scratch space + npass = min(int(l_end-l_start,8),1_8 + tempspace / 2048_8) ! 2 GiB of scratch space l_block = (l_end-l_start+1)/npass ipass = 0 @@ -268,7 +268,6 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & !$OMP CRITICAL call map_update(map_c, key, value, idx,1.d-15) -! call map_append(map_c, key, value, idx) !$OMP END CRITICAL !WRITE OUTPUT @@ -294,7 +293,6 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & deallocate(key,value,V,T) !$OMP END PARALLEL call map_merge(map_c) -! 
call map_sort(map_c) deallocate(l_pointer) enddo From 7db4bf509b9f673c6dd933e33752ef69dfe38a03 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2017 18:28:54 +0200 Subject: [PATCH 32/34] Fixed tests --- tests/bats/fci.bats | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/bats/fci.bats b/tests/bats/fci.bats index 6512d60b..447133ec 100644 --- a/tests/bats/fci.bats +++ b/tests/bats/fci.bats @@ -42,11 +42,12 @@ function run_FCI_ZMQ() { qp_set_mo_class h2o.ezfio -core "[1]" -act "[2-12]" -del "[13-24]" } @test "FCI H2O cc-pVDZ" { - run_FCI h2o.ezfio 2000 -76.1253758241716 -76.1258130146102 + run_FCI h2o.ezfio 2000 -76.125246738693903 -76.1258130146102 } + @test "FCI-ZMQ H2O cc-pVDZ" { run_FCI_ZMQ h2o.ezfio 2000 -76.1250552686394 -76.1258817228809 } From 8854eeb457815eff207fdab263ff80a189fbe9ce Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2017 22:41:40 +0200 Subject: [PATCH 33/34] Fixed four-idx --- src/FourIdx/four_index_block.irp.f | 25 ++++++++++++------------- tests/bats/fci.bats | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/FourIdx/four_index_block.irp.f b/src/FourIdx/four_index_block.irp.f index 830e41da..d5929b51 100644 --- a/src/FourIdx/four_index_block.irp.f +++ b/src/FourIdx/four_index_block.irp.f @@ -145,7 +145,7 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & !$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, & !$OMP map_c,matrix_B,l_pointer) & !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, & - !$OMP a,b,c,d,tmp,T2d,V2d,ii) + !$OMP a,b,c,d,tmp,T2d,V2d,ii,p,q) allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) ) allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) ) @@ -240,31 +240,30 @@ subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB, & enddo idx = 0_8 + + integer :: p, q do b=b_start,d + q = b+ishft(d*d-d,-1) do c=c_start,c_end + p = a_start+ishft(c*c-c,-1) do a=a_start,min(b,c) - if (a==b) cycle if (dabs(U(a,c,b)) 
< 1.d-15) then cycle endif + if ((a==b).and.(p>q)) cycle + p = p+1 idx = idx+1_8 call bielec_integrals_index(a,b,c,d,key(idx)) +!print *, int(key(idx),4), int(a,2),int(b,2),int(c,2),int(d,2), p, q value(idx) = U(a,c,b) enddo enddo enddo - do b=b_start,d - a=b - do c=c_start,d - if (dabs(U(a,c,b)) < 1.d-15) then - cycle - endif - idx = idx+1_8 - call bielec_integrals_index(a,b,c,d,key(idx)) - value(idx) = U(a,c,b) - enddo - enddo + + + + !$OMP CRITICAL call map_update(map_c, key, value, idx,1.d-15) diff --git a/tests/bats/fci.bats b/tests/bats/fci.bats index 447133ec..7a7bb09f 100644 --- a/tests/bats/fci.bats +++ b/tests/bats/fci.bats @@ -42,7 +42,7 @@ function run_FCI_ZMQ() { qp_set_mo_class h2o.ezfio -core "[1]" -act "[2-12]" -del "[13-24]" } @test "FCI H2O cc-pVDZ" { - run_FCI h2o.ezfio 2000 -76.125246738693903 -76.1258130146102 + run_FCI h2o.ezfio 2000 -76.1253757275131 -76.1258128174355 } From 80e581965806db659cacf9dd2d5b19b10cc96306 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 20 Oct 2017 16:08:41 +0200 Subject: [PATCH 34/34] Bug in MO cache --- src/Integrals_Bielec/map_integrals.irp.f | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index 996f8464..3d3d2a9b 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -179,7 +179,6 @@ double precision function get_ao_bielec_integral(i,j,k,l,map) result(result) call bielec_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(map,idx,tmp) - tmp = tmp else ii = l-ao_integrals_cache_min ii = ior( ishft(ii,6), k-ao_integrals_cache_min) @@ -336,7 +335,7 @@ end ! 
Min and max values of the MOs for which the integrals are in the cache END_DOC mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min+127_8) + mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min_8+127_8) mo_integrals_cache_min = max(1,elec_alpha_num - 63) mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+127)