10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-11-03 20:54:00 +01:00

Merge pull request #214 from scemama/master

Bug in MO integrals cache fixed
This commit is contained in:
Thomas Applencourt 2017-10-20 14:51:26 -05:00 committed by GitHub
commit 5006801b43
44 changed files with 3394 additions and 264 deletions

View File

@ -6,9 +6,9 @@ GPI_OPTIONS=--with-ethernet
function _install() function _install()
{ {
cd gpi2 cd _build/gpi2
./install.sh -p $QP_ROOT $GPI_OPTIONS ./install.sh -p $QP_ROOT $GPI_OPTIONS
cp src/GASPI.f90 $QP_ROOT/src/plugins/GPI2/ cp src/GASPI.f90 $QP_ROOT/plugins/GPI2/
return 0 return 0
} }

View File

@ -0,0 +1,180 @@
subroutine four_index_transform(map_a,map_c,matrix_B,LDB,            &
      i_start, j_start, k_start, l_start,                            &
      i_end  , j_end  , k_end  , l_end  ,                            &
      a_start, b_start, c_start, d_start,                            &
      a_end  , b_end  , c_end  , d_end  )
  implicit none
  use map_module
  use mmap_module
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! map_a    : input map, read through bielec_integrals_index + map_get
! map_c    : output map, filled with map_append and sorted with map_sort
! matrix_B : transformation matrix, leading dimension LDB
!
! The routine works in two stages:
! 1. the non-zero A_{ijkl} are copied, one l-slab at a time, into a
!    memory-mapped scratch array;
! 2. for every d, the l-slabs are contracted with matrix_B by three DGEMM
!    calls and accumulated in U(a,c,b), which is then appended to map_c.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:,:), U(:,:,:), V(:,:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  ! Create a temporary memory-mapped file.
  ! a_array(1:4,a,l-l_start+1) holds one record (i, j, k, bit pattern of the
  ! integral) per non-zero A_{ijkl}; a_array(1,1,.) holds the index of the
  ! last record of the slab, so records start at a=2.
  ! NOTE(review): the second dimension has (ni*nj*nk) slots but records are
  ! stored from slot 2 on, so a slab in which every integral is non-zero
  ! would need one more slot -- confirm this cannot happen.
  integer                        :: fd
  type(c_ptr)                    :: c_pointer
  integer*8, pointer             :: a_array(:,:,:)
  call mmap(trim(ezfio_filename)//'/work/four_idx',                  &
      (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),          &
         int(k_end-k_start+1,8), int(l_end-l_start+1,8) /),          &
      8, fd, .False., c_pointer)
  call c_f_pointer(c_pointer, a_array,                               &
      (/ 4, (i_end-i_start+1)*(j_end-j_start+1)*(k_end-k_start+1),   &
         l_end-l_start+1 /))

  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
  !$OMP  map_a,map_c,matrix_B) &
  !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx, &
  !$OMP  a,b,c,d,tmp)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  ! Stage 1: store the non-zero integrals of each l-slab in a_array
  !$OMP DO SCHEDULE(dynamic,4)
  do l=l_start,l_end
    a = 1
    do j=j_start,j_end
      do k=k_start,k_end
        do i=i_start,i_end
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,tmp)
          if (tmp /= 0.d0) then
            a = a+1
            a_array(1,a,l-l_start+1) = i
            a_array(2,a,l-l_start+1) = j
            a_array(3,a,l-l_start+1) = k
            ! The value is stored as the raw 64-bit pattern of the double
            a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8)
          endif
        enddo
      enddo
    enddo
    a_array(1,1,l-l_start+1) = a
    print *, l
  enddo
  !$OMP END DO

  ! Stage 2: contract with matrix_B, one d at a time
  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    U = 0.d0
    do l=l_start,l_end
      ! Skip the slabs whose weight B(l,d) is negligible
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif
      print *, d, l

      allocate( T(i_start:i_end, k_start:k_end, j_start:j_end),      &
                V(a_start:a_end, k_start:k_end, j_start:j_end) )

      ! Rebuild the dense slab T(i,k,j) = A_{ijkl} from the stored records
      T = 0.d0
      do a=2,a_array(1,1,l-l_start+1)
        i = a_array(1,a,l-l_start+1)
        j = a_array(2,a,l-l_start+1)
        k = a_array(3,a,l-l_start+1)
        T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0)
      enddo

      ! V(a,k,j) = \sum_i B(i,a) T(i,k,j)
      call DGEMM('T','N', (a_end-a_start+1),                         &
          (k_end-k_start+1)*(j_end-j_start+1),                       &
          (i_end-i_start+1), 1.d0,                                   &
          matrix_B(i_start,a_start), size(matrix_B,1),               &
          T(i_start,k_start,j_start), size(T,1), 0.d0,               &
          V(a_start,k_start,j_start), size(V, 1) )

      deallocate(T)
      ! T must hold every b in b_start:b_end: the DGEMM below writes
      ! (b_end-b_start+1) slabs and the loop over b reads all of them.
      allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) )

      ! T(a,k,b) = \sum_j V(a,k,j) B(j,b)
      call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1),       &
          (b_end-b_start+1),                                         &
          (j_end-j_start+1), 1.d0,                                   &
          V(a_start,k_start,j_start), size(V,1)*size(V,2),           &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,          &
          T(a_start,k_start,b_start), size(T,1)*size(T,2) )

      deallocate(V)

      ! U(a,c,b) += B(l,d) * \sum_k T(a,k,b) B(k,c)
      do b=b_start,b_end
        call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1),    &
            (k_end-k_start+1), matrix_B(l, d),                       &
            T(a_start,k_start,b), size(T,1),                         &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,       &
            U(a_start,c_start,b), size(U,1) )
      enddo

      deallocate(T)
    enddo

    ! Collect the significant transformed integrals for this d
    idx = 0_8
    do b=b_start,b_end
      do c=c_start,c_end
        do a=a_start,a_end
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! map_c is shared: only one thread at a time may modify it
    !$OMP CRITICAL
    call map_append(map_c, key, value, idx)
    call map_sort(map_c)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  deallocate(key,value,U)
  !$OMP END PARALLEL

  call munmap(                                                       &
      (/ 4_8,int(i_end-i_start+1,8),int(j_end-j_start+1,8),          &
         int(k_end-k_start+1,8), int(l_end-l_start+1,8) /),          &
      8, fd, c_pointer)
end

View File

@ -0,0 +1,277 @@
subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB,        &
      i_start, j_start, k_start, l_start,                            &
      i_end  , j_end  , k_end  , l_end  ,                            &
      a_start, b_start, c_start, d_start,                            &
      a_end  , b_end  , c_end  , d_end  )
  implicit none
  use map_module
  use mmap_module
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! Variant of the transformation in which only the pairs i <= k are stored
! (packed index ik) and only b <= d, a <= min(b,c) are written to map_c.
!
! map_a    : input map, read through bielec_integrals_index + map_get
! map_c    : output map, filled with map_append and sorted once at the end
! matrix_B : transformation matrix, leading dimension LDB
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  ! Create a temporary memory-mapped file.
  ! a_array is a flat list of records of three 64-bit words:
  ! (packed index ik, j, bit pattern of the integral).
  ! l_pointer(l) is the position of the first record of slab l and
  ! l_pointer(l_end+1) the position just after the last record.
  integer                        :: fd
  type(c_ptr)                    :: c_pointer
  integer*8, pointer             :: a_array(:)
  call mmap(trim(ezfio_filename)//'/work/four_idx',                  &
      (/ 12_8 * map_a % n_elements /), 8, fd, .False., c_pointer)
  call c_f_pointer(c_pointer, a_array, (/ 12_8 * map_a % n_elements /))

  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )
  ii = 1_8
  ! Stage 1: every thread runs the l and j loops; the (i,k) look-ups are
  ! shared among the threads and a single thread packs the non-zero values,
  ! so the records are written in a deterministic (l, j, ik) order.
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
  do l=l_start,l_end
    !$OMP SINGLE
    l_pointer(l) = ii
    !$OMP END SINGLE
    do j=j_start,j_end
      !$OMP DO SCHEDULE(static,1)
      do k=k_start,k_end
        do i=i_start,k
          ! Position of the pair (i,k), i <= k, in the packed triangle
          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,value(ik))
        enddo
      enddo
      !$OMP END DO

      !$OMP SINGLE
      ik=0
      do k=k_start,k_end
        do i=i_start,k
          ik = ik+1
          tmp=value(ik)
          if (tmp /= 0.d0) then
            a_array(ii) = ik
            ii = ii+1_8
            a_array(ii) = j
            ii = ii+1_8
            ! The value is stored as the raw 64-bit pattern of the double
            a_array(ii) = transfer(dble(tmp), 1_8)
            ii = ii+1_8
          endif
        enddo
      enddo
      !$OMP END SINGLE
    enddo
  enddo
  !$OMP SINGLE
  l_pointer(l_end+1) = ii
  !$OMP END SINGLE
  !$OMP END PARALLEL
  deallocate(value)

  ! Stage 2: contract with matrix_B, one d at a time
  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
  !$OMP  map_c,matrix_B,l_pointer) &
  !$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, &
  !$OMP  a,b,c,d,tmp,T2d,V2d,ii)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
            V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), &
            V(i_start:i_end, k_start:k_end),                           &
            T(k_start:k_end, a_start:a_end))

  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    U = 0.d0
    do l=l_start,l_end
      ! Skip the slabs whose weight B(l,d) is negligible
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif

      ! Unpack the records of slab l into the dense array T2d(ik,j).
      ! The end-of-slab test is made before a_array is read, so that no
      ! element beyond the records of this slab is ever accessed.
      ii=l_pointer(l)
      do j=j_start,j_end
        ik=0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            if (ii >= l_pointer(l+1)) then
              T2d(ik,j) = 0.d0
            else if ( (ik /= a_array(ii)).or.(j /= a_array(ii+1_8)) ) then
              T2d(ik,j) = 0.d0
            else
              T2d(ik,j) = transfer(a_array(ii+2_8), 1.d0)
              ii=ii+3_8
            endif
          enddo
        enddo
      enddo

      ! V2d(ik,b) = \sum_j T2d(ik,j) B(j,b)   for b = b_start..d
      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
          (d-b_start+1),                                             &
          (j_end-j_start+1), 1.d0,                                   &
          T2d(1,j_start), size(T2d,1),                               &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,          &
          V2d(1,b_start), size(V2d,1) )

      do b=b_start,d
        ! Expand column b of V2d into the upper triangle of V(i,k)
        ik = 0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            V(i,k) = V2d(ik,b)
          enddo
        enddo

        ! Reference loops for the DSYMM call below:
        !        T = 0.d0
        !        do a=a_start,b
        !          do k=k_start,k_end
        !            do i=i_start,k
        !              T(k,a) = T(k,a) + V(i,k)*matrix_B(i,a)
        !            enddo
        !            do i=k+1,i_end
        !              T(k,a) = T(k,a) + V(k,i)*matrix_B(i,a)
        !            enddo
        !          enddo
        !        enddo
        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),        &
            1.d0,                                                    &
            V(i_start,k_start), size(V,1),                           &
            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,        &
            T(k_start,a_start), size(T,1) )

        ! Reference loops for the DGEMM call below:
        !        do c=c_start,b
        !          do a=a_start,c
        !            do k=k_start,k_end
        !              U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d)
        !            enddo
        !          enddo
        !        enddo
        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),            &
            (k_end-k_start+1), matrix_B(l, d),                       &
            T(k_start,a_start), size(T,1),                           &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,       &
            U(a_start,c_start,b), size(U,1) )

        ! Reference loops for the DGEMM call below:
        !        do c=b+1,c_end
        !          do a=a_start,b
        !            do k=k_start,k_end
        !              U(a,c,b) = U(a,c,b) + T(k,a)*matrix_B(k,c)*matrix_B(l,d)
        !            enddo
        !          enddo
        !        enddo
        if (b < b_end) then
          call DGEMM('T','N', (b-a_start+1), (c_end-b),              &
              (k_end-k_start+1), matrix_B(l, d),                     &
              T(k_start,a_start), size(T,1),                         &
              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,         &
              U(a_start,b+1,b), size(U,1) )
        endif
      enddo
    enddo

    ! Collect the significant transformed integrals for this d
    idx = 0_8
    do b=b_start,d
      do c=c_start,c_end
        do a=a_start,min(b,c)
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! map_c is shared: only one thread at a time may modify it
    !$OMP CRITICAL
    call map_append(map_c, key, value, idx)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  deallocate(key,value,V,T,U,T2d,V2d)
  !$OMP END PARALLEL

  call map_sort(map_c)

  call munmap(                                                       &
      (/ 12_8 * map_a % n_elements /), 8, fd, c_pointer)
  deallocate(l_pointer)
end

View File

@ -1 +1 @@
Perturbation Selectors_full Generators_full ZMQ Perturbation Selectors_full Generators_full ZMQ FourIdx

View File

@ -350,12 +350,12 @@ subroutine get_first_tooth(computed, first_teeth)
end subroutine end subroutine
BEGIN_PROVIDER [ integer, size_tbc ] BEGIN_PROVIDER [ integer*8, size_tbc ]
implicit none implicit none
BEGIN_DOC BEGIN_DOC
! Size of the tbc array ! Size of the tbc array
END_DOC END_DOC
size_tbc = (comb_teeth+1)*N_det_generators + fragment_count*fragment_first size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first
END_PROVIDER END_PROVIDER
subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
@ -408,7 +408,8 @@ end subroutine
subroutine add_comb(comb, computed, tbc, stbc, ct) subroutine add_comb(comb, computed, tbc, stbc, ct)
implicit none implicit none
integer, intent(in) :: stbc, ct integer*8, intent(in) :: stbc
integer, intent(in) :: ct
double precision, intent(in) :: comb double precision, intent(in) :: comb
logical, intent(inout) :: computed(N_det_generators) logical, intent(inout) :: computed(N_det_generators)
integer, intent(inout) :: tbc(0:stbc) integer, intent(inout) :: tbc(0:stbc)

View File

@ -57,7 +57,6 @@ subroutine run_selection_slave(thread,iproc,energy)
endif endif
if(done .or. ctask == size(task_id)) then if(done .or. ctask == size(task_id)) then
ASSERT (.not.(buf%N == 0 .and. ctask > 0))
do i=1, ctask do i=1, ctask
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i))
end do end do

View File

@ -419,17 +419,62 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
fullinteresting(0) = 0 fullinteresting(0) = 0
do ii=1,preinteresting(0) do ii=1,preinteresting(0)
i = preinteresting(ii) select case (N_int)
case (1)
mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii))
mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii))
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
do j=2,N_int case (2)
mobMask(j,1) = iand(negMask(j,1), preinteresting_det(j,1,ii)) mobMask(1:2,1) = iand(negMask(1:2,1), preinteresting_det(1:2,1,ii))
mobMask(j,2) = iand(negMask(j,2), preinteresting_det(j,2,ii)) mobMask(1:2,2) = iand(negMask(1:2,2), preinteresting_det(1:2,2,ii))
nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + &
popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
case (3)
mobMask(1:3,1) = iand(negMask(1:3,1), preinteresting_det(1:3,1,ii))
mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii))
nt = 0
do j=3,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 1))
if (nt > 4) exit
endif
if (mobMask(j,2) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 2))
if (nt > 4) exit
endif
end do end do
case (4)
mobMask(1:4,1) = iand(negMask(1:4,1), preinteresting_det(1:4,1,ii))
mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii))
nt = 0
do j=4,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 1))
if (nt > 4) exit
endif
if (mobMask(j,2) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 2))
if (nt > 4) exit
endif
end do
case default
mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii))
mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii))
nt = 0
do j=N_int,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 1))
if (nt > 4) exit
endif
if (mobMask(j,2) /= 0_bit_kind) then
nt = nt+ popcnt(mobMask(j, 2))
if (nt > 4) exit
endif
end do
end select
if(nt <= 4) then if(nt <= 4) then
i = preinteresting(ii)
interesting(0) += 1 interesting(0) += 1
interesting(interesting(0)) = i interesting(interesting(0)) = i
minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii) minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii)
@ -458,10 +503,12 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
do j=2,N_int if (nt > 2) cycle
do j=N_int,2,-1
mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i))
mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i))
nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
if (nt > 2) exit
end do end do
if(nt <= 2) then if(nt <= 2) then

View File

@ -0,0 +1,254 @@
subroutine broadcast_wf(energy)
  implicit none
  BEGIN_DOC
! Broadcast of the wave function and of the energies : the GASPI master
! calls broadcast_wf_put, every other rank calls broadcast_wf_get.
! After a global barrier, the segments 0 to 3 are deleted.
  END_DOC
  use bitmasks
  use GASPI
  use ISO_C_BINDING
  double precision, intent(inout) :: energy(N_states)
  integer(gaspi_return_t)        :: rc
  integer(gaspi_segment_id_t)    :: segment

  if (.not.is_gaspi_master) then
    call broadcast_wf_get(energy)
  else
    call broadcast_wf_put(energy)
  endif

  ! Wait for all the ranks before releasing the segments
  rc = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
  if (rc /= GASPI_SUCCESS) then
    write(*,*) "gaspi_barrier failed"
    stop -1
  end if

  do segment=0,3
    rc = gaspi_segment_delete(segment)
    if (rc /= GASPI_SUCCESS) then
      write(*,*) "gaspi_segment_delete failed", segment
      stop -1
    end if
  end do
end
subroutine broadcast_wf_put(energy)
  implicit none
  BEGIN_DOC
! Initiates the broadcast of the wave function
!
! Four segments are created and filled on the calling rank:
! segment 0 : N_states, N_det, psi_det_size (integers)
! segment 1 : psi_coef
! segment 2 : psi_det
! segment 3 : energy
! A barrier is called after segment 0 is filled, and another one after
! segments 1 to 3 are filled.
  END_DOC
  use bitmasks
  use GASPI
  use ISO_C_BINDING
  double precision, intent(in)   :: energy(N_states)
  integer(gaspi_segment_id_t)    :: seg_id
  integer(gaspi_alloc_t)         :: seg_alloc_policy
  integer(gaspi_size_t)          :: seg_size(0:3)
  type(c_ptr)                    :: seg_ptr(0:3)
  integer, pointer               :: params_int(:)      ! Segment 0
  double precision, pointer      :: psi_coef_tmp(:,:)  ! Segment 1
  integer(bit_kind), pointer     :: psi_det_tmp(:,:,:) ! Segment 2
  double precision, pointer      :: params_double(:)   ! Segment 3
  integer(gaspi_return_t)        :: res

  seg_alloc_policy = GASPI_MEM_UNINITIALIZED

  ! Segment 0 : five integers of 4 bytes
  seg_size(0) = 4 * 5
  seg_id=0
  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
      GASPI_BLOCK, seg_alloc_policy)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
    stop -1
  end if
  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_segment_ptr failed", gaspi_rank
    stop -1
  end if

  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))
  params_int(1) = N_states
  params_int(2) = N_det
  params_int(3) = psi_det_size

  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_barrier failed", gaspi_rank
    stop -1
  end if

  ! The byte counts are computed with the kind of seg_size, so that the
  ! products cannot overflow the default integer kind.
  seg_size(1) = 8_gaspi_size_t * int(psi_det_size,gaspi_size_t) *    &
      int(N_states,gaspi_size_t)
  seg_size(2) = int(bit_kind,gaspi_size_t) *                         &
      int(psi_det_size,gaspi_size_t) * 2_gaspi_size_t *              &
      int(N_int,gaspi_size_t)
  seg_size(3) = 8_gaspi_size_t * int(N_states,gaspi_size_t)

  do seg_id=1, 3
    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
        GASPI_BLOCK, seg_alloc_policy)
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_create_segment failed", gaspi_rank, seg_id
      stop -1
    end if
    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_segment_ptr failed", gaspi_rank
      stop -1
    end if
  end do

  ! Copy the data into the segments
  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
  call c_f_pointer(seg_ptr(3), params_double, shape=(/ N_states /))

  psi_coef_tmp = psi_coef
  psi_det_tmp = psi_det
  params_double = energy

  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_barrier failed", gaspi_rank
    stop -1
  end if
end
subroutine broadcast_wf_get(energy)
  implicit none
  BEGIN_DOC
! Gets the broadcasted wave function
!
! Segment 0 (N_states, N_det, psi_det_size) is read first from rank 0 and
! the three quantities are TOUCHed. Segments 1 to 3 (psi_coef, psi_det and
! the energies) are then created with the corresponding sizes, read from
! rank 0 and copied into psi_coef, psi_det and energy.
  END_DOC
  use bitmasks
  use GASPI
  use ISO_C_BINDING
  double precision, intent(out)  :: energy(N_states)
  integer(gaspi_segment_id_t)    :: seg_id
  integer(gaspi_alloc_t)         :: seg_alloc_policy
  integer(gaspi_size_t)          :: seg_size(0:3)
  type(c_ptr)                    :: seg_ptr(0:3)
  integer, pointer               :: params_int(:)      ! Segment 0
  double precision, pointer      :: psi_coef_tmp(:,:)  ! Segment 1
  integer(bit_kind), pointer     :: psi_det_tmp(:,:,:) ! Segment 2
  double precision, pointer      :: params_double(:)   ! Segment 3
  integer(gaspi_return_t)        :: res
  integer(gaspi_offset_t)        :: localOff, remoteOff
  integer(gaspi_rank_t)          :: remoteRank
  integer(gaspi_queue_id_t)      :: queue

  seg_alloc_policy = GASPI_MEM_UNINITIALIZED

  ! Segment 0 : five integers of 4 bytes
  seg_size(0) = 4 * 5
  seg_id=0
  res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL,&
      GASPI_BLOCK, seg_alloc_policy)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_create_segment failed"
    stop -1
  end if
  res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_segment_ptr failed"
    stop -1
  end if

  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_barrier failed"
    stop -1
  end if

  ! Every read starts at the beginning of the segment, locally and
  ! remotely. remoteOff has to be set too: it is passed to gaspi_read.
  localOff = 0
  remoteOff = 0
  remoteRank = 0
  queue = 0

  res = gaspi_read(seg_id, localOff, remoteRank,                     &
      seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_read failed"
    stop -1
  end if
  res = gaspi_wait(queue, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_wait failed"
    stop -1
  end if

  ! The shape is the constructor itself: shape((/ 5 /)) would be (/ 1 /)
  ! and would map a single element.
  call c_f_pointer(seg_ptr(0), params_int, shape=(/ 5 /))
  N_states = params_int(1)
  N_det = params_int(2)
  psi_det_size = params_int(3)
  TOUCH N_states N_det psi_det_size

  ! The byte counts are computed with the kind of seg_size, so that the
  ! products cannot overflow the default integer kind.
  seg_size(1) = 8_gaspi_size_t * int(psi_det_size,gaspi_size_t) *    &
      int(N_states,gaspi_size_t)
  seg_size(2) = int(bit_kind,gaspi_size_t) *                         &
      int(psi_det_size,gaspi_size_t) * 2_gaspi_size_t *              &
      int(N_int,gaspi_size_t)
  seg_size(3) = 8_gaspi_size_t * int(N_states,gaspi_size_t)

  do seg_id=1, 3
    res = gaspi_segment_create(seg_id, seg_size(seg_id), GASPI_GROUP_ALL, &
        GASPI_BLOCK, seg_alloc_policy)
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_create_segment failed"
      stop -1
    end if
    res = gaspi_segment_ptr(seg_id, seg_ptr(seg_id))
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_segment_ptr failed"
      stop -1
    end if
  end do

  res = gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)
  if(res .ne. GASPI_SUCCESS) then
    write(*,*) "gaspi_barrier failed"
    stop -1
  end if

  do seg_id=1, 3
    res = gaspi_read(seg_id, localOff, remoteRank,                   &
        seg_id, remoteOff, seg_size(seg_id), queue, GASPI_BLOCK)
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_read failed"
      stop -1
    end if
    res = gaspi_wait(queue, GASPI_BLOCK)
    if(res .ne. GASPI_SUCCESS) then
      write(*,*) "gaspi_wait failed"
      stop -1
    end if
  end do

  ! Copy the segments into the local data
  call c_f_pointer(seg_ptr(1), psi_coef_tmp, shape=shape(psi_coef))
  call c_f_pointer(seg_ptr(2), psi_det_tmp, shape=shape(psi_det))
  call c_f_pointer(seg_ptr(3), params_double, shape=shape(energy))

  psi_coef = psi_coef_tmp
  psi_det = psi_det_tmp
  energy = params_double
end

View File

@ -0,0 +1,61 @@
program scf
  implicit none
  BEGIN_DOC
! Produce `Hartree_Fock` MO orbital
! output: mo_basis.mo_tot_num mo_basis.mo_label mo_basis.ao_md5 mo_basis.mo_coef mo_basis.mo_occ
! output: hartree_fock.energy
! optional: mo_basis.mo_coef
  END_DOC

  ! Three steps, in this order : guess, orthonormalization, SCF
  call create_guess
  call orthonormalize_mos
  call run
end
subroutine create_guess
  implicit none
  BEGIN_DOC
! Builds a guess for the MOs when the EZFIO directory contains no
! MO coefficients. The kind of guess is selected by mo_guess_type.
  END_DOC
  logical                        :: exists
  PROVIDE ezfio_filename
  call ezfio_has_mo_basis_mo_coef(exists)
  if (.not.exists) then
    select case (mo_guess_type)
      case ("HCore")
        ! Start from ao_ortho_lowdin_coef, then take the eigenvectors
        ! of mo_mono_elec_integral
        mo_coef = ao_ortho_lowdin_coef
        TOUCH mo_coef
        mo_label = 'Guess'
        call mo_as_eigvectors_of_mo_matrix(mo_mono_elec_integral,    &
            size(mo_mono_elec_integral,1),                           &
            size(mo_mono_elec_integral,2),mo_label)
        SOFT_TOUCH mo_coef mo_label
      case ("Huckel")
        call huckel_guess
      case default
        print *,  'Unrecognized MO guess type : '//mo_guess_type
        stop 1
    end select
  endif
end
subroutine run
  BEGIN_DOC
! Run SCF calculation
!
! Labels the MOs as "Canonical" and calls the SCF driver.
  END_DOC
  use bitmasks
  implicit none
  double precision               :: EHF

  ! EHF is not used afterwards; the assignment is kept because it
  ! references HF_energy before the SCF driver is called.
  EHF = HF_energy

  mo_label = "Canonical"

  ! Choose SCF algorithm
  call damping_SCF   ! Deprecated routine
  ! call Roothaan_Hall_SCF
end

View File

@ -0,0 +1,8 @@
program densify
  implicit none
  ! Sets read_wf to true, generates all the alpha x beta determinant
  ! products, diagonalizes the CI and saves the wave function.
  read_wf = .True.
  TOUCH read_wf

  call generate_all_alpha_beta_det_products
  call diagonalize_ci
  call save_wavefunction
end

View File

@ -39,7 +39,8 @@ subroutine run
call dsort(norm_sort(1),iorder(1),nab) call dsort(norm_sort(1),iorder(1),nab)
PROVIDE psi_bilinear_matrix_values nuclear_repulsion PROVIDE psi_bilinear_matrix_values psi_bilinear_matrix_rows psi_bilinear_matrix_columns
PROVIDE nuclear_repulsion
print *, '' print *, ''
do j=0,nab do j=0,nab
i = iorder(j) i = iorder(j)
@ -47,7 +48,9 @@ subroutine run
!$OMP PARALLEL DO PRIVATE(k) !$OMP PARALLEL DO PRIVATE(k)
do k=1,n_det do k=1,n_det
if (psi_bilinear_matrix_columns(k) == -i) then if (psi_bilinear_matrix_columns(k) == -i) then
psi_bilinear_matrix_values(k,1) = 0.d0 do l=1,N_states
psi_bilinear_matrix_values(k,l) = 0.d0
enddo
endif endif
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
@ -55,7 +58,9 @@ subroutine run
!$OMP PARALLEL DO PRIVATE(k) !$OMP PARALLEL DO PRIVATE(k)
do k=1,n_det do k=1,n_det
if (psi_bilinear_matrix_rows(k) == i) then if (psi_bilinear_matrix_rows(k) == i) then
psi_bilinear_matrix_values(k,1) = 0.d0 do l=1,N_states
psi_bilinear_matrix_values(k,l) = 0.d0
enddo
endif endif
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
@ -64,9 +69,11 @@ subroutine run
cycle cycle
endif endif
u_0 = psi_bilinear_matrix_values(1:N_det,1:N_states) u_0(1:N_det,1:N_states) = psi_bilinear_matrix_values(1:N_det,1:N_states)
v_t = 0.d0 v_0(1:N_det,1:N_states) = 0.d0
s_t = 0.d0 u_t(1:N_states,1:N_det) = 0.d0
v_t(1:N_states,1:N_det) = 0.d0
s_t(1:N_states,1:N_det) = 0.d0
call dtranspose( & call dtranspose( &
u_0, & u_0, &
size(u_0, 1), & size(u_0, 1), &
@ -85,20 +92,21 @@ subroutine run
double precision, external :: u_dot_u, u_dot_v double precision, external :: u_dot_u, u_dot_v
do i=1,N_states do i=1,N_states
e_0(i) = u_dot_v(v_t(1,i),u_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det) e_0(i) = u_dot_v(u_0(1,i),v_0(1,i),N_det)/u_dot_u(u_0(1,i),N_det)
print *, 'E = ', e_0(i) + nuclear_repulsion
enddo enddo
m = 0 m = 0
do k=1,n_det do k=1,n_det
if (psi_bilinear_matrix_values(k,1) /= 0.d0) then if (sum(psi_bilinear_matrix_values(k,1:N_states)) /= 0.d0) then
m = m+1 m = m+1
endif endif
enddo enddo
E = E_0(1) + nuclear_repulsion do k=1,N_states
norm = u_dot_u(u_0(1,1),N_det) E = E_0(k) + nuclear_repulsion
enddo
print *, 'Number of determinants:', m print *, 'Number of determinants:', m
print *, 'Energy', E
exit exit
enddo enddo
call wf_of_psi_bilinear_matrix(.True.) call wf_of_psi_bilinear_matrix(.True.)

View File

@ -14,6 +14,17 @@ subroutine run
integer :: class(0:mo_tot_num,5) integer :: class(0:mo_tot_num,5)
double precision :: occupation(mo_tot_num) double precision :: occupation(mo_tot_num)
write(*,'(A)') 'Energy of 1st determinant'
write(*,'(A)') '========================='
write(*,'(A)') ''
write(*,*) 'Total', ref_bitmask_energy + nuclear_repulsion
write(*,*) 'Mono-electronic', mono_elec_ref_bitmask_energy
write(*,*) 'Kinetic', kinetic_ref_bitmask_energy
write(*,*) 'Electron-nucleus', nucl_elec_ref_bitmask_energy
write(*,*) 'Two-electron', bi_elec_ref_bitmask_energy
write(*,'(A)') ''
write(*,'(A)') ''
write(*,'(A)') 'MO Occupation' write(*,'(A)') 'MO Occupation'
write(*,'(A)') '=============' write(*,'(A)') '============='
write(*,'(A)') '' write(*,'(A)') ''

View File

@ -42,18 +42,18 @@ subroutine mrsc2_dressing_slave(thread,iproc)
integer, allocatable :: hp(:,:) integer, allocatable :: hp(:,:)
integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2 integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, m, l, deg, ni, m2
integer :: n(2) integer :: n(2)
integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn
logical :: ok logical :: ok
double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al double precision :: phase_ia, phase_Ik, phase_Jl, phase_Ji, phase_la, phase_ka, phase_tmp
double precision :: Hka, Hla, Ska, Sla, tmp
double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states)
double precision :: contrib, contrib_s2, wall, iwall double precision :: contrib, contrib_s2, wall, iwall
double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ, exc
integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ
integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt
integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp
logical, external :: is_in_wavefunction, isInCassd, detEq logical, external :: is_in_wavefunction
integer,allocatable :: komon(:) integer,allocatable :: komon(:)
logical :: komoned logical :: komoned
!double precision, external :: get_dij !double precision, external :: get_dij
@ -63,8 +63,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) allocate (delta(N_states,0:N_det_non_ref, 2))
allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) allocate (delta_s2(N_states,0:N_det_non_ref, 2))
allocate(komon(0:N_det_non_ref)) allocate(komon(0:N_det_non_ref))
allocate(hp(2,N_det_non_ref)) allocate(hp(2,N_det_non_ref))
@ -100,7 +100,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
k = det_cepa0_idx(linked(kk, i_I)) k = det_cepa0_idx(linked(kk, i_I))
blok = blokMwen(kk, i_I) blok = blokMwen(kk, i_I)
call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,degree,phase_Ik,N_int) call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,deg,phase_Ik,N_int)
if(J /= i_I) then if(J /= i_I) then
call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int) call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int)
@ -136,35 +136,9 @@ subroutine mrsc2_dressing_slave(thread,iproc)
if(h_cache(J,i) == 0.d0) cycle if(h_cache(J,i) == 0.d0) cycle
if(h_cache(i_I,i) == 0.d0) cycle if(h_cache(i_I,i) == 0.d0) cycle
!ok = .false.
!do i_state=1, N_states
! if(lambda_mrcc(i_state, i) /= 0d0) then
! ok = .true.
! exit
! end if
!end do
!if(.not. ok) cycle
!
komon(0) += 1 komon(0) += 1
kn = komon(0) kn = komon(0)
komon(kn) = i komon(kn) = i
! call get_excitation(psi_ref(1,1,J),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ji,N_int)
! if(I_i /= J) call get_excitation(psi_ref(1,1,I_i),psi_non_ref(1,1,i),exc_IJ,degree2,phase_Ii,N_int)
! if(I_i == J) phase_Ii = phase_Ji
do i_state = 1,N_states
dkI = h_cache(J,i) * dij(i_I, i, i_state)
dleat(i_state, kn, 1) = dkI
dleat(i_state, kn, 2) = dkI
dkI = s2_cache(J,i) * dij(i_I, i, i_state)
dleat_s2(i_state, kn, 1) = dkI
dleat_s2(i_state, kn, 2) = dkI
end do
end do end do
komoned = .true. komoned = .true.
@ -178,18 +152,20 @@ subroutine mrsc2_dressing_slave(thread,iproc)
call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int)
if(.not. ok) cycle if(.not. ok) cycle
if(HP(1,i) + HP(1,k) <= 2 .and. HP(2,i) + HP(2,k) <= 2) then if(HP(1,i) + HP(1,k) <= 2 .and. HP(2,i) + HP(2,k) <= 2) then
cycle if(is_in_wavefunction(det_tmp, N_int)) cycle
end if end if
!if(isInCassd(det_tmp, N_int)) cycle
call i_h_j_phase_out(psi_non_ref(1,1,i), det_tmp, N_int, tmp, phase_ia,exc, deg)
call i_h_j_phase_out(psi_ref(1,1,i_I), psi_non_ref(1,1,k), N_int, tmp, phase_ik,exc, deg)
call i_h_j_phase_out(psi_non_ref(1,1,l), det_tmp, N_int, Hla, phase_la,exc,deg)
call get_s2(psi_non_ref(1,1,l), det_tmp, N_int, Sla)
do i_state = 1, N_states do i_state = 1, N_states
!if(lambda_mrcc(i_state, i) == 0d0) cycle contrib = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Hla * phase_ia * phase_ik
contrib_s2 = dij(i_I, k, i_state) * dij(i_I, i, i_state) * Sla *phase_ia * phase_ik
!contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al
contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2)
contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2)
delta(i_state,ll,1) += contrib delta(i_state,ll,1) += contrib
delta_s2(i_state,ll,1) += contrib_s2 delta_s2(i_state,ll,1) += contrib_s2
if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then
@ -198,9 +174,12 @@ subroutine mrsc2_dressing_slave(thread,iproc)
endif endif
if(I_i == J) cycle if(I_i == J) cycle
!contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al call i_h_j_phase_out(psi_non_ref(1,1,k), det_tmp, N_int, Hka, phase_ka,exc,deg)
contrib = dij(J, l, i_state) * dleat(i_state, m, 1) call get_s2(psi_non_ref(1,1,k), det_tmp, N_int, Ska)
contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) call i_h_j_phase_out(psi_ref(1,1,J), psi_non_ref(1,1,l), N_int, tmp, phase_jl,exc, deg)
contrib = dij(J, l, i_state) * dij(J, i, i_state) * Hka* phase_ia * phase_jl
contrib_s2 = dij(J, l, i_state) * dij(J, i, i_state) * Ska*phase_ia*phase_jl
delta(i_state,kk,2) += contrib delta(i_state,kk,2) += contrib
delta_s2(i_state,kk,2) += contrib_s2 delta_s2(i_state,kk,2) += contrib_s2
if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
@ -211,12 +190,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
end do ! while end do ! while
end do ! kk end do ! kk
call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
! end if
enddo enddo
deallocate(delta) deallocate(delta)

View File

@ -0,0 +1,108 @@
program print_integrals
! Driver that dumps the AO integrals to text files (the work is done in `run`).
PROVIDE ezfio_filename
! Both AO disk-access settings are set to 'None' in the EZFIO file before the
! integrals are requested.
! NOTE(review): presumably this forces the integrals to be recomputed rather
! than read back from disk -- confirm against the integrals providers.
call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals('None')
call ezfio_set_integrals_bielec_disk_access_ao_integrals('None')
call run
end
subroutine run
  ! Dump the AO integrals to formatted text files opened with getunitandopen:
  !   kinetic_ao : "i j value" taken from ao_kinetic_integral(i,j)
  !   overlap_ao : "i j value" taken from ao_overlap(i,j)
  !   nuclear_ao : "i j value" taken from ao_nucl_elec_integral(i,j)
  !   bielec_ao  : "i j k l value" taken from get_ao_bielec_integral(i,j,k,l,.)
  ! One-electron entries are written only when |value| > ao_integrals_threshold;
  ! two-electron entries only when |value| >= 1.d-15.
  implicit none
  integer                        :: iunit
  integer                        :: getunitandopen
  integer                        :: i,j,k,l
  double precision               :: integral
  double precision, external     :: get_ao_bielec_integral

  ! --- Kinetic energy integrals
  iunit = getunitandopen('kinetic_ao','w')
  do i=1,ao_num
    do j=1,ao_num
      integral = ao_kinetic_integral(i,j)
      if (dabs(integral) > ao_integrals_threshold) then
        write(iunit,*) i,j, integral
      endif
    enddo
  enddo
  close(iunit)

  ! --- Overlap integrals
  iunit = getunitandopen('overlap_ao','w')
  do i=1,ao_num
    do j=1,ao_num
      integral = ao_overlap(i,j)
      if (dabs(integral) > ao_integrals_threshold) then
        write(iunit,*) i,j, integral
      endif
    enddo
  enddo
  close(iunit)

  ! --- Nucleus-electron attraction integrals
  iunit = getunitandopen('nuclear_ao','w')
  do i=1,ao_num
    do j=1,ao_num
      integral = ao_nucl_elec_integral(i,j)
      if (dabs(integral) > ao_integrals_threshold) then
        write(iunit,*) i,j, integral
      endif
    enddo
  enddo
  close(iunit)

  ! Pseudo-potential integrals are currently not exported:
  ! iunit = getunitandopen('pseudo_ao','w')
  ! do i=1,ao_num
  !   do j=1,ao_num
  !     write(iunit,*) i,j, ao_pseudo_integral(i,j)
  !   enddo
  ! enddo
  ! close(iunit)

  ! --- Two-electron integrals
  ! The integrals are fetched one by one from ao_integrals_map. The former
  ! (disabled) cache-map traversal and the scratch buffers that were allocated
  ! for it have been removed: they were never used and only consumed memory.
  PROVIDE ao_bielec_integrals_in_map
  iunit = getunitandopen('bielec_ao','w')
  do i=1,ao_num
    do k=1,ao_num
      do j=1,ao_num
        do l=1,ao_num
          integral = get_ao_bielec_integral(i,j,k,l,ao_integrals_map)
          ! Double-precision literal: the value being tested is double precision.
          if (dabs(integral) >= 1.d-15) then
            write (iunit,'(4(I6),F20.15)') i,j,k,l, integral
          endif
        enddo
      enddo
    enddo
  enddo
  close(iunit)
end

View File

@ -49,7 +49,7 @@ program print_integrals
double precision :: get_mo_bielec_integral double precision :: get_mo_bielec_integral
integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
if (dabs(integral) > mo_integrals_threshold) then if (dabs(integral) > mo_integrals_threshold) then
write (iunit,'(4(I5,X),D22.15)') i,j,k,l, integral write (iunit,'(4(I6,X),F20.15)') i,j,k,l, integral
endif endif
!end if !end if
enddo enddo

View File

@ -0,0 +1,76 @@
program read_integrals
! Driver that loads AO integrals from text files (the work is done in `run`).
PROVIDE ezfio_filename
! The one-electron AO disk-access setting is set to "None" before reading;
! `run` switches it to "Read" once the integrals have been written.
call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("None")
call run
end
subroutine run
  ! Read AO integrals from text files and store them for later use:
  !   kinetic_ao : "i j value" -> written as 'ao_kinetic_integral'
  !   nuclear_ao : "i j value" -> written as 'ao_ne_integral'
  !   bielec_ao  : "i j k l value" -> inserted into ao_integrals_map, which is
  !                then sorted, made unique and saved under <ezfio>/work/ao_ints
  ! One-electron matrices are symmetrized on input (A(i,j) = A(j,i) = value).
  ! The current ao_pseudo_integral array is written out unchanged.
  ! Finally both disk-access settings are switched to "Read".
  use map_module
  implicit none
  integer                        :: iunit
  integer                        :: getunitandopen
  integer                        :: i,j,k,l
  double precision               :: integral
  double precision, allocatable  :: A(:,:)

  ! Two-electron integrals are inserted into the map by chunks of buffer_size
  ! entries. The previous ao_num**4 buffer overflowed default integers for
  ! ao_num > 215 and needed O(ao_num**4) memory.
  integer, parameter             :: buffer_size = 1048576
  integer                        :: n_integrals
  integer(key_kind), allocatable :: buffer_i(:)
  real(integral_kind), allocatable :: buffer_values(:)

  allocate (A(ao_num,ao_num))

  ! --- Kinetic energy integrals
  A = 0.d0
  iunit = getunitandopen('kinetic_ao','r')
  do
    read (iunit,*,end=10) i,j, integral
    A(i,j) = integral
    A(j,i) = integral
  enddo
  10 continue
  close(iunit)
  call write_one_e_integrals('ao_kinetic_integral', A, size(A,1), size(A,2))

  ! --- Nucleus-electron attraction integrals
  A = 0.d0
  iunit = getunitandopen('nuclear_ao','r')
  do
    read (iunit,*,end=12) i,j, integral
    A(i,j) = integral
    A(j,i) = integral
  enddo
  12 continue
  close(iunit)
  call write_one_e_integrals('ao_ne_integral', A, size(A,1), size(A,2))
  deallocate (A)

  call write_one_e_integrals('ao_pseudo_integral', ao_pseudo_integral,&
      size(ao_pseudo_integral,1), size(ao_pseudo_integral,2))
  call ezfio_set_integrals_monoelec_disk_access_ao_one_integrals("Read")

  ! --- Two-electron integrals
  allocate(buffer_i(buffer_size), buffer_values(buffer_size))
  iunit = getunitandopen('bielec_ao','r')
  n_integrals=0
  do
    read (iunit,*,end=13) i,j,k,l, integral
    n_integrals += 1
    call bielec_integrals_index(i, j, k, l, buffer_i(n_integrals) )
    buffer_values(n_integrals) = integral
    ! Flush the buffer when full.
    ! NOTE(review): relies on insert_into_ao_integrals_map appending to the
    ! map on each call -- confirm against its definition.
    if (n_integrals == buffer_size) then
      call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values)
      n_integrals = 0
    endif
  enddo
  13 continue
  close(iunit)

  ! Insert what is left in the last, partially filled, buffer
  if (n_integrals > 0) then
    call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values)
  endif
  deallocate(buffer_i, buffer_values)

  call map_sort(ao_integrals_map)
  call map_unique(ao_integrals_map)
  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
  call ezfio_set_integrals_bielec_disk_access_ao_integrals('Read')
end

View File

@ -1,5 +1,10 @@
program read_integrals program read_integrals
BEGIN_DOC
! Reads the integrals from the following files:
! - kinetic_mo
! - nuclear_mo
! - bielec_mo
END_DOC
PROVIDE ezfio_filename PROVIDE ezfio_filename
call ezfio_set_integrals_monoelec_disk_access_mo_one_integrals("None") call ezfio_set_integrals_monoelec_disk_access_mo_one_integrals("None")
call run call run

View File

@ -36,6 +36,7 @@ except ImportError:
from qp_path import QP_ROOT, QP_SRC, QP_EZFIO from qp_path import QP_ROOT, QP_SRC, QP_EZFIO
LIB = "" # join(QP_ROOT, "lib", "rdtsc.o") LIB = "" # join(QP_ROOT, "lib", "rdtsc.o")
GPI_LIB = join(QP_ROOT, "lib64", "libGPI2.a")
EZFIO_LIB = join(QP_ROOT, "lib", "libezfio_irp.a") EZFIO_LIB = join(QP_ROOT, "lib", "libezfio_irp.a")
ZMQ_LIB = join(QP_ROOT, "lib", "libf77zmq.a") + " " + join(QP_ROOT, "lib", "libzmq.a") + " -lstdc++ -lrt" ZMQ_LIB = join(QP_ROOT, "lib", "libf77zmq.a") + " " + join(QP_ROOT, "lib", "libzmq.a") + " -lstdc++ -lrt"
ROOT_BUILD_NINJA = join(QP_ROOT, "config", "build.ninja") ROOT_BUILD_NINJA = join(QP_ROOT, "config", "build.ninja")
@ -96,8 +97,7 @@ def ninja_create_env_variable(pwd_config_file):
l_string.append(str_) l_string.append(str_)
lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB") lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB")
lib_gpi2 = get_compilation_option(pwd_config_file, "GPI2_LIB") str_lib = " ".join([LIB, lib_lapack, GPI_LIB, EZFIO_LIB, ZMQ_LIB])
str_lib = " ".join([LIB, lib_lapack, lib_gpi2, EZFIO_LIB, ZMQ_LIB])
l_string.append("LIB = {0} ".format(str_lib)) l_string.append("LIB = {0} ".format(str_lib))
l_string.append("") l_string.append("")
@ -266,7 +266,7 @@ def ninja_ezfio_rule():
install_lib_ezfio = join(QP_ROOT, 'install', 'EZFIO', "lib", "libezfio_irp.a") install_lib_ezfio = join(QP_ROOT, 'install', 'EZFIO', "lib", "libezfio_irp.a")
l_cmd = ["cd {0}".format(QP_EZFIO)] + l_flag l_cmd = ["cd {0}".format(QP_EZFIO)] + l_flag
l_cmd += ["rm -f make.config ; ninja && ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)] l_cmd += ["rm -f make.config ; ninja && rm -f {1} ; ln -sf {0} {1}".format(install_lib_ezfio, EZFIO_LIB)]
l_string = ["rule build_ezfio", l_string = ["rule build_ezfio",
" command = {0}".format(" ; ".join(l_cmd)), " command = {0}".format(" ; ".join(l_cmd)),
@ -307,7 +307,7 @@ def ninja_symlink_rule():
""" """
Return the command to create for the symlink Return the command to create for the symlink
""" """
return ["rule build_symlink", " command = ln -sf $in $out", ""] return ["rule build_symlink", " command = rm -f $out ; ln -sf $in $out", ""]
def ninja_symlink_build(path_module, l_symlink): def ninja_symlink_build(path_module, l_symlink):

View File

@ -205,10 +205,10 @@ subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull imin"
rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull imax"
sz = (imax-imin+1)*N_states_diag sz = (imax-imin+1)*N_states_diag

View File

@ -0,0 +1,22 @@
program print_energy
! Driver that prints the energy of the stored wave function (see `routine`).
implicit none
! read_wf is set to .true. and touched before the energy is requested.
! NOTE(review): presumably this makes the wave function be read from the
! EZFIO file and invalidates everything that depends on read_wf -- confirm.
read_wf = .true.
touch read_wf
call routine
end
subroutine routine
  ! Print psi_energy shifted by nuclear_repulsion.
  implicit none
  integer                        :: i
  integer                        :: j
  double precision               :: hij
  double precision               :: accu

  accu = 0.d0
  print*, 'psi_energy = ',psi_energy + nuclear_repulsion

  ! Disabled cross-check: explicit <psi|H|psi> for the first state.
  !  do i = 1,N_det
  !   do j = 1,N_det
  !    call i_H_j(psi_det(1,1,j),psi_det(1,1,i),N_int,hij)
  !    accu += psi_coef(i,1) * psi_coef(j,1) * hij
  !   enddo
  !  enddo
  !  print*, 'accu       = ',accu + nuclear_repulsion
end

View File

@ -192,7 +192,7 @@ subroutine copy_H_apply_buffer_to_wf
call normalize(psi_coef,N_det) call normalize(psi_coef,N_det)
SOFT_TOUCH N_det psi_det psi_coef SOFT_TOUCH N_det psi_det psi_coef
logical :: found_duplicates ! logical :: found_duplicates
! call remove_duplicates_in_psi_det(found_duplicates) ! call remove_duplicates_in_psi_det(found_duplicates)
end end

View File

@ -435,62 +435,32 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
! Save the wave function into the EZFIO file ! Save the wave function into the EZFIO file
END_DOC END_DOC
use bitmasks use bitmasks
include 'constants.include.F'
integer, intent(in) :: ndet,nstates,dim_psicoef integer, intent(in) :: ndet,nstates,dim_psicoef
integer(bit_kind), intent(in) :: psidet(N_int,2,ndet) integer(bit_kind), intent(in) :: psidet(N_int,2,ndet)
double precision, intent(in) :: psicoef(dim_psicoef,nstates) double precision, intent(in) :: psicoef(dim_psicoef,nstates)
integer*8, allocatable :: psi_det_save(:,:,:) integer*8, allocatable :: psi_det_save(:,:,:)
double precision, allocatable :: psi_coef_save(:,:) double precision, allocatable :: psi_coef_save(:,:)
integer*8 :: det_8(100)
integer(bit_kind) :: det_bk((100*8)/bit_kind)
integer :: N_int2
equivalence (det_8, det_bk)
integer :: i,k integer :: i,j,k
PROVIDE progress_bar
call start_progress(7,'Saving wfunction',0.d0)
progress_bar(1) = 1
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_N_int(N_int) call ezfio_set_determinants_N_int(N_int)
progress_bar(1) = 2
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_bit_kind(bit_kind) call ezfio_set_determinants_bit_kind(bit_kind)
progress_bar(1) = 3
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_N_det(ndet) call ezfio_set_determinants_N_det(ndet)
progress_bar(1) = 4
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_n_states(nstates) call ezfio_set_determinants_n_states(nstates)
progress_bar(1) = 5
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_mo_label(mo_label) call ezfio_set_determinants_mo_label(mo_label)
progress_bar(1) = 6 allocate (psi_det_save(N_int,2,ndet))
progress_value = dble(progress_bar(1))
N_int2 = (N_int*bit_kind)/8
allocate (psi_det_save(N_int2,2,ndet))
do i=1,ndet do i=1,ndet
do j=1,2
do k=1,N_int do k=1,N_int
det_bk(k) = psidet(k,1,i) psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8)
enddo enddo
do k=1,N_int2
psi_det_save(k,1,i) = det_8(k)
enddo enddo
do k=1,N_int
det_bk(k) = psidet(k,2,i)
enddo
do k=1,N_int2
psi_det_save(k,2,i) = det_8(k)
enddo
! print*,psi_det_save
enddo enddo
call ezfio_set_determinants_psi_det(psi_det_save) call ezfio_set_determinants_psi_det(psi_det_save)
deallocate (psi_det_save) deallocate (psi_det_save)
progress_bar(1) = 7
progress_value = dble(progress_bar(1))
allocate (psi_coef_save(ndet,nstates)) allocate (psi_coef_save(ndet,nstates))
double precision :: accu_norm(nstates) double precision :: accu_norm(nstates)
accu_norm = 0.d0 accu_norm = 0.d0
@ -511,7 +481,6 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
call ezfio_set_determinants_psi_coef(psi_coef_save) call ezfio_set_determinants_psi_coef(psi_coef_save)
call write_int(output_determinants,ndet,'Saved determinants') call write_int(output_determinants,ndet,'Saved determinants')
call stop_progress
deallocate (psi_coef_save) deallocate (psi_coef_save)
end end
@ -537,28 +506,12 @@ subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde
integer :: i,k integer :: i,k
PROVIDE progress_bar
call start_progress(7,'Saving wfunction',0.d0)
progress_bar(1) = 1
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_N_int(N_int) call ezfio_set_determinants_N_int(N_int)
progress_bar(1) = 2
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_bit_kind(bit_kind) call ezfio_set_determinants_bit_kind(bit_kind)
progress_bar(1) = 3
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_N_det(ndetsave) call ezfio_set_determinants_N_det(ndetsave)
progress_bar(1) = 4
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_n_states(nstates) call ezfio_set_determinants_n_states(nstates)
progress_bar(1) = 5
progress_value = dble(progress_bar(1))
call ezfio_set_determinants_mo_label(mo_label) call ezfio_set_determinants_mo_label(mo_label)
progress_bar(1) = 6
progress_value = dble(progress_bar(1))
N_int2 = (N_int*bit_kind)/8 N_int2 = (N_int*bit_kind)/8
allocate (psi_det_save(N_int2,2,ndetsave)) allocate (psi_det_save(N_int2,2,ndetsave))
do i=1,ndetsave do i=1,ndetsave
@ -600,7 +553,6 @@ subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,inde
call ezfio_set_determinants_psi_coef(psi_coef_save) call ezfio_set_determinants_psi_coef(psi_coef_save)
call write_int(output_determinants,ndet,'Saved determinants') call write_int(output_determinants,ndet,'Saved determinants')
call stop_progress
deallocate (psi_coef_save) deallocate (psi_coef_save)
end end

View File

@ -234,61 +234,66 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint)
cycle cycle
case(1) case(1)
low = min(exc(1,1,ispin), exc(1,2,ispin))
high = max(exc(1,1,ispin), exc(1,2,ispin))
ASSERT (low > 0) high = max(exc(1,1,ispin), exc(1,2,ispin))-1
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint) low = min(exc(1,1,ispin), exc(1,2,ispin))
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (low >= 0)
ASSERT (high > 0) ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1 k = ishft(high,-bit_kind_shift)+1
j = ishft(low,-bit_kind_shift)+1
m = iand(high,bit_kind_size-1)
n = iand(low,bit_kind_size-1)
if (j==k) then if (j==k) then
nperm = nperm + popcnt(iand(det1(j,ispin), & nperm = nperm + popcnt(iand(det1(j,ispin), &
iand( ibset(0_bit_kind,m-1)-1_bit_kind, & iand( ishft(1_bit_kind,m)-1_bit_kind, &
ibclr(-1_bit_kind,n)+1_bit_kind ) )) not(ishft(1_bit_kind,n))+1_bit_kind)) )
! TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, &
! ishft(1_bit_kind,m)-1_bit_kind)))
else else
nperm = nperm + popcnt(iand(det1(k,ispin), & nperm = nperm + popcnt( &
ibset(0_bit_kind,m-1)-1_bit_kind)) iand(det1(j,ispin), &
! TODO ishft(1_bit_kind,m)-1_bit_kind)) iand(not(0_bit_kind), &
if (n < bit_kind_size) then (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) &
nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) + popcnt(iand(det1(k,ispin), &
! TODO ishft(1_bit_kind,m)-1_bit_kind)) (ishft(1_bit_kind,m) - 1_bit_kind ) ))
endif
do i=j+1,k-1 do i=j+1,k-1
nperm = nperm + popcnt(det1(i,ispin)) nperm = nperm + popcnt(det1(i,ispin))
end do end do
endif endif
case (2) case (2)
do i=1,2 do l=1,2
low = min(exc(i,1,ispin), exc(i,2,ispin)) high = max(exc(l,1,ispin), exc(l,2,ispin))-1
high = max(exc(i,1,ispin), exc(i,2,ispin)) low = min(exc(l,1,ispin), exc(l,2,ispin))
ASSERT (low > 0) ASSERT (low > 0)
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (high > 0) ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1 k = ishft(high,-bit_kind_shift)+1
j = ishft(low,-bit_kind_shift)+1
m = iand(high,bit_kind_size-1)
n = iand(low,bit_kind_size-1)
if (j==k) then if (j==k) then
nperm = nperm + popcnt(iand(det1(j,ispin), & nperm = nperm + popcnt(iand(det1(j,ispin), &
iand( ibset(0_bit_kind,m-1)-1_bit_kind, & iand( ishft(1_bit_kind,m)-1_bit_kind, &
ibclr(-1_bit_kind,n)+1_bit_kind ) )) not(ishft(1_bit_kind,n))+1_bit_kind)) )
else else
nperm = nperm + popcnt(iand(det1(k,ispin), & nperm = nperm + popcnt( &
ibset(0_bit_kind,m-1)-1_bit_kind)) iand(det1(j,ispin), &
if (n < bit_kind_size) then iand(not(0_bit_kind), &
nperm = nperm + popcnt(iand(det1(j,ispin), ibclr(-1_bit_kind,n) +1_bit_kind)) (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) &
endif + popcnt(iand(det1(k,ispin), &
do l=j+1,k-1 (ishft(1_bit_kind,m) - 1_bit_kind ) ))
nperm = nperm + popcnt(det1(l,ispin))
do i=j+1,k-1
nperm = nperm + popcnt(det1(i,ispin))
end do end do
endif endif
enddo enddo
@ -297,7 +302,7 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint)
b = max(exc(1,1,ispin), exc(1,2,ispin)) b = max(exc(1,1,ispin), exc(1,2,ispin))
c = min(exc(2,1,ispin), exc(2,2,ispin)) c = min(exc(2,1,ispin), exc(2,2,ispin))
d = max(exc(2,1,ispin), exc(2,2,ispin)) d = max(exc(2,1,ispin), exc(2,2,ispin))
if (c>a .and. c<b .and. d>b) then if ((a<c) .and. (c<b) .and. (b<d)) then
nperm = nperm + 1 nperm = nperm + 1
endif endif
exit exit
@ -359,36 +364,41 @@ subroutine get_mono_excitation(det1,det2,exc,phase,Nint)
cycle cycle
endif endif
high = max(exc(1,1,ispin), exc(1,2,ispin))-1
low = min(exc(1,1,ispin), exc(1,2,ispin)) low = min(exc(1,1,ispin), exc(1,2,ispin))
high = max(exc(1,1,ispin),exc(1,2,ispin))
ASSERT (low > 0) ASSERT (low >= 0)
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (high > 0) ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1 k = ishft(high,-bit_kind_shift)+1
j = ishft(low,-bit_kind_shift)+1
m = iand(high,bit_kind_size-1)
n = iand(low,bit_kind_size-1)
if (j==k) then if (j==k) then
nperm = popcnt(iand(det1(j,ispin), & nperm = nperm + popcnt(iand(det1(j,ispin), &
iand(ibset(0_bit_kind,m-1)-1_bit_kind,ibclr(-1_bit_kind,n)+1_bit_kind))) iand( ishft(1_bit_kind,m)-1_bit_kind, &
!TODO iand( not(ishft(1_bit_kind,n+1))+1_bit_kind, & not(ishft(1_bit_kind,n))+1_bit_kind)) )
! ishft(1_bit_kind,m)-1_bit_kind)))
else else
nperm = nperm + popcnt(iand(det1(k,ispin),ibset(0_bit_kind,m-1)-1_bit_kind)) nperm = nperm + popcnt( &
!TODO nperm = popcnt(iand(det1(k,ispin), ishft(1_bit_kind,m)-1_bit_kind)) + & iand(det1(j,ispin), &
! popcnt(iand(det1(j,ispin), not(ishft(1_bit_kind,n+1))+1_bit_kind)) iand(not(0_bit_kind), &
if (n < bit_kind_size) then (not(ishft(1_bit_kind,n)) + 1_bit_kind) ))) &
nperm = nperm + popcnt(iand(det1(j,ispin),ibclr(-1_bit_kind,n)+1_bit_kind)) + popcnt(iand(det1(k,ispin), &
endif (ishft(1_bit_kind,m) - 1_bit_kind ) ))
do i=j+1,k-1 do i=j+1,k-1
nperm = nperm + popcnt(det1(i,ispin)) nperm = nperm + popcnt(det1(i,ispin))
end do end do
endif endif
phase = phase_dble(iand(nperm,1)) phase = phase_dble(iand(nperm,1))
return return
enddo enddo
enddo enddo
end end
subroutine bitstring_to_list_ab( string, list, n_elements, Nint) subroutine bitstring_to_list_ab( string, list, n_elements, Nint)
@ -428,7 +438,6 @@ subroutine bitstring_to_list_ab( string, list, n_elements, Nint)
enddo enddo
end end
subroutine bitstring_to_list_ab_old( string, list, n_elements, Nint) subroutine bitstring_to_list_ab_old( string, list, n_elements, Nint)
use bitmasks use bitmasks
implicit none implicit none
@ -2030,6 +2039,112 @@ subroutine get_occ_from_key(key,occ,Nint)
end end
subroutine get_double_excitation_phase_new(det1,det2,exc,phase,Nint)
  ! Computes the phase (+1.d0 or -1.d0) associated with the excitation described
  ! by exc, counting the occupied orbitals of det1 lying between each
  ! hole/particle pair.
  !
  ! det1, det2 : determinants as (Nint,2) bit strings (det2 is not used here)
  ! exc        : exc(0,1,ispin) is the number of excitations of spin ispin,
  !              exc(l,1,ispin) / exc(l,2,ispin) the two orbitals of pair l
  ! phase      : phase_dble(parity of the number of permutations)
  use bitmasks
  implicit none
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: det1(Nint,2)
  integer(bit_kind), intent(in)  :: det2(Nint,2)
  integer, intent(in)            :: exc(0:2,2,2)
  double precision, intent(out)  :: phase
  integer                        :: l, ispin
  integer                        :: nperm
  integer                        :: i,j,k,m,n
  integer                        :: high, low
  integer                        :: a,b,c,d
  double precision, parameter    :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)

  ASSERT (Nint > 0)
  nperm = 0
  do ispin = 1,2
    select case (exc(0,1,ispin))
      case(0)
        cycle

      case(1)
        ! high : 0-based bit position of the highest orbital of the pair
        ! low  : 0-based bit position just above the lowest orbital
        high = max(exc(1,1,ispin), exc(1,2,ispin))-1
        low  = min(exc(1,1,ispin), exc(1,2,ispin))
        ASSERT (low >= 0)
        ASSERT (high > 0)
        ! det1 is indexed from 1: the word index is (position / bit_kind_size) + 1.
        ! Omitting the +1 addressed det1(0,ispin) and counted the wrong words.
        k = ishft(high,-bit_kind_shift)+1
        j = ishft(low,-bit_kind_shift)+1
        m = iand(high,bit_kind_size-1)
        n = iand(low,bit_kind_size-1)
        if (j==k) then
          ! Same word: keep bits n <= pos < m
          nperm = nperm + popcnt(iand(det1(j,ispin),                 &
              iand( ishft(1_bit_kind,m)-1_bit_kind,                  &
              not(ishft(1_bit_kind,n))+1_bit_kind)) )
        else
          ! Bits >= n of word j, bits < m of word k, and all words in between
          nperm = nperm + popcnt(                                    &
              iand(det1(j,ispin),                                    &
              iand(not(0_bit_kind),                                  &
              (not(ishft(1_bit_kind,n)) + 1_bit_kind) )))            &
              + popcnt(iand(det1(k,ispin),                           &
              (ishft(1_bit_kind,m) - 1_bit_kind ) ))
          do i=j+1,k-1
            nperm = nperm + popcnt(det1(i,ispin))
          end do
        endif

      case (2)
        do l=1,2
          high = max(exc(l,1,ispin), exc(l,2,ispin))-1
          low  = min(exc(l,1,ispin), exc(l,2,ispin))
          ASSERT (low > 0)
          ASSERT (high > 0)
          ! 1-based word indices (see case(1))
          k = ishft(high,-bit_kind_shift)+1
          j = ishft(low,-bit_kind_shift)+1
          m = iand(high,bit_kind_size-1)
          n = iand(low,bit_kind_size-1)
          if (j==k) then
            nperm = nperm + popcnt(iand(det1(j,ispin),               &
                iand( ishft(1_bit_kind,m)-1_bit_kind,                &
                not(ishft(1_bit_kind,n))+1_bit_kind)) )
          else
            nperm = nperm + popcnt(                                  &
                iand(det1(j,ispin),                                  &
                iand(not(0_bit_kind),                                &
                (not(ishft(1_bit_kind,n)) + 1_bit_kind) )))          &
                + popcnt(iand(det1(k,ispin),                         &
                (ishft(1_bit_kind,m) - 1_bit_kind ) ))
            do i=j+1,k-1
              nperm = nperm + popcnt(det1(i,ispin))
            end do
          endif
        enddo

        ! One extra permutation when the two pairs interleave: a < c < b < d
        a = min(exc(1,1,ispin), exc(1,2,ispin))
        b = max(exc(1,1,ispin), exc(1,2,ispin))
        c = min(exc(2,1,ispin), exc(2,2,ispin))
        d = max(exc(2,1,ispin), exc(2,2,ispin))
        if (c>a .and. c<b .and. d>b) then
          nperm = nperm + 1
        endif
        ! Both excitations have the same spin: the other spin needs no processing
        exit
    end select
  enddo
  phase = phase_dble(iand(nperm,1))
end
subroutine get_double_excitation_phase(det1,det2,exc,phase,Nint) subroutine get_double_excitation_phase(det1,det2,exc,phase,Nint)
use bitmasks use bitmasks
implicit none implicit none
@ -2315,6 +2430,356 @@ subroutine decode_exc_spin(exc,h1,p1,h2,p2)
end select end select
end end
subroutine get_excitation_degree_spin_new(key1,key2,degree,Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Returns the excitation degree between two spin-determinants: half the
  ! number of bits that differ between key1 and key2.
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: key1(Nint)
  integer(bit_kind), intent(in)  :: key2(Nint)
  integer, intent(out)           :: degree
  integer                        :: iword, ndiff

  ASSERT (Nint > 0)

  ! Accumulate the number of differing bits word by word
  ndiff = 0
  do iword=1,Nint
    ndiff = ndiff + popcnt(ieor(key1(iword), key2(iword)))
  enddo

  ! Each excitation flips two bits (one hole, one particle)
  degree = ishft(ndiff,-1)
end
subroutine get_excitation_spin_new(det1,det2,exc,degree,phase,Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Returns the excitation operators between two spin-determinants and the phase.
  !
  ! exc(number,hole/particle) :
  !   exc(0,1) = number of holes       exc(1,1) = first hole
  !   exc(0,2) = number of particles   exc(1,2) = first particle
  !
  ! degree is set to -1 when the determinants differ by more than a double
  ! excitation; exc and phase are then left untouched, as they are for degree 0.
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: det1(Nint)
  integer(bit_kind), intent(in)  :: det2(Nint)
  integer, intent(out)           :: exc(0:2,2)
  integer, intent(out)           :: degree
  double precision, intent(out)  :: phase

  ASSERT (Nint > 0)

  ! NOTE(review): the helpers called below are the non-"_new" variants --
  ! confirm that this is intended.
  !DIR$ FORCEINLINE
  call get_excitation_degree_spin(det1,det2,degree,Nint)

  if (degree >= 3) then
    degree = -1
  else if (degree == 2) then
    call get_double_excitation_spin(det1,det2,exc,phase,Nint)
  else if (degree == 1) then
    call get_mono_excitation_spin(det1,det2,exc,phase,Nint)
  endif
end
subroutine decode_exc_spin_new(exc,h1,p1,h2,p2)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Decodes the exc array returned by get_excitation_spin.
  ! h1,h2 : Holes
  ! p1,p2 : Particles
  ! Entries that do not exist for the given number of excitations are set to 0.
  END_DOC
  integer, intent(in)            :: exc(0:2,2)
  integer, intent(out)           :: h1,h2,p1,p2
  integer                        :: n_exc

  n_exc = exc(0,1)

  ! Default: no excitation
  h1 = 0
  p1 = 0
  h2 = 0
  p2 = 0

  ! First hole/particle pair is present for single and double excitations
  if ( (n_exc == 1) .or. (n_exc == 2) ) then
    h1 = exc(1,1)
    p1 = exc(1,2)
  endif

  ! Second pair only for double excitations
  if (n_exc == 2) then
    h2 = exc(2,1)
    p2 = exc(2,2)
  endif
end
subroutine get_double_excitation_spin_new(det1,det2,exc,phase,Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Returns the two excitation operators between two doubly excited spin-determinants
  ! and the phase.
  !
  ! exc(0,1) / exc(0,2) : number of holes / particles found
  ! exc(l,1) / exc(l,2) : l-th hole (bit set in det1 only) / l-th particle
  !                       (bit set in det2 only), as 1-based orbital indices
  ! phase               : +1.d0 or -1.d0, from the parity of the number of
  !                       occupied orbitals of det1 between each hole/particle pair
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: det1(Nint)
  integer(bit_kind), intent(in)  :: det2(Nint)
  integer, intent(out)           :: exc(0:2,2)
  double precision, intent(out)  :: phase
  integer                        :: tz
  integer                        :: l, idx_hole, idx_particle, ishift
  integer                        :: nperm
  integer                        :: i,j,k,m,n
  integer                        :: high, low
  integer                        :: a,b,c,d
  integer(bit_kind)              :: hole, particle, tmp
  double precision, parameter    :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)

  ASSERT (Nint > 0)
  nperm = 0
  exc(0,1) = 0
  exc(0,2) = 0
  idx_particle = 0
  idx_hole = 0

  ! --- Locate holes and particles, word by word.
  ! ishift converts a 0-based bit position in word l into a 1-based orbital index.
  ishift = 1-bit_kind_size
  do l=1,Nint
    ishift = ishift + bit_kind_size
    if (det1(l) == det2(l)) then
      cycle
    endif
    tmp = xor( det1(l), det2(l) )
    particle = iand(tmp, det2(l))
    hole     = iand(tmp, det1(l))
    do while (particle /= 0_bit_kind)
      tz = trailz(particle)
      idx_particle = idx_particle + 1
      exc(0,2) = exc(0,2) + 1
      exc(idx_particle,2) = tz+ishift
      ! Clear the lowest set bit
      particle = iand(particle,particle-1_bit_kind)
    enddo
    if (iand(exc(0,1),exc(0,2))==2) then  ! exc(0,1)==2 and exc(0,2)==2
      exit
    endif
    do while (hole /= 0_bit_kind)
      tz = trailz(hole)
      idx_hole = idx_hole + 1
      exc(0,1) = exc(0,1) + 1
      exc(idx_hole,1) = tz+ishift
      hole = iand(hole,hole-1_bit_kind)
    enddo
    if (iand(exc(0,1),exc(0,2))==2) then  ! exc(0,1)==2 and exc(0,2)==2
      exit
    endif
  enddo

  ! --- Count the permutations
  select case (exc(0,1))
    case(1)
      ! high : 0-based bit position of the highest orbital of the pair
      ! low  : 0-based bit position just above the lowest orbital
      high = max(exc(1,1), exc(1,2))-1
      low  = min(exc(1,1), exc(1,2))
      ASSERT (low >= 0)
      ASSERT (high > 0)
      ! det1 is indexed from 1: the word index is (position / bit_kind_size) + 1.
      ! Omitting the +1 addressed det1(0) and counted the wrong words.
      k = ishft(high,-bit_kind_shift)+1
      j = ishft(low,-bit_kind_shift)+1
      m = iand(high,bit_kind_size-1)
      n = iand(low,bit_kind_size-1)
      if (j==k) then
        ! Same word: keep bits n <= pos < m
        nperm = nperm + popcnt(iand(det1(j),                         &
            iand( ishft(1_bit_kind,m)-1_bit_kind,                    &
            not(ishft(1_bit_kind,n))+1_bit_kind)) )
      else
        ! Bits >= n of word j, bits < m of word k, and all words in between
        nperm = nperm + popcnt(                                      &
            iand(det1(j),                                            &
            iand(not(0_bit_kind),                                    &
            (not(ishft(1_bit_kind,n)) + 1_bit_kind) )))              &
            + popcnt(iand(det1(k),                                   &
            (ishft(1_bit_kind,m) - 1_bit_kind ) ))
        do i=j+1,k-1
          nperm = nperm + popcnt(det1(i))
        end do
      endif

    case (2)
      do l=1,2
        high = max(exc(l,1), exc(l,2))-1
        low  = min(exc(l,1), exc(l,2))
        ASSERT (low > 0)
        ASSERT (high > 0)
        ! 1-based word indices (see case(1))
        k = ishft(high,-bit_kind_shift)+1
        j = ishft(low,-bit_kind_shift)+1
        m = iand(high,bit_kind_size-1)
        n = iand(low,bit_kind_size-1)
        if (j==k) then
          nperm = nperm + popcnt(iand(det1(j),                       &
              iand( ishft(1_bit_kind,m)-1_bit_kind,                  &
              not(ishft(1_bit_kind,n))+1_bit_kind)) )
        else
          nperm = nperm + popcnt(                                    &
              iand(det1(j),                                          &
              iand(not(0_bit_kind),                                  &
              (not(ishft(1_bit_kind,n)) + 1_bit_kind) )))            &
              + popcnt(iand(det1(k),                                 &
              (ishft(1_bit_kind,m) - 1_bit_kind ) ))
          do i=j+1,k-1
            nperm = nperm + popcnt(det1(i))
          end do
        endif
      enddo

      ! One extra permutation when the two pairs interleave: a < c < b < d
      a = min(exc(1,1), exc(1,2))
      b = max(exc(1,1), exc(1,2))
      c = min(exc(2,1), exc(2,2))
      d = max(exc(2,1), exc(2,2))
      if (c>a .and. c<b .and. d>b) then
        nperm = nperm + 1
      endif
  end select
  phase = phase_dble(iand(nperm,1))
end
subroutine get_mono_excitation_spin_new(det1,det2,exc,phase,Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Returns the excitation operator between two singly excited determinants and the phase.
  !
  ! exc(0,1) / exc(0,2) : number of holes / particles found (0 or 1)
  ! exc(1,1) / exc(1,2) : the hole (bit set in det1 only) / the particle
  !                       (bit set in det2 only), as 1-based orbital indices
  ! phase               : +1.d0 or -1.d0, from the parity of the number of
  !                       occupied orbitals of det1 between hole and particle.
  !                       It is only assigned once both have been found.
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: det1(Nint)
  integer(bit_kind), intent(in)  :: det2(Nint)
  integer, intent(out)           :: exc(0:2,2)
  double precision, intent(out)  :: phase
  integer                        :: tz
  integer                        :: l, ishift
  integer                        :: nperm
  integer                        :: i,j,k,m,n
  integer                        :: high, low
  integer(bit_kind)              :: hole, particle, tmp
  double precision, parameter    :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)

  ASSERT (Nint > 0)
  nperm = 0
  exc(0,1) = 0
  exc(0,2) = 0

  ! ishift converts a 0-based bit position in word l into a 1-based orbital index
  ishift = 1-bit_kind_size
  do l=1,Nint
    ishift = ishift + bit_kind_size
    if (det1(l) == det2(l)) then
      cycle
    endif
    tmp = xor( det1(l), det2(l) )
    particle = iand(tmp, det2(l))
    hole     = iand(tmp, det1(l))
    if (particle /= 0_bit_kind) then
      tz = trailz(particle)
      exc(0,2) = 1
      exc(1,2) = tz+ishift
    endif
    if (hole /= 0_bit_kind) then
      tz = trailz(hole)
      exc(0,1) = 1
      exc(1,1) = tz+ishift
    endif

    ! Keep scanning until both the hole and the particle have been found
    if ( iand(exc(0,1),exc(0,2)) /= 1) then  ! exc(0,1)/=1 or exc(0,2)/=1
      cycle
    endif

    ! high : 0-based bit position of the highest of the two orbitals
    ! low  : 0-based bit position just above the lowest one
    high = max(exc(1,1), exc(1,2))-1
    low  = min(exc(1,1), exc(1,2))
    ASSERT (low >= 0)
    ASSERT (high > 0)
    ! det1 is indexed from 1: the word index is (position / bit_kind_size) + 1.
    ! Omitting the +1 addressed det1(0) and counted the wrong words.
    k = ishft(high,-bit_kind_shift)+1
    j = ishft(low,-bit_kind_shift)+1
    m = iand(high,bit_kind_size-1)
    n = iand(low,bit_kind_size-1)
    if (j==k) then
      ! Same word: keep bits n <= pos < m
      nperm = nperm + popcnt(iand(det1(j),                           &
          iand( ishft(1_bit_kind,m)-1_bit_kind,                      &
          not(ishft(1_bit_kind,n))+1_bit_kind)) )
    else
      ! Bits >= n of word j, bits < m of word k, and all words in between
      nperm = nperm + popcnt(                                        &
          iand(det1(j),                                              &
          iand(not(0_bit_kind),                                      &
          (not(ishft(1_bit_kind,n)) + 1_bit_kind) )))                &
          + popcnt(iand(det1(k),                                     &
          (ishft(1_bit_kind,m) - 1_bit_kind ) ))
      do i=j+1,k-1
        nperm = nperm + popcnt(det1(i))
      end do
    endif
    phase = phase_dble(iand(nperm,1))
    return
  enddo
end
subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint) subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint)
use bitmasks use bitmasks

View File

@ -365,8 +365,9 @@ end
do k=1,N_det do k=1,N_det
i = psi_bilinear_matrix_rows(k) i = psi_bilinear_matrix_rows(k)
j = psi_bilinear_matrix_columns(k) j = psi_bilinear_matrix_columns(k)
f = 0.d0
do l=1,N_states do l=1,N_states
f = psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l) f += psi_bilinear_matrix_values(k,l)*psi_bilinear_matrix_values(k,l)
enddo enddo
det_alpha_norm(i) += f det_alpha_norm(i) += f
det_beta_norm(j) += f det_beta_norm(j) += f
@ -690,7 +691,7 @@ subroutine generate_all_alpha_beta_det_products
integer, external :: get_index_in_psi_det_sorted_bit integer, external :: get_index_in_psi_det_sorted_bit
integer(bit_kind), allocatable :: tmp_det(:,:,:) integer(bit_kind), allocatable :: tmp_det(:,:,:)
logical, external :: is_in_wavefunction logical, external :: is_in_wavefunction
integer, external :: omp_get_thread_num PROVIDE H_apply_buffer_allocated
!$OMP PARALLEL DEFAULT(NONE) SHARED(psi_coef_sorted_bit,N_det_beta_unique,& !$OMP PARALLEL DEFAULT(NONE) SHARED(psi_coef_sorted_bit,N_det_beta_unique,&
!$OMP N_det_alpha_unique, N_int, psi_det_alpha_unique, psi_det_beta_unique,& !$OMP N_det_alpha_unique, N_int, psi_det_alpha_unique, psi_det_beta_unique,&
@ -712,7 +713,7 @@ subroutine generate_all_alpha_beta_det_products
enddo enddo
call fill_H_apply_buffer_no_selection(l-1, tmp_det, N_int, iproc) call fill_H_apply_buffer_no_selection(l-1, tmp_det, N_int, iproc)
enddo enddo
!$OMP END DO NOWAIT !$OMP END DO
deallocate(tmp_det) deallocate(tmp_det)
!$OMP END PARALLEL !$OMP END PARALLEL
call copy_H_apply_buffer_to_wf call copy_H_apply_buffer_to_wf

View File

@ -187,7 +187,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl)
print*,'n_elements = ',n_elements print*,'n_elements = ',n_elements
call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,& call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,&
real(mo_integrals_threshold,integral_kind)) real(mo_integrals_threshold,integral_kind))
call map_unique(two_body_dm_ab_map) call map_merge(two_body_dm_ab_map)
deallocate(buffer_i,buffer_value) deallocate(buffer_i,buffer_value)

View File

@ -0,0 +1 @@
ZMQ

6
src/FourIdx/README.rst Normal file
View File

@ -0,0 +1,6 @@
=======
FourIdx
=======
Four-index transformation.

View File

@ -0,0 +1,180 @@
subroutine four_index_transform(map_a,map_c,matrix_B,LDB,            &
      i_start, j_start, k_start, l_start,                             &
      i_end  , j_end  , k_end  , l_end  ,                             &
      a_start, b_start, c_start, d_start,                             &
      a_end  , b_end  , c_end  , d_end  )
  use map_module
  use mmap_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! map_a    : input map, read through bielec_integrals_index + map_get
! map_c    : output map, filled through map_append + map_sort
! matrix_B : transformation matrix with leading dimension LDB
!
! Step 1 copies the non-zero A_{ijkl} into a memory-mapped scratch array,
! one slab per l. Step 2 contracts the four indices with DGEMM for every d.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:,:), U(:,:,:), V(:,:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)

  ! Memory-mapped scratch storage
  integer                        :: fd
  type(c_ptr)                    :: c_pointer
  integer*8, pointer             :: a_array(:,:,:)
  integer*8                      :: slots_per_l, l_count

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  ! Create a temporary memory-mapped file.
  ! For every l the slab a_array(1:4, :, l-l_start+1) holds a header in slot 1
  ! (a_array(1,1,.) = index of the last used slot) followed by one slot per
  ! non-zero integral. The "+1" reserves room for that header so that a fully
  ! dense (i,j,k) block still fits.
  slots_per_l = int(i_end-i_start+1,8) * int(j_end-j_start+1,8) *     &
                int(k_end-k_start+1,8) + 1_8
  l_count     = int(l_end-l_start+1,8)

  call mmap(trim(ezfio_filename)//'/work/four_idx',                   &
      (/ 4_8, slots_per_l, l_count /), 8, fd, .False., c_pointer)
  call c_f_pointer(c_pointer, a_array, (/ 4_8, slots_per_l, l_count /))

  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array,c_pointer,fd,          &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
  !$OMP  map_a,map_c,matrix_B)                                   &
  !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,                    &
  !$OMP  a,b,c,d,tmp)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  ! Step 1 : store the non-zero A_{ijkl} of every l in the scratch array.
  !$OMP DO SCHEDULE(dynamic,4)
  do l=l_start,l_end
    a = 1                          ! slot 1 is the header
    do j=j_start,j_end
      do k=k_start,k_end
        do i=i_start,i_end
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,tmp)
          if (tmp /= 0.d0) then
            a = a+1
            a_array(1,a,l-l_start+1) = i
            a_array(2,a,l-l_start+1) = j
            a_array(3,a,l-l_start+1) = k
            ! The value is stored bit-for-bit in the integer*8 slot
            a_array(4,a,l-l_start+1) = transfer(dble(tmp), 1_8)
          endif
        enddo
      enddo
    enddo
    a_array(1,1,l-l_start+1) = a
    print *, l
  enddo
  !$OMP END DO

  ! Step 2 : for every d, accumulate U(a,c,b) = C_{abcd}.
  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    U = 0.d0
    do l=l_start,l_end
      ! Negligible B_{ld} : this l does not contribute to d
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif
      print *, d, l

      allocate( T(i_start:i_end, k_start:k_end, j_start:j_end),       &
                V(a_start:a_end, k_start:k_end, j_start:j_end) )

      ! Rebuild the dense T(i,k,j) = A_{ijkl} for the current l
      T = 0.d0
      do a=2,a_array(1,1,l-l_start+1)
        i = a_array(1,a,l-l_start+1)
        j = a_array(2,a,l-l_start+1)
        k = a_array(3,a,l-l_start+1)
        T(i, k,j) = transfer(a_array(4,a,l-l_start+1), 1.d0)
      enddo

      ! V(a,k,j) = \sum_i B_{ia} T(i,k,j)
      call DGEMM('T','N', (a_end-a_start+1),                          &
          (k_end-k_start+1)*(j_end-j_start+1),                        &
          (i_end-i_start+1), 1.d0,                                    &
          matrix_B(i_start,a_start), size(matrix_B,1),                &
          T(i_start,k_start,j_start), size(T,1), 0.d0,                &
          V(a_start,k_start,j_start), size(V, 1) )

      deallocate(T)
      ! The next DGEMM writes (b_end-b_start+1) slabs and the loop below
      ! reads T(:,:,b) for b up to b_end, so the last dimension has to span
      ! b_start:b_end whatever the value of d.
      allocate( T(a_start:a_end, k_start:k_end, b_start:b_end) )

      ! T(a,k,b) = \sum_j V(a,k,j) B_{jb}
      call DGEMM('N','N', (a_end-a_start+1)*(k_end-k_start+1),        &
          (b_end-b_start+1),                                          &
          (j_end-j_start+1), 1.d0,                                    &
          V(a_start,k_start,j_start), size(V,1)*size(V,2),            &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,           &
          T(a_start,k_start,b_start), size(T,1)*size(T,2) )

      deallocate(V)

      ! U(a,c,b) = U(a,c,b) + B_{ld} \sum_k T(a,k,b) B_{kc}
      do b=b_start,b_end
        call DGEMM('N','N', (a_end-a_start+1), (c_end-c_start+1),     &
            (k_end-k_start+1), matrix_B(l, d),                        &
            T(a_start,k_start,b), size(T,1),                          &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,        &
            U(a_start,c_start,b), size(U,1) )
      enddo

      deallocate(T)
    enddo

    ! Collect the non-negligible elements of U for this d
    idx = 0_8
    do b=b_start,b_end
      do c=c_start,c_end
        do a=a_start,a_end
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! map_c is shared between the threads
    !$OMP CRITICAL
    call map_append(map_c, key, value, idx)
    call map_sort(map_c)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  deallocate(key,value,U)
  !$OMP END PARALLEL

  call munmap( (/ 4_8, slots_per_l, l_count /), 8, fd, c_pointer)

  ! Remove the scratch file, as the other four_index_transform_* routines do
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx')
  close(10,status='DELETE')

end

View File

@ -0,0 +1,300 @@
subroutine four_index_transform_block(map_a,map_c,matrix_B,LDB,      &
      i_start, j_start, k_start, l_start,                             &
      i_end  , j_end  , k_end  , l_end  ,                             &
      a_start, b_start, c_start, d_start,                             &
      a_end  , b_end  , c_end  , d_end  )
  use map_module
  use mmap_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! The l range is split in npass blocks. For each block the non-zero
! A_{ijkl} (i<=k) are packed in memory, contracted with BLAS, and the
! partial results are handed to map_update; map_merge is called at the
! end of every pass.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: l_start_block, l_end_block, l_block
  integer                        :: a, b, c, d
  integer                        :: p, q
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ! Packed list of the non-zero integrals of the current block
  integer*4, allocatable         :: a_array_ik(:)
  integer*4, allocatable         :: a_array_j(:)
  double precision, allocatable  :: a_array_value(:)
  integer*8                      :: new_size

  integer                        :: ipass, npass
  integer*8                      :: tempspace

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  new_size = max(1024_8, 5_8 * map_a % n_elements )
  allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size))

  ! Size of the packed arrays in MiB (16 bytes per entry)
  tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
  ! 2 GiB of scratch space.
  ! At least one pass is always done: without the max(), npass is zero when
  ! l_end == l_start and the division below fails.
  npass = max(1_8, min(int(l_end-l_start,8), 1_8 + tempspace / 2048_8))
  ! A stride of at least 1 keeps the pass loop valid for an empty l range
  l_block = max(1, (l_end-l_start+1)/npass)

  ipass = 0
  do l_start_block = l_start, l_end, l_block
    ipass = ipass+1
    print *, 'Pass ', ipass
    l_end_block = min(l_end, l_start_block+l_block-1)

    allocate(l_pointer(l_start_block:l_end_block+1), value((i_max*k_max)) )

    ! Step 1 : pack the non-zero A_{ijkl}, i<=k, of the block.
    ! l_pointer(l) is the position of the first entry belonging to l.
    ! All threads walk the l and j loops together; the map look-ups are
    ! shared out by the OMP DO and the packing is done by a single thread.
    ii = 1_8
    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
    do l=l_start_block,l_end_block
      !$OMP SINGLE
      l_pointer(l) = ii
      !$OMP END SINGLE
      do j=j_start,j_end
        !$OMP DO SCHEDULE(static,16)
        do k=k_start,k_end
          do i=i_start,k
            ! Packed upper-triangle index of the pair (i,k)
            ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
            call bielec_integrals_index(i,j,k,l,idx)
            call map_get(map_a,idx,value(ik))
          enddo
        enddo
        !$OMP END DO

        !$OMP SINGLE
        ik=0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            tmp=value(ik)
            if (tmp /= 0.d0) then
              a_array_ik(ii) = ik
              a_array_j(ii) = j
              a_array_value(ii) = tmp
              ii=ii+1_8
            endif
          enddo
        enddo
        !$OMP END SINGLE
      enddo
    enddo
    ! Terminating entry, and end marker of the last l of the block
    !$OMP SINGLE
    a_array_ik(ii) = 0
    a_array_j(ii) = 0
    a_array_value(ii) = 0.d0
    l_pointer(l_end_block+1) = ii
    !$OMP END SINGLE
    !$OMP END PARALLEL
    deallocate(value)

    ! Step 2 : contraction with matrix_B, parallelized over d.
    !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,&
    !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
    !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start_block,l_end_block,&
    !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
    !$OMP  map_c,matrix_B,l_pointer)                               &
    !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,              &
    !$OMP  a,b,c,d,tmp,T2d,V2d,ii,p,q)
    allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
    allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

    allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
              V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), &
              V(i_start:i_end, k_start:k_end),                        &
              T(k_start:k_end, a_start:a_end))

    !$OMP DO SCHEDULE(dynamic)
    do d=d_start,d_end
      U = 0.d0
      do l=l_start_block,l_end_block
        ! Negligible B_{ld} : this l does not contribute to d
        if (dabs(matrix_B(l,d)) < 1.d-10) then
          cycle
        endif

        ! Unpack T2d(ik,j) = A_{ijkl} for the current l.
        ! The scan is bounded by l_pointer(l+1) so that it never consumes
        ! entries of the next l when those start with the same j.
        ii=l_pointer(l)
        do j=j_start,j_end
          !DIR$ VECTOR NONTEMPORAL
          T2d(:,j) = 0.d0
          !DIR$ IVDEP
          do while ( (ii < l_pointer(l+1)) .and. (j == a_array_j(ii)) )
            T2d(a_array_ik(ii),j) = a_array_value(ii)
            ii = ii + 1_8
          enddo
        enddo

        ! V2d(ik,b) = \sum_j T2d(ik,j) B_{jb}   for b = b_start..d
        call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
            (d-b_start+1),                                            &
            (j_end-j_start+1), 1.d0,                                  &
            T2d(1,j_start), size(T2d,1),                              &
            matrix_B(j_start,b_start), size(matrix_B,1),0.d0,         &
            V2d(1,b_start), size(V2d,1) )

        do b=b_start,d
          ! Expand column b of V2d into the upper triangle of V(i,k)
          ik = 0
          do k=k_start,k_end
            do i=i_start,k
              ik = ik+1
              V(i,k) = V2d(ik,b)
            enddo
          enddo

          ! T(k,a) = \sum_i V(i,k) B_{ia}   for a = a_start..b,
          ! V being taken as symmetric (only its upper triangle is read)
          call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),       &
              1.d0,                                                   &
              V(i_start,k_start), size(V,1),                          &
              matrix_B(i_start,a_start), size(matrix_B,1),0.d0,       &
              T(k_start,a_start), size(T,1) )

          ! U(a,c,b) += B_{ld} \sum_k T(k,a) B_{kc}   for c = c_start..b
          call DGEMM('T','N', (b-a_start+1), (b-c_start+1),           &
              (k_end-k_start+1), matrix_B(l, d),                      &
              T(k_start,a_start), size(T,1),                          &
              matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,      &
              U(a_start,c_start,b), size(U,1) )

          ! Same update for c = b+1..c_end
          if (b < b_end) then
            call DGEMM('T','N', (b-a_start+1), (c_end-b),             &
                (k_end-k_start+1), matrix_B(l, d),                    &
                T(k_start,a_start), size(T,1),                        &
                matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,        &
                U(a_start,b+1,b), size(U,1) )
          endif
        enddo
      enddo

      ! Collect the non-negligible U(a,c,b), b<=d and a<=min(b,c).
      ! p and q are running pair indices used to skip some a==b elements.
      idx = 0_8
      do b=b_start,d
        q = b+ishft(d*d-d,-1)
        do c=c_start,c_end
          p = a_start+ishft(c*c-c,-1)
          do a=a_start,min(b,c)
            if (dabs(U(a,c,b)) < 1.d-15) then
              cycle
            endif
            if ((a==b).and.(p>q)) cycle
            p = p+1
            idx = idx+1_8
            call bielec_integrals_index(a,b,c,d,key(idx))
            value(idx) = U(a,c,b)
          enddo
        enddo
      enddo

      ! map_c is shared between the threads
      !$OMP CRITICAL
      call map_update(map_c, key, value, idx,1.d-15)
      !$OMP END CRITICAL
    enddo
    !$OMP END DO

    deallocate(key,value,V,T,U,T2d,V2d)
    !$OMP END PARALLEL
    call map_merge(map_c)

    deallocate(l_pointer)
  enddo
  deallocate(a_array_ik,a_array_j,a_array_value)

end

View File

@ -0,0 +1,279 @@
subroutine four_index_transform_slave(map_a,map_c,matrix_B,LDB,      &
      i_start, j_start, k_start, l_start,                             &
      i_end  , j_end  , k_end  , l_end  ,                             &
      a_start, b_start, c_start, d_start,                             &
      a_end  , b_end  , c_end  , d_end, task_id, thread )
  use f77_zmq
  use map_module
  use mmap_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! Same contraction as the other four_index_transform_* routines, but the
! resulting (key,value) pairs are sent with four_idx_push_results on a
! ZeroMQ push socket, tagged with task_id, instead of being inserted in
! map_c by this routine.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end
  integer, intent(in)            :: task_id, thread

  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ! Packed list of the non-zero integrals
  integer*4, allocatable         :: a_array_ik(:)
  integer*2, allocatable         :: a_array_j(:)
  double precision, allocatable  :: a_array_value(:)
  integer*8                      :: new_size

  ! The function result has to be typed explicitly under implicit none
  integer(ZMQ_PTR), external     :: new_zmq_push_socket
  integer(ZMQ_PTR)               :: zmq_socket_push

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  new_size = max(1024_8, 5_8 * map_a % n_elements )
  allocate(a_array_ik(new_size), a_array_j(new_size), a_array_value(new_size))

  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )

  ! Step 1 : pack the non-zero A_{ijkl}, i<=k.
  ! l_pointer(l) is the position of the first entry belonging to l.
  ! All threads walk the l and j loops together; the map look-ups are
  ! shared out by the OMP DO and the packing is done by a single thread.
  ii = 1_8
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
  do l=l_start,l_end
    !$OMP SINGLE
    l_pointer(l) = ii
    !$OMP END SINGLE
    do j=j_start,j_end
      !$OMP DO SCHEDULE(static,1)
      do k=k_start,k_end
        do i=i_start,k
          ! Packed upper-triangle index of the pair (i,k)
          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,value(ik))
        enddo
      enddo
      !$OMP END DO

      !$OMP SINGLE
      ik=0
      do k=k_start,k_end
        do i=i_start,k
          ik = ik+1
          tmp=value(ik)
          if (tmp /= 0.d0) then
            a_array_ik(ii) = ik
            a_array_j(ii) = j
            a_array_value(ii) = tmp
            ii=ii+1_8
          endif
        enddo
      enddo
      !$OMP END SINGLE
    enddo
  enddo
  ! Terminating entry, and end marker of the last l
  !$OMP SINGLE
  a_array_ik(ii) = 0
  a_array_j(ii) = 0
  a_array_value(ii) = 0.d0
  l_pointer(l_end+1) = ii
  !$OMP END SINGLE
  !$OMP END PARALLEL
  deallocate(value)

  ! Step 2 : contraction with matrix_B, parallelized over d.
  ! task_id and thread are read inside the region and every thread opens
  ! its own push socket, so all three must appear in the data-sharing
  ! clauses required by DEFAULT(NONE).
  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
  !$OMP  map_c,matrix_B,l_pointer,task_id,thread)                &
  !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,              &
  !$OMP  a,b,c,d,tmp,T2d,V2d,ii,zmq_socket_push)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  zmq_socket_push = new_zmq_push_socket(thread)

  allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
            V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), &
            V(i_start:i_end, k_start:k_end),                          &
            T(k_start:k_end, a_start:a_end))

  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    U = 0.d0
    do l=l_start,l_end
      ! Negligible B_{ld} : this l does not contribute to d
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif

      ! Unpack T2d(ik,j) = A_{ijkl} for the current l.
      ! The scan is bounded by l_pointer(l+1) so that it never consumes
      ! entries of the next l when those start with the same j.
      ii=l_pointer(l)
      do j=j_start,j_end
        !DIR$ VECTOR NONTEMPORAL
        T2d(:,j) = 0.d0
        !DIR$ IVDEP
        do while ( (ii < l_pointer(l+1)) .and. (j == a_array_j(ii)) )
          T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0)
          ii = ii + 1_8
        enddo
      enddo

      ! V2d(ik,b) = \sum_j T2d(ik,j) B_{jb}   for b = b_start..d
      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
          (d-b_start+1),                                              &
          (j_end-j_start+1), 1.d0,                                    &
          T2d(1,j_start), size(T2d,1),                                &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,           &
          V2d(1,b_start), size(V2d,1) )

      do b=b_start,d
        ! Expand column b of V2d into the upper triangle of V(i,k)
        ik = 0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            V(i,k) = V2d(ik,b)
          enddo
        enddo

        ! T(k,a) = \sum_i V(i,k) B_{ia}   for a = a_start..b,
        ! V being taken as symmetric (only its upper triangle is read)
        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),         &
            1.d0,                                                     &
            V(i_start,k_start), size(V,1),                            &
            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,         &
            T(k_start,a_start), size(T,1) )

        ! U(a,c,b) += B_{ld} \sum_k T(k,a) B_{kc}   for c = c_start..b
        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),             &
            (k_end-k_start+1), matrix_B(l, d),                        &
            T(k_start,a_start), size(T,1),                            &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,        &
            U(a_start,c_start,b), size(U,1) )

        ! Same update for c = b+1..c_end
        if (b < b_end) then
          call DGEMM('T','N', (b-a_start+1), (c_end-b),               &
              (k_end-k_start+1), matrix_B(l, d),                      &
              T(k_start,a_start), size(T,1),                          &
              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,          &
              U(a_start,b+1,b), size(U,1) )
        endif
      enddo
    enddo

    ! Collect the non-negligible U(a,c,b), b<=d and a<=min(b,c)
    idx = 0_8
    do b=b_start,d
      do c=c_start,c_end
        do a=a_start,min(b,c)
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! One thread at a time sends its results
    !$OMP CRITICAL
    call four_idx_push_results(zmq_socket_push, key, value, idx, task_id)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  call end_zmq_push_socket(zmq_socket_push,thread)
  deallocate(key,value,V,T,U,T2d,V2d)
  !$OMP END PARALLEL
  call map_merge(map_c)

  deallocate(l_pointer)
  deallocate(a_array_ik,a_array_j,a_array_value)

end

View File

@ -0,0 +1,293 @@
subroutine four_index_transform_sym(map_a,map_c,matrix_B,LDB,        &
      i_start, j_start, k_start, l_start,                             &
      i_end  , j_end  , k_end  , l_end  ,                             &
      a_start, b_start, c_start, d_start,                             &
      a_end  , b_end  , c_end  , d_end  )
  use map_module
  use mmap_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! Only the pairs i<=k of A are read, and only b<=d, a<=min(b,c) of C are
! produced. The packed non-zero integrals live in three memory-mapped
! scratch files, removed at the end. Results are handed to map_update
! and map_merge is called once all d are done.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ! Memory-mapped scratch storage
  integer                        :: fd(3)
  type(c_ptr)                    :: c_pointer(3)
  integer*4, pointer             :: a_array_ik(:)
  integer*2, pointer             :: a_array_j(:)
  double precision, pointer      :: a_array_value(:)
  integer*8                      :: new_size

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  ! Create the temporary memory-mapped files
  new_size = max(1024_8, 5_8 * map_a % n_elements )

  call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1))
  call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /))

  call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2))
  call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /))

  call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3))
  call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /))

  print *, 'Transforming MO integrals'

  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )

  ! Step 1 : pack the non-zero A_{ijkl}, i<=k.
  ! l_pointer(l) is the position of the first entry belonging to l.
  ! All threads walk the l and j loops together; the map look-ups are
  ! shared out by the OMP DO and the packing is done by a single thread.
  ii = 1_8
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
  do l=l_start,l_end
    !$OMP SINGLE
    l_pointer(l) = ii
    !$OMP END SINGLE
    do j=j_start,j_end
      !$OMP DO SCHEDULE(static,1)
      do k=k_start,k_end
        do i=i_start,k
          ! Packed upper-triangle index of the pair (i,k)
          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,value(ik))
        enddo
      enddo
      !$OMP END DO

      !$OMP SINGLE
      ik=0
      do k=k_start,k_end
        do i=i_start,k
          ik = ik+1
          tmp=value(ik)
          if (tmp /= 0.d0) then
            a_array_ik(ii) = ik
            a_array_j(ii) = j
            a_array_value(ii) = tmp
            ii=ii+1_8
          endif
        enddo
      enddo
      !$OMP END SINGLE
    enddo
  enddo
  ! Terminating entry, and end marker of the last l
  !$OMP SINGLE
  a_array_ik(ii) = 0
  a_array_j(ii) = 0
  a_array_value(ii) = 0.d0
  l_pointer(l_end+1) = ii
  !$OMP END SINGLE
  !$OMP END PARALLEL
  deallocate(value)

  ! Step 2 : contraction with matrix_B, parallelized over d.
  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
  !$OMP  map_c,matrix_B,l_pointer)                               &
  !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,              &
  !$OMP  a,b,c,d,tmp,T2d,V2d,ii)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
            V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), &
            V(i_start:i_end, k_start:k_end),                          &
            T(k_start:k_end, a_start:a_end))

  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    print *, d, '/', d_end
    U = 0.d0
    do l=l_start,l_end
      ! Negligible B_{ld} : this l does not contribute to d
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif

      ! Unpack T2d(ik,j) = A_{ijkl} for the current l.
      ! The scan is bounded by l_pointer(l+1) so that it never consumes
      ! entries of the next l when those start with the same j.
      ii=l_pointer(l)
      do j=j_start,j_end
        !DIR$ VECTOR NONTEMPORAL
        T2d(:,j) = 0.d0
        !DIR$ IVDEP
        do while ( (ii < l_pointer(l+1)) .and. (j == a_array_j(ii)) )
          T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0)
          ii = ii + 1_8
        enddo
      enddo

      ! V2d(ik,b) = \sum_j T2d(ik,j) B_{jb}   for b = b_start..d
      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
          (d-b_start+1),                                              &
          (j_end-j_start+1), 1.d0,                                    &
          T2d(1,j_start), size(T2d,1),                                &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,           &
          V2d(1,b_start), size(V2d,1) )

      do b=b_start,d
        ! Expand column b of V2d into the upper triangle of V(i,k)
        ik = 0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            V(i,k) = V2d(ik,b)
          enddo
        enddo

        ! T(k,a) = \sum_i V(i,k) B_{ia}   for a = a_start..b,
        ! V being taken as symmetric (only its upper triangle is read)
        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),         &
            1.d0,                                                     &
            V(i_start,k_start), size(V,1),                            &
            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,         &
            T(k_start,a_start), size(T,1) )

        ! U(a,c,b) += B_{ld} \sum_k T(k,a) B_{kc}   for c = c_start..b
        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),             &
            (k_end-k_start+1), matrix_B(l, d),                        &
            T(k_start,a_start), size(T,1),                            &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,        &
            U(a_start,c_start,b), size(U,1) )

        ! Same update for c = b+1..c_end
        if (b < b_end) then
          call DGEMM('T','N', (b-a_start+1), (c_end-b),               &
              (k_end-k_start+1), matrix_B(l, d),                      &
              T(k_start,a_start), size(T,1),                          &
              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,          &
              U(a_start,b+1,b), size(U,1) )
        endif
      enddo
    enddo

    ! Collect the non-negligible U(a,c,b), b<=d and a<=min(b,c)
    idx = 0_8
    do b=b_start,d
      do c=c_start,c_end
        do a=a_start,min(b,c)
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! map_c is shared between the threads
    !$OMP CRITICAL
    call map_update(map_c, key, value, idx,1.d-15)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  deallocate(key,value,V,T,U,T2d,V2d)
  !$OMP END PARALLEL
  call map_merge(map_c)

  ! Unmap and remove the scratch files
  call munmap( (/ new_size /), 4, fd(1), c_pointer(1))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik')
  close(10,status='DELETE')
  call munmap( (/ new_size /), 2, fd(2), c_pointer(2))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j')
  close(10,status='DELETE')
  call munmap( (/ new_size /), 8, fd(3), c_pointer(3))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value')
  close(10,status='DELETE')
  deallocate(l_pointer)

end

View File

@ -0,0 +1,292 @@
subroutine four_index_transform_sym_mmap(map_a,map_c,matrix_B,LDB,   &
      i_start, j_start, k_start, l_start,                             &
      i_end  , j_end  , k_end  , l_end  ,                             &
      a_start, b_start, c_start, d_start,                             &
      a_end  , b_end  , c_end  , d_end  )
  use map_module
  use mmap_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! Only the pairs i<=k of A are read, and only b<=d, a<=min(b,c) of C are
! produced. The packed non-zero integrals live in three memory-mapped
! scratch files, removed at the end. Results are handed to map_append
! and map_sort is called once all d are done.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  double precision, allocatable  :: T(:,:), U(:,:,:), V(:,:)
  double precision, allocatable  :: T2d(:,:), V2d(:,:)
  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: i_min, j_min, k_min, l_min
  integer                        :: i, j, k, l, ik, ll
  integer                        :: a, b, c, d
  double precision, external     :: get_ao_bielec_integral
  integer*8                      :: ii
  integer(key_kind)              :: idx
  real(integral_kind)            :: tmp
  integer(key_kind), allocatable :: key(:)
  real(integral_kind), allocatable :: value(:)
  integer*8, allocatable         :: l_pointer(:)

  ! Memory-mapped scratch storage
  integer                        :: fd(3)
  type(c_ptr)                    :: c_pointer(3)
  integer*4, pointer             :: a_array_ik(:)
  integer*2, pointer             :: a_array_j(:)
  double precision, pointer      :: a_array_value(:)
  integer*8                      :: new_size

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_min = min(i_start,a_start)
  i_max = max(i_end  ,a_end  )
  j_min = min(j_start,b_start)
  j_max = max(j_end  ,b_end  )
  k_min = min(k_start,c_start)
  k_max = max(k_end  ,c_end  )
  l_min = min(l_start,d_start)
  l_max = max(l_end  ,d_end  )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  ! Create the temporary memory-mapped files
  new_size = max(1024_8, 5_8 * map_a % n_elements )

  call mmap(trim(ezfio_filename)//'/work/four_idx_ik', (/ new_size /), 4, fd(1), .False., c_pointer(1))
  call c_f_pointer(c_pointer(1), a_array_ik, (/ new_size /))

  call mmap(trim(ezfio_filename)//'/work/four_idx_j', (/ new_size /), 2, fd(2), .False., c_pointer(2))
  call c_f_pointer(c_pointer(2), a_array_j, (/ new_size /))

  call mmap(trim(ezfio_filename)//'/work/four_idx_value', (/ new_size /), 8, fd(3), .False., c_pointer(3))
  call c_f_pointer(c_pointer(3), a_array_value, (/ new_size /))

  print *, 'Transforming MO integrals'

  allocate(l_pointer(l_start:l_end+1), value((i_max*k_max)) )

  ! Step 1 : pack the non-zero A_{ijkl}, i<=k.
  ! l_pointer(l) is the position of the first entry belonging to l.
  ! All threads walk the l and j loops together; the map look-ups are
  ! shared out by the OMP DO and the packing is done by a single thread.
  ii = 1_8
  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,ik,idx)
  do l=l_start,l_end
    !$OMP SINGLE
    l_pointer(l) = ii
    !$OMP END SINGLE
    do j=j_start,j_end
      !$OMP DO SCHEDULE(static,1)
      do k=k_start,k_end
        do i=i_start,k
          ! Packed upper-triangle index of the pair (i,k)
          ik = (i-i_start+1) + ishft( (k-k_start)*(k-k_start+1), -1 )
          call bielec_integrals_index(i,j,k,l,idx)
          call map_get(map_a,idx,value(ik))
        enddo
      enddo
      !$OMP END DO

      !$OMP SINGLE
      ik=0
      do k=k_start,k_end
        do i=i_start,k
          ik = ik+1
          tmp=value(ik)
          if (tmp /= 0.d0) then
            a_array_ik(ii) = ik
            a_array_j(ii) = j
            a_array_value(ii) = tmp
            ii=ii+1_8
          endif
        enddo
      enddo
      !$OMP END SINGLE
    enddo
  enddo
  ! Terminating entry, and end marker of the last l
  !$OMP SINGLE
  a_array_ik(ii) = 0
  a_array_j(ii) = 0
  a_array_value(ii) = 0.d0
  l_pointer(l_end+1) = ii
  !$OMP END SINGLE
  !$OMP END PARALLEL
  deallocate(value)

  ! Step 2 : contraction with matrix_B, parallelized over d.
  !$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value,c_pointer,fd, &
  !$OMP  a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
  !$OMP  i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
  !$OMP  i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max,        &
  !$OMP  map_c,matrix_B,l_pointer)                               &
  !$OMP  PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,              &
  !$OMP  a,b,c,d,tmp,T2d,V2d,ii)
  allocate( key(i_max*j_max*k_max), value(i_max*j_max*k_max) )
  allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )

  allocate( T2d((i_end-i_start+1)*(k_end-k_start+2)/2, j_start:j_end), &
            V2d((i_end-i_start+1)*(k_end-k_start+2)/2, b_start:b_end), &
            V(i_start:i_end, k_start:k_end),                          &
            T(k_start:k_end, a_start:a_end))

  !$OMP DO SCHEDULE(dynamic)
  do d=d_start,d_end
    print *, d, '/', d_end
    U = 0.d0
    do l=l_start,l_end
      ! Negligible B_{ld} : this l does not contribute to d
      if (dabs(matrix_B(l,d)) < 1.d-10) then
        cycle
      endif

      ! Unpack T2d(ik,j) = A_{ijkl} for the current l.
      ! The scan is bounded by l_pointer(l+1) so that it never consumes
      ! entries of the next l when those start with the same j.
      ii=l_pointer(l)
      do j=j_start,j_end
        !DIR$ VECTOR NONTEMPORAL
        T2d(:,j) = 0.d0
        !DIR$ IVDEP
        do while ( (ii < l_pointer(l+1)) .and. (j == a_array_j(ii)) )
          T2d(a_array_ik(ii),j) = transfer(a_array_value(ii), 1.d0)
          ii = ii + 1_8
        enddo
      enddo

      ! V2d(ik,b) = \sum_j T2d(ik,j) B_{jb}   for b = b_start..d
      call DGEMM('N','N', ishft( (i_end-i_start+1)*(i_end-i_start+2), -1),&
          (d-b_start+1),                                              &
          (j_end-j_start+1), 1.d0,                                    &
          T2d(1,j_start), size(T2d,1),                                &
          matrix_B(j_start,b_start), size(matrix_B,1),0.d0,           &
          V2d(1,b_start), size(V2d,1) )

      do b=b_start,d
        ! Expand column b of V2d into the upper triangle of V(i,k)
        ik = 0
        do k=k_start,k_end
          do i=i_start,k
            ik = ik+1
            V(i,k) = V2d(ik,b)
          enddo
        enddo

        ! T(k,a) = \sum_i V(i,k) B_{ia}   for a = a_start..b,
        ! V being taken as symmetric (only its upper triangle is read)
        call DSYMM('L','U', (k_end-k_start+1), (b-a_start+1),         &
            1.d0,                                                     &
            V(i_start,k_start), size(V,1),                            &
            matrix_B(i_start,a_start), size(matrix_B,1),0.d0,         &
            T(k_start,a_start), size(T,1) )

        ! U(a,c,b) += B_{ld} \sum_k T(k,a) B_{kc}   for c = c_start..b
        call DGEMM('T','N', (b-a_start+1), (b-c_start+1),             &
            (k_end-k_start+1), matrix_B(l, d),                        &
            T(k_start,a_start), size(T,1),                            &
            matrix_B(k_start,c_start), size(matrix_B,1), 1.d0,        &
            U(a_start,c_start,b), size(U,1) )

        ! Same update for c = b+1..c_end
        if (b < b_end) then
          call DGEMM('T','N', (b-a_start+1), (c_end-b),               &
              (k_end-k_start+1), matrix_B(l, d),                      &
              T(k_start,a_start), size(T,1),                          &
              matrix_B(k_start,b+1), size(matrix_B,1), 1.d0,          &
              U(a_start,b+1,b), size(U,1) )
        endif
      enddo
    enddo

    ! Collect the non-negligible U(a,c,b), b<=d and a<=min(b,c)
    idx = 0_8
    do b=b_start,d
      do c=c_start,c_end
        do a=a_start,min(b,c)
          if (dabs(U(a,c,b)) < 1.d-15) then
            cycle
          endif
          idx = idx+1_8
          call bielec_integrals_index(a,b,c,d,key(idx))
          value(idx) = U(a,c,b)
        enddo
      enddo
    enddo

    ! map_c is shared between the threads
    !$OMP CRITICAL
    call map_append(map_c, key, value, idx)
    !$OMP END CRITICAL
  enddo
  !$OMP END DO

  deallocate(key,value,V,T,U,T2d,V2d)
  !$OMP END PARALLEL
  call map_sort(map_c)

  ! Unmap and remove the scratch files
  call munmap( (/ new_size /), 4, fd(1), c_pointer(1))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_ik')
  close(10,status='DELETE')
  call munmap( (/ new_size /), 2, fd(2), c_pointer(2))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_j')
  close(10,status='DELETE')
  call munmap( (/ new_size /), 8, fd(3), c_pointer(3))
  open(unit=10,file=trim(ezfio_filename)//'/work/four_idx_value')
  close(10,status='DELETE')
  deallocate(l_pointer)

end

View File

@ -0,0 +1,273 @@
subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB,        &
      i_start, j_start, k_start, l_start,                            &
      i_end  , j_end  , k_end  , l_end  ,                            &
      a_start, b_start, c_start, d_start,                            &
      a_end  , b_end  , c_end  , d_end  )
  use f77_zmq
  use map_module
  implicit none
  BEGIN_DOC
! Performs a four-index transformation of map_a(N^4) into map_c(M^4) using b(NxM)
! C_{abcd} = \sum_{ijkl} A_{ijkl}.B_{ia}.B_{jb}.B_{kc}.B_{ld}
! Loops run over *_start->*_end
!
! The l range is cut into blocks. One task per block is registered on the
! task server, then two OpenMP threads are started: thread 0 runs the
! collector that merges results into map_c, the other one runs an
! in-process slave.
  END_DOC
  type(map_type), intent(in)     :: map_a
  type(map_type), intent(inout)  :: map_c
  integer, intent(in)            :: LDB
  double precision, intent(in)   :: matrix_B(LDB,*)
  integer, intent(in)            :: i_start, j_start, k_start, l_start
  integer, intent(in)            :: i_end  , j_end  , k_end  , l_end
  integer, intent(in)            :: a_start, b_start, c_start, d_start
  integer, intent(in)            :: a_end  , b_end  , c_end  , d_end

  integer                        :: i_max, j_max, k_max, l_max
  integer                        :: l_start_block, l_end_block, l_block
  integer                        :: npass
  integer                        :: ithread
  integer*8                      :: new_size
  integer*8                      :: tempspace
  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
  ! Fixed-length buffer: an allocatable scalar would have to be allocated
  ! before being used as an internal file.
  character(len=64)              :: task

  ASSERT (k_start == i_start)
  ASSERT (l_start == j_start)
  ASSERT (a_start == c_start)
  ASSERT (b_start == d_start)

  i_max = max(i_end ,a_end )
  j_max = max(j_end ,b_end )
  k_max = max(k_end ,c_end )
  l_max = max(l_end ,d_end )

  ASSERT (0 < i_max)
  ASSERT (0 < j_max)
  ASSERT (0 < k_max)
  ASSERT (0 < l_max)
  ASSERT (LDB >= i_max)
  ASSERT (LDB >= j_max)
  ASSERT (LDB >= k_max)
  ASSERT (LDB >= l_max)

  call new_parallel_job(zmq_to_qp_run_socket,'four_idx')

  ! Estimate of the scratch space (in MiB) used to choose the number of passes
  new_size  = max(1024_8, 5_8 * map_a % n_elements )
  tempspace = (new_size * 14_8) / (1024_8 * 1024_8)

  ! 2 GiB of scratch space per pass. Both arguments of min() are converted
  ! to 8-byte integers so that their kinds agree.
  npass = int( min( int(l_end-l_start,8), 1_8 + tempspace / 2048_8 ), 4)
  ! Guard against l_end == l_start, which would give npass = 0
  ! (division by zero) and a zero stride in the loop below.
  npass   = max(1, npass)
  l_block = max(1, (l_end-l_start)/npass)

  ! Create tasks
  ! ============

  ! NOTE(review): each task only carries the two block bounds, whereas
  ! four_idx_slave_work reads LDB and sixteen bounds from the task message.
  ! The two sides have to be reconciled before remote slaves can be used.
  do l_start_block = l_start, l_end, l_block
    l_end_block = min(l_end, l_start_block+l_block-1)
    write(task,'(I10,1X,I10)') l_start_block, l_end_block
    call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
  enddo

  call zmq_set_running(zmq_to_qp_run_socket)

  PROVIDE nproc

  call omp_set_nested(.True.)
  !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
  ithread = omp_get_thread_num()
  if (ithread==0) then
    call four_idx_collector(zmq_to_qp_run_socket,map_c)
  else
    !TODO : Put strings of map_a and matrix_b on server and broadcast
    ! NOTE(review): l_start_block and l_end_block hold the values left by the
    ! task-creation loop above (l_start_block is then past l_end). Confirm
    ! that the in-process slave ignores them and fetches its bounds from the
    ! task server.
    call four_index_transform_slave_inproc(map_a,map_c,matrix_B,LDB,  &
        i_start, j_start, k_start, l_start_block,                    &
        i_end  , j_end  , k_end  , l_end_block  ,                    &
        a_start, b_start, c_start, d_start,                          &
        a_end  , b_end  , c_end  , d_end, 1 )
  endif
  !$OMP END PARALLEL

  call end_parallel_job(zmq_to_qp_run_socket, 'four_idx')
end
subroutine four_idx_slave_work(zmq_to_qp_run_socket, worker_id)
  use f77_zmq
  implicit none
  BEGIN_DOC
! Worker loop of the four-index transformation: repeatedly fetches a task
! from the task server, decodes the index bounds from the task message, runs
! four_index_transform_slave on them and reports the task as done.
! The loop stops when the task server returns task_id == 0.
  END_DOC
  integer(ZMQ_PTR),intent(in)    :: zmq_to_qp_run_socket
  integer,intent(in)             :: worker_id

  integer                        :: task_id
  character*(512)                :: msg

  ! LDB is decoded from the task message, so it needs a local declaration.
  integer                        :: LDB
  integer                        :: i_start, j_start, k_start, l_start_block
  integer                        :: i_end  , j_end  , k_end  , l_end_block
  integer                        :: a_start, b_start, c_start, d_start
  integer                        :: a_end  , b_end  , c_end  , d_end

  !TODO : get map_a and matrix_B from server
  ! NOTE(review): map_a, map_c and matrix_B are neither dummy arguments nor
  ! declared here; they must be obtained (see TODO above) before this
  ! subroutine can be compiled and used.

  do
    call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
    if(task_id == 0) exit

    ! NOTE(review): this expects LDB followed by sixteen bounds in the
    ! message, whereas four_index_transform_zmq only writes the two block
    ! bounds into each task.
    read (msg,*) LDB,                                                &
        i_start, j_start, k_start, l_start_block,                    &
        i_end  , j_end  , k_end  , l_end_block  ,                    &
        a_start, b_start, c_start, d_start,                          &
        a_end  , b_end  , c_end  , d_end

    call four_index_transform_slave(map_a,map_c,matrix_B,LDB,        &
        i_start, j_start, k_start, l_start_block,                    &
        i_end  , j_end  , k_end  , l_end_block  ,                    &
        a_start, b_start, c_start, d_start,                          &
        a_end  , b_end  , c_end  , d_end, zmq_to_qp_run_socket,      &
        task_id)

    call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
  enddo
end
BEGIN_PROVIDER [ integer, nthreads_four_idx ]
  implicit none
  BEGIN_DOC
! Number of threads for 4-index transformation.
! Defaults to nproc; the NTHREADS_FOUR_IDX environment variable, when set
! to a non-blank value, overrides it.
  END_DOC
  character*(32)                 :: env

  nthreads_four_idx = nproc
  call getenv('NTHREADS_FOUR_IDX',env)
  if (trim(env) /= '') then
    read(env,*) nthreads_four_idx
  endif
  ! Report the value provided here (not the Davidson thread count).
  call write_int(6,nthreads_four_idx,'Number of threads for 4-index transformation')
END_PROVIDER
subroutine four_idx_collector(zmq_to_qp_run_socket,map_c)
  use f77_zmq
  use map_module
  implicit none
  BEGIN_DOC
! Collects the results of the four-index transformation tasks.
! Opens a pull socket, then repeatedly pulls one result into map_c and
! deletes the corresponding task on the task server, until the server
! reports (through more) that the loop should stop.
  END_DOC
  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
  type(map_type), intent(inout)  :: map_c

  integer                        :: more
  ! Identifier of the task whose result has just been pulled
  integer                        :: task_id
  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
  integer(ZMQ_PTR)               :: zmq_socket_pull

  more = 1
  zmq_socket_pull = new_zmq_pull_socket()

  do while (more == 1)
    call four_idx_pull_results(zmq_socket_pull, map_c, task_id)
    call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
  enddo

  call end_zmq_pull_socket(zmq_socket_pull)
end
subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
  use f77_zmq
  use map_module
  implicit none
  BEGIN_DOC
! Receives one result message of the four-index transformation and merges
! it into map_c.
!
! The message is read in four parts: task_id (4 bytes), sze (4 bytes),
! sze keys and sze values. On output, task_id is the identifier received.
! The program is stopped if any part has an unexpected size.
  END_DOC
  type(map_type), intent(inout)    :: map_c
  integer(ZMQ_PTR), intent(inout)  :: zmq_socket_pull
  integer, intent(out)             :: task_id

  integer                          :: rc, sze
  integer*8                        :: rc8
  ! Message sizes in bytes, as 8-byte integers for f77_zmq_recv8
  integer*8                        :: n_bytes_key, n_bytes_value
  integer(key_kind), allocatable   :: key(:)
  real(integral_kind), allocatable :: value(:)

  rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
  if(rc /= 4) stop "four_idx_pull_results failed to pull task_id"

  rc = f77_zmq_recv( zmq_socket_pull, sze, 4, 0)
  if(rc /= 4) stop "four_idx_pull_results failed to pull sze"

  allocate(key(sze), value(sze))

  ! The length argument of f77_zmq_recv8 is an 8-byte integer: build it
  ! explicitly instead of passing a default-kind product.
  n_bytes_key   = int(key_kind,8)      * int(sze,8)
  n_bytes_value = int(integral_kind,8) * int(sze,8)

  rc8 = f77_zmq_recv8( zmq_socket_pull, key, n_bytes_key, 0)
  if(rc8 /= n_bytes_key) stop "four_idx_pull_results failed to pull key"

  rc8 = f77_zmq_recv8( zmq_socket_pull, value, n_bytes_value, 0)
  if(rc8 /= n_bytes_value) stop "four_idx_pull_results failed to pull value"

  ! Activate if zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE
  rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
  if (rc /= 4) then
    print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
    stop 'error'
  endif
IRP_ENDIF

  ! The threshold is converted to the kind of the stored values.
  call map_update(map_c, key, value, sze, real(1.d-15,integral_kind)) ! TODO : threshold

  deallocate(key, value)
end
subroutine four_idx_push_results(zmq_socket_push, key, value, sze, task_id)
  use f77_zmq
  use map_module
  implicit none
  BEGIN_DOC
! Sends one result message of the four-index transformation.
!
! The message is sent in four parts: task_id (4 bytes), sze (4 bytes),
! the sze keys and the sze values. The program is stopped if any part is
! not sent entirely.
  END_DOC
  integer, intent(in)             :: sze
  integer(key_kind), intent(in)   :: key(sze)
  real(integral_kind), intent(in) :: value(sze)
  integer(ZMQ_PTR), intent(in)    :: zmq_socket_push
  integer, intent(in)             :: task_id

  ! sze is a dummy argument and must not be declared a second time here.
  integer                         :: rc
  integer                         :: reply
  integer*8                       :: rc8
  ! Message sizes in bytes, as 8-byte integers for f77_zmq_send8
  integer*8                       :: n_bytes_key, n_bytes_value

  rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
  if(rc /= 4) stop "four_idx_push_results failed to push task_id"

  rc = f77_zmq_send( zmq_socket_push, sze, 4, ZMQ_SNDMORE)
  if(rc /= 4) stop "four_idx_push_results failed to push sze"

  n_bytes_key   = int(key_kind,8)      * int(sze,8)
  n_bytes_value = int(integral_kind,8) * int(sze,8)

  rc8 = f77_zmq_send8( zmq_socket_push, key, n_bytes_key, ZMQ_SNDMORE)
  if(rc8 /= n_bytes_key) stop "four_idx_push_results failed to push key"

  rc8 = f77_zmq_send8( zmq_socket_push, value, n_bytes_value, 0)
  if(rc8 /= n_bytes_value) stop "four_idx_push_results failed to push value"

  ! Activate if zmq_socket_push is a REQ
  ! A REQ socket has to receive the reply to its request before it can send
  ! again: wait for the 4-byte acknowledgement sent by four_idx_pull_results.
IRP_IF ZMQ_PUSH
IRP_ELSE
  rc = f77_zmq_recv( zmq_socket_push, reply, 4, 0)
  if (rc /= 4) then
    print *, irp_here, ' : f77_zmq_recv (zmq_socket_push,...'
    stop 'error'
  endif
IRP_ENDIF
end

View File

@ -1 +1 @@
Pseudo Bitmask ZMQ Pseudo Bitmask ZMQ FourIdx

View File

@ -179,7 +179,6 @@ double precision function get_ao_bielec_integral(i,j,k,l,map) result(result)
call bielec_integrals_index(i,j,k,l,idx) call bielec_integrals_index(i,j,k,l,idx)
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
call map_get(map,idx,tmp) call map_get(map,idx,tmp)
tmp = tmp
else else
ii = l-ao_integrals_cache_min ii = l-ao_integrals_cache_min
ii = ior( ishft(ii,6), k-ao_integrals_cache_min) ii = ior( ishft(ii,6), k-ao_integrals_cache_min)
@ -336,7 +335,7 @@ end
! Min and max values of the MOs for which the integrals are in the cache ! Min and max values of the MOs for which the integrals are in the cache
END_DOC END_DOC
mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8)
mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min+127_8) mo_integrals_cache_max_8 = min(int(mo_tot_num,8),mo_integrals_cache_min_8+127_8)
mo_integrals_cache_min = max(1,elec_alpha_num - 63) mo_integrals_cache_min = max(1,elec_alpha_num - 63)
mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+127) mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+127)

View File

@ -117,7 +117,17 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ]
endif endif
else else
call add_integrals_to_map(full_ijkl_bitmask_4) ! call add_integrals_to_map(full_ijkl_bitmask_4)
call four_index_transform_block(ao_integrals_map,mo_integrals_map, &
mo_coef, size(mo_coef,1), &
1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, &
1, 1, 1, 1, mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num)
integer*8 :: get_mo_map_size, mo_map_size
mo_map_size = get_mo_map_size()
print*,'Molecular integrals provided'
endif endif
if (write_mo_integrals) then if (write_mo_integrals) then
call ezfio_set_work_empty(.False.) call ezfio_set_work_empty(.False.)
@ -146,7 +156,7 @@ subroutine set_integrals_jj_into_map
enddo enddo
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
real(mo_integrals_threshold,integral_kind)) real(mo_integrals_threshold,integral_kind))
call map_unique(mo_integrals_map) call map_merge(mo_integrals_map)
end end
subroutine set_integrals_exchange_jj_into_map subroutine set_integrals_exchange_jj_into_map
@ -167,7 +177,7 @@ subroutine set_integrals_exchange_jj_into_map
enddo enddo
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
real(mo_integrals_threshold,integral_kind)) real(mo_integrals_threshold,integral_kind))
call map_unique(mo_integrals_map) call map_merge(mo_integrals_map)
end end
@ -458,7 +468,7 @@ subroutine add_integrals_to_map(mask_ijkl)
real(mo_integrals_threshold,integral_kind)) real(mo_integrals_threshold,integral_kind))
deallocate(buffer_i, buffer_value) deallocate(buffer_i, buffer_value)
!$OMP END PARALLEL !$OMP END PARALLEL
call map_unique(mo_integrals_map) call map_merge(mo_integrals_map)
call wall_time(wall_2) call wall_time(wall_2)
call cpu_time(cpu_2) call cpu_time(cpu_2)
@ -773,7 +783,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
real(mo_integrals_threshold,integral_kind)) real(mo_integrals_threshold,integral_kind))
deallocate(buffer_i, buffer_value) deallocate(buffer_i, buffer_value)
!$OMP END PARALLEL !$OMP END PARALLEL
call map_unique(mo_integrals_map) call map_merge(mo_integrals_map)
call wall_time(wall_2) call wall_time(wall_2)
call cpu_time(cpu_2) call cpu_time(cpu_2)
@ -1035,7 +1045,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
! print*, 'Communicating the map' ! print*, 'Communicating the map'
! call communicate_mo_integrals() ! call communicate_mo_integrals()
!IRP_ENDIF !IRP_ENDIF
call map_unique(mo_integrals_map) call map_merge(mo_integrals_map)
call wall_time(wall_2) call wall_time(wall_2)
call cpu_time(cpu_2) call cpu_time(cpu_2)

View File

@ -1,6 +1,6 @@
BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num_align,ao_num) ] BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ]
&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num_align,ao_num) ] &BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ]
&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num_align,ao_num) ] &BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ]
implicit none implicit none
integer :: i,j,n,l integer :: i,j,n,l
double precision :: f double precision :: f
@ -45,8 +45,6 @@
power_A(1) = ao_power( j, 1 ) power_A(1) = ao_power( j, 1 )
power_A(2) = ao_power( j, 2 ) power_A(2) = ao_power( j, 2 )
power_A(3) = ao_power( j, 3 ) power_A(3) = ao_power( j, 3 )
!DEC$ VECTOR ALIGNED
!DEC$ VECTOR ALWAYS
do i= 1,ao_num do i= 1,ao_num
ao_deriv2_x(i,j)= 0.d0 ao_deriv2_x(i,j)= 0.d0
ao_deriv2_y(i,j)= 0.d0 ao_deriv2_y(i,j)= 0.d0
@ -59,7 +57,6 @@
power_B(3) = ao_power( i, 3 ) power_B(3) = ao_power( i, 3 )
do n = 1,ao_prim_num(j) do n = 1,ao_prim_num(j)
alpha = ao_expo_ordered_transp(n,j) alpha = ao_expo_ordered_transp(n,j)
!DEC$ VECTOR ALIGNED
do l = 1, ao_prim_num(i) do l = 1, ao_prim_num(i)
beta = ao_expo_ordered_transp(l,i) beta = ao_expo_ordered_transp(l,i)
call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1)
@ -122,7 +119,7 @@
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num,ao_num)]
implicit none implicit none
BEGIN_DOC BEGIN_DOC
! array of the priminitve basis kinetic integrals ! array of the priminitve basis kinetic integrals
@ -131,27 +128,23 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)]
integer :: i,j,k,l integer :: i,j,k,l
if (read_ao_one_integrals) then if (read_ao_one_integrals) then
call ezfio_get_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) call read_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,&
call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) size(ao_kinetic_integral,1), size(ao_kinetic_integral,2))
print *, 'AO kinetic integrals read from disk' print *, 'AO kinetic integrals read from disk'
else else
!$OMP PARALLEL DO DEFAULT(NONE) & !$OMP PARALLEL DO DEFAULT(NONE) &
!$OMP PRIVATE(i,j) & !$OMP PRIVATE(i,j) &
!$OMP SHARED(ao_num, ao_num_align, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z) !$OMP SHARED(ao_num, ao_kinetic_integral,ao_deriv2_x,ao_deriv2_y,ao_deriv2_z)
do j = 1, ao_num do j = 1, ao_num
!DEC$ VECTOR ALWAYS
!DEC$ VECTOR ALIGNED
do i = 1, ao_num do i = 1, ao_num
ao_kinetic_integral(i,j) = -0.5d0 * (ao_deriv2_x(i,j) + ao_deriv2_y(i,j) + ao_deriv2_z(i,j) ) ao_kinetic_integral(i,j) = -0.5d0 * (ao_deriv2_x(i,j) + ao_deriv2_y(i,j) + ao_deriv2_z(i,j) )
enddo enddo
do i = ao_num +1,ao_num_align
ao_kinetic_integral(i,j) = 0.d0
enddo
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
endif endif
if (write_ao_one_integrals) then if (write_ao_one_integrals) then
call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) call write_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,&
size(ao_kinetic_integral,1), size(ao_kinetic_integral,2))
print *, 'AO kinetic integrals written to disk' print *, 'AO kinetic integrals written to disk'
endif endif
END_PROVIDER END_PROVIDER

View File

@ -1,4 +1,4 @@
BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num,ao_num)]
BEGIN_DOC BEGIN_DOC
! interaction nuclear electron ! interaction nuclear electron
END_DOC END_DOC
@ -11,7 +11,8 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)]
double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
if (read_ao_one_integrals) then if (read_ao_one_integrals) then
call ezfio_get_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) call read_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, &
size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2))
print *, 'AO N-e integrals read from disk' print *, 'AO N-e integrals read from disk'
else else
@ -73,14 +74,15 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)]
!$OMP END PARALLEL !$OMP END PARALLEL
endif endif
if (write_ao_one_integrals) then if (write_ao_one_integrals) then
call ezfio_set_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) call write_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, &
size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2))
print *, 'AO N-e integrals written to disk' print *, 'AO N-e integrals written to disk'
endif endif
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num_align,ao_num,nucl_num)] BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral_per_atom, (ao_num,ao_num,nucl_num)]
BEGIN_DOC BEGIN_DOC
! ao_nucl_elec_integral_per_atom(i,j,k) = -<AO(i)|1/|r-Rk|AO(j)> ! ao_nucl_elec_integral_per_atom(i,j,k) = -<AO(i)|1/|r-Rk|AO(j)>
! where Rk is the geometry of the kth atom ! where Rk is the geometry of the kth atom

View File

@ -46,8 +46,8 @@ subroutine map_save_to_disk(filename,map)
enddo enddo
deallocate(map % map(i) % value) deallocate(map % map(i) % value)
deallocate(map % map(i) % key) deallocate(map % map(i) % key)
map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :)
map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :)
enddo enddo
map % consolidated_idx (map % map_size + 2_8) = k map % consolidated_idx (map % map_size + 2_8) = k
map % consolidated = .True. map % consolidated = .True.
@ -82,7 +82,7 @@ subroutine map_load_from_disk(filename,map)
call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1))
call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/))
map% n_elements = map % consolidated_idx (map % map_size+2_8)-1 map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8
call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2))
call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /))
@ -96,11 +96,11 @@ subroutine map_load_from_disk(filename,map)
do i=0_8, map % map_size do i=0_8, map % map_size
deallocate(map % map(i) % value) deallocate(map % map(i) % value)
deallocate(map % map(i) % key) deallocate(map % map(i) % key)
map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1_8) :)
map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1_8) :)
map % map(i) % sorted = .True. map % map(i) % sorted = .True.
n_elements = int( map % consolidated_idx (i+2) - k, 4) n_elements = int( map % consolidated_idx (i+2_8) - k, 4)
k = map % consolidated_idx (i+2) k = map % consolidated_idx (i+2_8)
map % map(i) % map_size = n_elements map % map(i) % map_size = n_elements
map % map(i) % n_elements = n_elements map % map(i) % n_elements = n_elements
! Load memory from disk ! Load memory from disk
@ -116,7 +116,7 @@ subroutine map_load_from_disk(filename,map)
enddo enddo
enddo enddo
map % sorted = x>0 .or. l == 0_8 map % sorted = x>0 .or. l == 0_8
map % n_elements = k-1 map % n_elements = k-1_8
map % sorted = map % sorted .or. .True. map % sorted = map % sorted .or. .True.
map % consolidated = .True. map % consolidated = .True.

View File

@ -13,7 +13,7 @@ module map_module
! cache_map using a binary search ! cache_map using a binary search
! !
! When using the map_update subroutine to build the map, ! When using the map_update subroutine to build the map,
! the map_unique subroutine ! the map_merge subroutine
! should be called before getting data from the map. ! should be called before getting data from the map.
use omp_lib use omp_lib
@ -274,7 +274,7 @@ subroutine map_sort(map)
end end
subroutine cache_map_unique(map) subroutine cache_map_merge(map)
use map_module use map_module
implicit none implicit none
type (cache_map_type), intent(inout) :: map type (cache_map_type), intent(inout) :: map
@ -298,6 +298,28 @@ subroutine cache_map_unique(map)
end end
subroutine cache_map_unique(map)
  use map_module
  implicit none
  ! Sorts the cache map, then compacts it in place so that each key is kept
  ! only once: for a run of equal keys, the first entry of the run is kept
  ! and the following ones are dropped. n_elements is updated to the number
  ! of entries kept.
  type (cache_map_type), intent(inout) :: map
  integer(cache_key_kind)        :: last_key
  integer(cache_map_size_kind)   :: src, dst

  call cache_map_sort(map)

  ! Sentinel used before any key has been seen
  last_key = -1_8
  dst = 0
  do src = 1, map%n_elements
    ! Same key as the previous kept entry: skip it
    if (map%key(src) == last_key) cycle
    dst = dst + 1
    map%value(dst) = map%value(src)
    map%key(dst)   = map%key(src)
    last_key       = map%key(dst)
  enddo
  map%n_elements = dst
end
subroutine cache_map_shrink(map,thr) subroutine cache_map_shrink(map,thr)
use map_module use map_module
implicit none implicit none
@ -338,6 +360,27 @@ subroutine map_unique(map)
end end
subroutine map_merge(map)
! Applies cache_map_merge to every cache map of the map and recomputes the
! total number of elements.
!
! map : map to process; on output map%n_elements is the sum of the
!       n_elements of all its cache maps.
use map_module
implicit none
type (map_type), intent(inout) :: map
integer(map_size_kind) :: i
integer(map_size_kind) :: icount
! Running total of elements, accumulated across threads by the reduction
icount = 0_8
!$OMP PARALLEL DO SCHEDULE(dynamic,1000) DEFAULT(SHARED) PRIVATE(i)&
!$OMP REDUCTION(+:icount)
do i=0_8,map%map_size
! Each cache map is processed while holding its own lock
call omp_set_lock(map%map(i)%lock)
call cache_map_merge(map%map(i))
call omp_unset_lock(map%map(i)%lock)
! The element count is read after the lock has been released
icount = icount + map%map(i)%n_elements
enddo
!$OMP END PARALLEL DO
map%n_elements = icount
end
subroutine map_shrink(map,thr) subroutine map_shrink(map,thr)
use map_module use map_module
implicit none implicit none
@ -402,7 +445,7 @@ subroutine map_update(map, key, value, sze, thr)
else else
! Assert that the map has a proper size ! Assert that the map has a proper size
if (local_map%n_elements == local_map%map_size) then if (local_map%n_elements == local_map%map_size) then
call cache_map_unique(local_map) call cache_map_merge(local_map)
call cache_map_reallocate(local_map, local_map%n_elements + local_map%n_elements) call cache_map_reallocate(local_map, local_map%n_elements + local_map%n_elements)
call cache_map_shrink(local_map,thr) call cache_map_shrink(local_map,thr)
endif endif

View File

@ -47,6 +47,14 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2)
double precision, intent(in) :: A(LDA,d2) double precision, intent(in) :: A(LDA,d2)
double precision, intent(out) :: B(LDB,d1) double precision, intent(out) :: B(LDB,d1)
! do j=1,d1
! do i=1,d2
! B(i,j ) = A(j ,i)
! enddo
! enddo
! return
integer :: i,j,k, mod_align integer :: i,j,k, mod_align
if ( d2 < 32 ) then if ( d2 < 32 ) then
do j=1,d1 do j=1,d1

View File

@ -42,11 +42,12 @@ function run_FCI_ZMQ() {
qp_set_mo_class h2o.ezfio -core "[1]" -act "[2-12]" -del "[13-24]" qp_set_mo_class h2o.ezfio -core "[1]" -act "[2-12]" -del "[13-24]"
} }
@test "FCI H2O cc-pVDZ" { @test "FCI H2O cc-pVDZ" {
run_FCI h2o.ezfio 2000 -76.1253758241716 -76.1258130146102 run_FCI h2o.ezfio 2000 -76.1253757275131 -76.1258128174355
} }
@test "FCI-ZMQ H2O cc-pVDZ" { @test "FCI-ZMQ H2O cc-pVDZ" {
run_FCI_ZMQ h2o.ezfio 2000 -76.1250552686394 -76.1258817228809 run_FCI_ZMQ h2o.ezfio 2000 -76.1250552686394 -76.1258817228809
} }

View File

@ -3,10 +3,10 @@
LIST=" LIST="
convert.bats convert.bats
hf.bats hf.bats
pseudo.bats
fci.bats fci.bats
cassd.bats cassd.bats
mrcepa0.bats mrcepa0.bats
pseudo.bats
" "
#foboci.bats #foboci.bats