mirror of
https://github.com/QuantumPackage/qp2.git
synced 2025-01-05 10:59:45 +01:00
Accelerated 4idx transformation
This commit is contained in:
parent
edb1b43563
commit
43704f8fc7
@ -8,6 +8,7 @@
|
|||||||
- Use OpamPack for OCaml
|
- Use OpamPack for OCaml
|
||||||
- Configure adapted for ARM
|
- Configure adapted for ARM
|
||||||
- Added many types of integrals
|
- Added many types of integrals
|
||||||
|
- Accelerated four-index transformation
|
||||||
|
|
||||||
*** TODO: take from dev
|
*** TODO: take from dev
|
||||||
- [ ] Added GTOs with complex exponent
|
- [ ] Added GTOs with complex exponent
|
||||||
|
@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
|
|||||||
enddo
|
enddo
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do i=1,mo_num
|
do i=1,mo_num
|
||||||
h_core_ri(i,j) = h_core_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
|
h_core_ri(i,j) = h_core_ri(i,j) - 0.5d0 * big_array_exchange_integrals(k,i,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
@ -53,7 +53,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
|||||||
! call four_idx_novvvv
|
! call four_idx_novvvv
|
||||||
call four_idx_novvvv_old
|
call four_idx_novvvv_old
|
||||||
else
|
else
|
||||||
call add_integrals_to_map(full_ijkl_bitmask_4)
|
if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then
|
||||||
|
call four_idx_dgemm
|
||||||
|
else
|
||||||
|
call add_integrals_to_map(full_ijkl_bitmask_4)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call wall_time(wall_2)
|
call wall_time(wall_2)
|
||||||
@ -77,6 +81,94 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
|||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
subroutine four_idx_dgemm
|
||||||
|
implicit none
|
||||||
|
integer :: p,q,r,s,i,j,k,l
|
||||||
|
double precision, allocatable :: a1(:,:,:,:)
|
||||||
|
double precision, allocatable :: a2(:,:,:,:)
|
||||||
|
|
||||||
|
allocate (a1(ao_num,ao_num,ao_num,ao_num))
|
||||||
|
|
||||||
|
print *, 'Getting AOs'
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,r,s)
|
||||||
|
do s=1,ao_num
|
||||||
|
do r=1,ao_num
|
||||||
|
do q=1,ao_num
|
||||||
|
call get_ao_two_e_integrals(q,r,s,ao_num,a1(1,q,r,s))
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
print *, '1st transformation'
|
||||||
|
! 1st transformation
|
||||||
|
allocate (a2(ao_num,ao_num,ao_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*ao_num*ao_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*ao_num*ao_num))
|
||||||
|
|
||||||
|
! 2nd transformation
|
||||||
|
print *, '2nd transformation'
|
||||||
|
deallocate (a1)
|
||||||
|
allocate (a1(ao_num,ao_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*ao_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (ao_num*ao_num*mo_num))
|
||||||
|
|
||||||
|
! 3rd transformation
|
||||||
|
print *, '3rd transformation'
|
||||||
|
deallocate (a2)
|
||||||
|
allocate (a2(ao_num,mo_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*mo_num*mo_num))
|
||||||
|
|
||||||
|
! 4th transformation
|
||||||
|
print *, '4th transformation'
|
||||||
|
deallocate (a1)
|
||||||
|
allocate (a1(mo_num,mo_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (mo_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (mo_num*mo_num*mo_num))
|
||||||
|
|
||||||
|
deallocate (a2)
|
||||||
|
|
||||||
|
integer :: n_integrals, size_buffer
|
||||||
|
integer(key_kind) , allocatable :: buffer_i(:)
|
||||||
|
real(integral_kind), allocatable :: buffer_value(:)
|
||||||
|
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||||
|
|
||||||
|
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,buffer_value,buffer_i,n_integrals)
|
||||||
|
allocate ( buffer_i(size_buffer), buffer_value(size_buffer) )
|
||||||
|
|
||||||
|
n_integrals = 0
|
||||||
|
!$OMP DO
|
||||||
|
do l=1,mo_num
|
||||||
|
do k=1,mo_num
|
||||||
|
do j=1,l
|
||||||
|
do i=1,k
|
||||||
|
if (abs(a1(i,j,k,l)) < mo_integrals_threshold) then
|
||||||
|
cycle
|
||||||
|
endif
|
||||||
|
n_integrals += 1
|
||||||
|
buffer_value(n_integrals) = a1(i,j,k,l)
|
||||||
|
!DIR$ FORCEINLINE
|
||||||
|
call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
|
||||||
|
if (n_integrals == size_buffer) then
|
||||||
|
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||||
|
n_integrals = 0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END DO
|
||||||
|
|
||||||
|
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||||
|
|
||||||
|
deallocate(buffer_i, buffer_value)
|
||||||
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
deallocate (a1)
|
||||||
|
|
||||||
|
call map_unique(mo_integrals_map)
|
||||||
|
|
||||||
|
integer*8 :: get_mo_map_size, mo_map_size
|
||||||
|
mo_map_size = get_mo_map_size()
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
|
||||||
subroutine add_integrals_to_map(mask_ijkl)
|
subroutine add_integrals_to_map(mask_ijkl)
|
||||||
use bitmasks
|
use bitmasks
|
||||||
@ -153,24 +245,26 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
call wall_time(wall_1)
|
||||||
|
|
||||||
|
size_buffer = min(ao_num*ao_num*ao_num,8000000)
|
||||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||||
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
||||||
|
|
||||||
double precision :: accu_bis
|
|
||||||
accu_bis = 0.d0
|
|
||||||
call wall_time(wall_1)
|
|
||||||
|
|
||||||
!$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
!$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
||||||
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
||||||
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
||||||
!$OMP wall_0,thread_num,accu_bis) &
|
!$OMP wall_0,thread_num) &
|
||||||
!$OMP DEFAULT(NONE) &
|
!$OMP DEFAULT(NONE) &
|
||||||
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, &
|
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, &
|
||||||
!$OMP mo_coef_transp, &
|
!$OMP mo_coef_transp, &
|
||||||
!$OMP mo_coef_transp_is_built, list_ijkl, &
|
!$OMP mo_coef_transp_is_built, list_ijkl, &
|
||||||
!$OMP mo_coef_is_built, wall_1, &
|
!$OMP mo_coef_is_built, wall_1, &
|
||||||
!$OMP mo_coef,mo_integrals_threshold,mo_integrals_map)
|
!$OMP mo_coef,mo_integrals_threshold,mo_integrals_map)
|
||||||
|
|
||||||
|
thread_num = 0
|
||||||
|
!$ thread_num = omp_get_thread_num()
|
||||||
|
|
||||||
n_integrals = 0
|
n_integrals = 0
|
||||||
wall_0 = wall_1
|
wall_0 = wall_1
|
||||||
allocate(two_e_tmp_3(mo_num, n_j, n_k), &
|
allocate(two_e_tmp_3(mo_num, n_j, n_k), &
|
||||||
@ -181,8 +275,6 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
buffer_i(size_buffer), &
|
buffer_i(size_buffer), &
|
||||||
buffer_value(size_buffer) )
|
buffer_value(size_buffer) )
|
||||||
|
|
||||||
thread_num = 0
|
|
||||||
!$ thread_num = omp_get_thread_num()
|
|
||||||
!$OMP DO SCHEDULE(guided)
|
!$OMP DO SCHEDULE(guided)
|
||||||
do l1 = 1,ao_num
|
do l1 = 1,ao_num
|
||||||
two_e_tmp_3 = 0.d0
|
two_e_tmp_3 = 0.d0
|
||||||
@ -340,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
!$OMP END DO NOWAIT
|
!$OMP END DO NOWAIT
|
||||||
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
||||||
|
|
||||||
integer :: index_needed
|
if (n_integrals > 0) then
|
||||||
|
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
||||||
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
real(mo_integrals_threshold,integral_kind))
|
||||||
real(mo_integrals_threshold,integral_kind))
|
endif
|
||||||
deallocate(buffer_i, buffer_value)
|
deallocate(buffer_i, buffer_value)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
call map_merge(mo_integrals_map)
|
call map_merge(mo_integrals_map)
|
||||||
@ -433,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
|||||||
|
|
||||||
call wall_time(wall_1)
|
call wall_time(wall_1)
|
||||||
call cpu_time(cpu_1)
|
call cpu_time(cpu_1)
|
||||||
double precision :: accu_bis
|
|
||||||
accu_bis = 0.d0
|
|
||||||
!$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
!$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
||||||
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
||||||
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
||||||
!$OMP wall_0,thread_num,accu_bis) &
|
!$OMP wall_0,thread_num) &
|
||||||
!$OMP DEFAULT(NONE) &
|
!$OMP DEFAULT(NONE) &
|
||||||
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, &
|
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, &
|
||||||
!$OMP mo_coef_transp, &
|
!$OMP mo_coef_transp, &
|
||||||
@ -636,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
|||||||
!$OMP END DO NOWAIT
|
!$OMP END DO NOWAIT
|
||||||
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
||||||
|
|
||||||
integer :: index_needed
|
|
||||||
|
|
||||||
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
||||||
real(mo_integrals_threshold,integral_kind))
|
real(mo_integrals_threshold,integral_kind))
|
||||||
deallocate(buffer_i, buffer_value)
|
deallocate(buffer_i, buffer_value)
|
||||||
|
Loading…
Reference in New Issue
Block a user