DGEMM four-index transformation

2024-12-22 03:23:29 +01:00 · 2022-05-25 19:35:44 +02:00 · 2022-05-25 19:35:44 +02:00 · 727815751c
commit 727815751c
parent a52a084f58
1 changed files with 95 additions and 1 deletions
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@ -52,9 +52,13 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
  if(no_vvvv_integrals)then
 !    call four_idx_novvvv
    call four_idx_novvvv_old
+  else
+    if (ao_num*ao_num*ao_num*ao_num*32.d-9 < dble(qp_max_mem)) then
+      call four_idx_dgemm
    else
      call add_integrals_to_map(full_ijkl_bitmask_4)
    endif
+  endif

  call wall_time(wall_2)
  call cpu_time(cpu_2)
@ -77,6 +81,96 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]

 END_PROVIDER

+subroutine four_idx_dgemm
+  implicit none
+  integer :: p,q,r,s,i,j,k,l
+  double precision, allocatable :: a1(:,:,:,:)
+  double precision, allocatable :: a2(:,:,:,:)
+
+  allocate (a1(ao_num,ao_num,ao_num,ao_num))
+
+  print *, 'Getting AOs'
+  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,r,s)
+  do s=1,ao_num
+    do r=1,ao_num
+      do q=1,ao_num
+        call get_ao_two_e_integrals(q,r,s,ao_num,a1(1,q,r,s))
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+  print *, '1st transformation'
+  ! 1st transformation
+  allocate (a2(ao_num,ao_num,ao_num,mo_num))
+  call dgemm('T','N', (ao_num*ao_num*ao_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*ao_num*ao_num))
+
+  ! 2nd transformation
+  print *, '2nd transformation'
+  deallocate (a1)
+  allocate (a1(ao_num,ao_num,mo_num,mo_num))
+  call dgemm('T','N', (ao_num*ao_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (ao_num*ao_num*mo_num))
+
+  ! 3rd transformation
+  print *, '3rd transformation'
+  deallocate (a2)
+  allocate (a2(ao_num,mo_num,mo_num,mo_num))
+  call dgemm('T','N', (ao_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*mo_num*mo_num))
+
+  ! 4th transformation
+  print *, '4th transformation'
+  deallocate (a1)
+  allocate (a1(mo_num,mo_num,mo_num,mo_num))
+  call dgemm('T','N', (mo_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (mo_num*mo_num*mo_num))
+
+  deallocate (a2)
+
+  integer :: n_integrals, size_buffer
+  integer(key_kind)  , allocatable :: buffer_i(:)
+  real(integral_kind), allocatable :: buffer_value(:)
+  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+
+  print *, 'Storing'
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,buffer_value,buffer_i,n_integrals)
+  allocate ( buffer_i(size_buffer), buffer_value(size_buffer) )
+
+  n_integrals = 0
+  !$OMP DO
+  do l=1,mo_num
+    do k=1,mo_num
+      do j=1,l
+        do i=1,k
+            if (abs(a1(i,j,k,l)) < mo_integrals_threshold) then
+              cycle
+            endif
+            n_integrals += 1
+            buffer_value(n_integrals) = a1(i,j,k,l)
+            !DIR$ FORCEINLINE
+            call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+            if (n_integrals == size_buffer) then
+              call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
+              n_integrals = 0
+            endif
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+
+  call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
+
+  deallocate(buffer_i, buffer_value)
+  !$OMP END PARALLEL
+
+  deallocate (a1)
+
+  print *, 'Unique'
+  call map_unique(mo_integrals_map)
+
+  integer*8                      :: get_mo_map_size, mo_map_size
+  mo_map_size = get_mo_map_size()
+
+end subroutine

 subroutine add_integrals_to_map(mask_ijkl)
  use bitmasks