From 5ca864b7b08ca29453cb00dc482ffed77e20b840 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 30 Aug 2023 18:49:09 +0200 Subject: [PATCH] DGEMM for fock_3e_uhf_mo_cs: OK --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 364 +++++++++++++++++++++++++- src/tc_scf/test_int.irp.f | 46 +++- 2 files changed, 402 insertions(+), 8 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 3e624941..5d663480 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -3,6 +3,356 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2 + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmpval_omp(:), tmpvec_omp(:,:), tmpten_omp(:,:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + print *, ' PROVIDING fock_3e_uhf_mo_cs ...' + call wall_time(ti) + + ! --- + + allocate(tmpvec_1(n_points_final_grid,3), tmpval_1(n_points_final_grid)) + tmpvec_1 = 0.d0 + tmpval_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmpval_omp, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmpval_1, tmpvec_1) + + allocate(tmpvec_omp(n_points_final_grid,3), tmpval_omp(n_points_final_grid)) + tmpvec_omp = 0.d0 + tmpval_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpvec_omp(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmpvec_omp(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmpvec_omp(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmpval_omp(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmpvec_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmpvec_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmpvec_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmpval_1(ipoint) += tmpval_omp(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp, tmpval_omp) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,5)) + tmp_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, loc_1) & + !$OMP SHARED (n_points_final_grid, tmpval_1, tmpvec_1, tmp_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + + loc_1 = -4.d0 * tmpval_1(ipoint) + + tmp_1(ipoint,1) = loc_1 * tmpvec_1(ipoint,1) + tmp_1(ipoint,2) = loc_1 * tmpvec_1(ipoint,2) + tmp_1(ipoint,3) = loc_1 * tmpvec_1(ipoint,3) + + tmp_1(ipoint,4) = -2.d0 * ( tmpvec_1(ipoint,1) * tmpvec_1(ipoint,1) & + + tmpvec_1(ipoint,2) * tmpvec_1(ipoint,2) & + + tmpvec_1(ipoint,3) * tmpvec_1(ipoint,3) ) + + tmp_1(ipoint,5) = tmpval_1(ipoint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmpvec_omp(n_points_final_grid,4)) + tmpvec_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_omp(ipoint,1) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmpvec_omp(ipoint,2) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmpvec_omp(ipoint,3) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + tmpvec_omp(ipoint,4) += ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmp_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmp_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmp_1(ipoint,4) += tmpvec_omp(ipoint,4) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + tmp_2 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + tmp_2(ipoint,4,b,a) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) * mos_r_in_r_array_transp(ipoint,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + + allocate(tmpten_omp(n_points_final_grid,mo_num,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpten_omp(ipoint,b,a) += 2.d0 * final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,5,b,a) += tmpten_omp(ipoint,b,a) + enddo + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,7,mo_num), tmp_4(n_points_final_grid,7,mo_num)) + tmp_3 = 0.d0 + tmp_4 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_3(ipoint,2,b) = loc_1 + tmp_3(ipoint,7,b) = loc_1 + + tmp_4(ipoint,1,b) = loc_2 + tmp_4(ipoint,6,b) = loc_2 + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmpvec_1, tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,8,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmpten_omp(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmpten_omp(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmpten_omp(ipoint,4,b) += 2.d0 * loc_1 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmpten_omp(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmpten_omp(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmpten_omp(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmpten_omp(ipoint,8,b) += 2.d0 * loc_2 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,3,b) += tmpten_omp(ipoint,1,b) + tmp_3(ipoint,4,b) += tmpten_omp(ipoint,2,b) + tmp_3(ipoint,5,b) += tmpten_omp(ipoint,3,b) + tmp_3(ipoint,6,b) += tmpten_omp(ipoint,4,b) + + tmp_4(ipoint,3,b) += tmpten_omp(ipoint,5,b) + tmp_4(ipoint,4,b) += tmpten_omp(ipoint,6,b) + tmp_4(ipoint,5,b) += tmpten_omp(ipoint,7,b) + tmp_4(ipoint,7,b) += tmpten_omp(ipoint,8,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,2,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmpten_omp(ipoint,2,b) -= loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) += tmpten_omp(ipoint,1,b) + tmp_4(ipoint,2,b) += tmpten_omp(ipoint,2,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 7*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 7*n_points_final_grid & + , tmp_4(1,1,1), 7*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + ! --- + + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] + implicit none integer :: a, b, i, j double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia @@ -12,14 +362,14 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - !call wall_time(ti) + print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' + call wall_time(ti) - fock_3e_uhf_mo_cs = 0.d0 + fock_3e_uhf_mo_cs_old = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs) + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -54,7 +404,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_cs(b,a) += tmp(b,a) + fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -62,8 +412,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti END_PROVIDER diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 649d0f3e..d7780497 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -54,7 +54,10 @@ program test_ints !!PROVIDE TC_HF_energy VARTC_HF_energy !!print *, ' TC_HF_energy = ', TC_HF_energy !!print *, ' VARTC_HF_energy = ', VARTC_HF_energy - call test_old_ints +! call test_old_ints + + call test_fock_3e_uhf_mo_cs() + end ! --- @@ -1096,3 +1099,44 @@ subroutine test_int2_grad1_u12_ao_test print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 end + +! --- + +subroutine test_fock_3e_uhf_mo_cs() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_cs_old(j,i) + I_new = fock_3e_uhf_mo_cs (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_cs + +