From 8f6df3428371cffeb0a28b8c16b6da910a5ee7e4 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 29 Aug 2023 14:26:07 +0200 Subject: [PATCH 01/20] minor modif --- src/bi_ortho_mos/bi_density.irp.f | 2 -- src/tc_scf/tc_scf_dm.irp.f | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f index 2dad9485..90fe9634 100644 --- a/src/bi_ortho_mos/bi_density.irp.f +++ b/src/bi_ortho_mos/bi_density.irp.f @@ -15,7 +15,6 @@ BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ] call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & - !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) ) END_PROVIDER @@ -36,7 +35,6 @@ BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ] call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & - !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) ) END_PROVIDER diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f index 07da8a58..bf31a4a1 100644 --- a/src/tc_scf/tc_scf_dm.irp.f +++ b/src/tc_scf/tc_scf_dm.irp.f @@ -1,46 +1,68 @@ ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ] +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num)] BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for BETA electrons + ! END_DOC + implicit none if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta + else + TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta + endif + END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ] +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num)] BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for ALPHA electrons + ! END_DOC + implicit none if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha + else + TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha + endif + END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ] - implicit none +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num)] + BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for ALPHA+BETA electrons + ! END_DOC + + implicit none + TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha + END_PROVIDER From 53041958a6ccbb18bfacb4bd623887dde33aa71d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 29 Aug 2023 19:21:54 +0200 Subject: [PATCH 02/20] opt in v_ij_u_cst_mu_j1b_an: tested --- src/ao_many_one_e_ints/ao_erf_gauss.irp.f | 154 ++++++++++++++++++ .../grad_lapl_jmu_modif.irp.f | 131 ++++++++++++++- src/non_h_ints_mu/test_non_h_ints.irp.f | 44 ++++- 3 files changed, 317 insertions(+), 12 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index b1077161..823536cc 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -1245,3 +1245,157 @@ end subroutine NAI_pol_x2_mult_erf_ao ! --- +subroutine NAI_pol_012_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! ints(1) = $\int_{-\infty}^{infty} dr x^0 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! ints(2) = $\int_{-\infty}^{infty} dr x^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(3) = $\int_{-\infty}^{infty} dr y^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(4) = $\int_{-\infty}^{infty} dr z^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! ints(5) = $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(6) = $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(7) = $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + double precision, intent(out) :: ints(7) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + double precision :: integral0, integral1, integral2 + + double precision, external :: NAI_pol_mult_erf_with1s + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + call NAI_pol_012_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + return + endif + + ints = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + integral0 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(1) += coef * integral0 + + do m = 1, 3 + + power_A1 = power_Ai + power_A1(m) += 1 + integral1 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A1, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(1+m) += coef * (integral1 + Ai_center(m)*integral0) + + power_A2 = power_Ai + power_A2(m) += 2 + integral2 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A2, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(4+m) += coef * (integral2 + Ai_center(m) * (2.d0*integral1 + Ai_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_012_mult_erf_ao_with1s + +! --- + +subroutine NAI_pol_012_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! int(1) = $\int_{-\infty}^{infty} dr x^0 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! int(2) = $\int_{-\infty}^{infty} dr x^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(3) = $\int_{-\infty}^{infty} dr y^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(4) = $\int_{-\infty}^{infty} dr z^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! int(5) = $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(6) = $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(7) = $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + double precision, intent(out) :: ints(7) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision :: integral0, integral1, integral2 + + double precision :: NAI_pol_mult_erf + + ints = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + integral0 = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + ints(1) += coef * integral0 + + do m = 1, 3 + + power_A1 = power_A + power_A1(m) += 1 + integral1 = NAI_pol_mult_erf(A_center, B_center, power_A1, power_B, alpha, beta, C_center, n_pt_in, mu_in) + + ints(1+m) += coef * (integral1 + A_center(m)*integral0) + + power_A2 = power_A + power_A2(m) += 2 + integral2 = NAI_pol_mult_erf(A_center, B_center, power_A2, power_B, alpha, beta, C_center, n_pt_in, mu_in) + + ints(4+m) += coef * (integral2 + A_center(m) * (2.d0*integral1 + A_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_012_mult_erf_ao + +! --- + diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 9af3f9a9..24b33eb5 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -299,15 +299,12 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [double precision, v_ij_u_cst_mu_j1b_an_old, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) ! - ! TODO - ! one subroutine for all integrals - ! END_DOC include 'constants.include.F' @@ -325,7 +322,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin double precision, external :: overlap_gauss_r12_ao_with1s double precision, external :: NAI_pol_mult_erf_ao_with1s - print*, ' providing v_ij_u_cst_mu_j1b_an ...' + print*, ' providing v_ij_u_cst_mu_j1b_an_old ...' call wall_time(wall0) provide mu_erf final_grid_points j1b_pen @@ -333,7 +330,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin ct = inv_sq_pi_2 / mu_erf - v_ij_u_cst_mu_j1b_an = 0.d0 + v_ij_u_cst_mu_j1b_an_old = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, & @@ -342,7 +339,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & !$OMP final_grid_points, mu_erf, ct, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & - !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an_old) !$OMP DO do ipoint = 1, n_points_final_grid @@ -413,6 +410,125 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin ! --- + v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) = tmp + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) = v_ij_u_cst_mu_j1b_an_old(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for v_ij_u_cst_mu_j1b_an_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer :: i, j, ipoint, i_1s + double precision :: r(3), r1_2 + double precision :: int_o + double precision :: int_c(7), int_e(7) + double precision :: coef, beta, B_center(3) + double precision :: tmp, ct + double precision :: wall0, wall1 + + double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: NAI_pol_mult_erf_ao_with1s + + print*, ' providing v_ij_u_cst_mu_j1b_an ...' + call wall_time(wall0) + + provide mu_erf final_grid_points j1b_pen + PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent + + ct = inv_sq_pi_2 / mu_erf + + v_ij_u_cst_mu_j1b_an = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, & + !$OMP r1_2, tmp, int_c, int_e, int_o) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & + !$OMP final_grid_points, mu_erf, ct, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) + !$OMP DO + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + r1_2 = 0.5d0 * (r(1)*r(1) + r(2)*r(2) + r(3)*r(3)) + + do i = 1, ao_num + do j = i, ao_num + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c) + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = coef & + * ( r1_2 * (int_c(1) - int_e(1)) & + - r(1) * (int_c(2) - int_e(2)) - r(2) * (int_c(3) - int_e(3)) - r(3) * (int_c(4) - int_e(4)) & + + 0.5d0 * (int_c(5) + int_c(6) + int_c(7) - int_e(5) - int_e(6) - int_e(7)) & + - ct * int_o & + ) + + ! --- + + do i_1s = 2, List_all_comb_b2_size + + coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 + beta = List_all_comb_b2_expo (i_1s) + B_center(1) = List_all_comb_b2_cent(1,i_1s) + B_center(2) = List_all_comb_b2_cent(2,i_1s) + B_center(3) = List_all_comb_b2_cent(3,i_1s) + + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c) + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = tmp + coef & + * ( r1_2 * (int_c(1) - int_e(1)) & + - r(1) * (int_c(2) - int_e(2)) - r(2) * (int_c(3) - int_e(3)) - r(3) * (int_c(4) - int_e(4)) & + + 0.5d0 * (int_c(5) + int_c(6) + int_c(7) - int_e(5) - int_e(6) - int_e(7)) & + - ct * int_o & + ) + + enddo + + ! --- + v_ij_u_cst_mu_j1b_an(j,i,ipoint) = tmp enddo enddo @@ -434,4 +550,3 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin END_PROVIDER ! --- - diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index aff53c2d..e7718a40 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -14,7 +14,9 @@ program test_non_h !call routine_grad_squared() !call routine_fit() - call test_ipp() + !call test_ipp() + + call test_v_ij_u_cst_mu_j1b_an() end ! --- @@ -545,9 +547,43 @@ end subroutine grad1_aos_ik_grad1_esquare ! --- - - - +subroutine test_v_ij_u_cst_mu_j1b_an() + + implicit none + integer :: i, j, ipoint + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE v_ij_u_cst_mu_j1b_an_old v_ij_u_cst_mu_j1b_an + + thr = 1d-12 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + I_old = v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) + I_new = v_ij_u_cst_mu_j1b_an (j,i,ipoint) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_v_ij_u_cst_mu_j1b_an() From 5ca864b7b08ca29453cb00dc482ffed77e20b840 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 30 Aug 2023 18:49:09 +0200 Subject: [PATCH 03/20] DGEMM for fock_3e_uhf_mo_cs: OK --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 364 +++++++++++++++++++++++++- src/tc_scf/test_int.irp.f | 46 +++- 2 files changed, 402 insertions(+), 8 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 3e624941..5d663480 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -3,6 +3,356 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2 + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmpval_omp(:), tmpvec_omp(:,:), tmpten_omp(:,:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + print *, ' PROVIDING fock_3e_uhf_mo_cs ...' + call wall_time(ti) + + ! --- + + allocate(tmpvec_1(n_points_final_grid,3), tmpval_1(n_points_final_grid)) + tmpvec_1 = 0.d0 + tmpval_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmpval_omp, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmpval_1, tmpvec_1) + + allocate(tmpvec_omp(n_points_final_grid,3), tmpval_omp(n_points_final_grid)) + tmpvec_omp = 0.d0 + tmpval_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpvec_omp(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmpvec_omp(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmpvec_omp(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmpval_omp(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmpvec_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmpvec_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmpvec_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmpval_1(ipoint) += tmpval_omp(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp, tmpval_omp) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,5)) + tmp_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, loc_1) & + !$OMP SHARED (n_points_final_grid, tmpval_1, tmpvec_1, tmp_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + + loc_1 = -4.d0 * tmpval_1(ipoint) + + tmp_1(ipoint,1) = loc_1 * tmpvec_1(ipoint,1) + tmp_1(ipoint,2) = loc_1 * tmpvec_1(ipoint,2) + tmp_1(ipoint,3) = loc_1 * tmpvec_1(ipoint,3) + + tmp_1(ipoint,4) = -2.d0 * ( tmpvec_1(ipoint,1) * tmpvec_1(ipoint,1) & + + tmpvec_1(ipoint,2) * tmpvec_1(ipoint,2) & + + tmpvec_1(ipoint,3) * tmpvec_1(ipoint,3) ) + + tmp_1(ipoint,5) = tmpval_1(ipoint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmpvec_omp(n_points_final_grid,4)) + tmpvec_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_omp(ipoint,1) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmpvec_omp(ipoint,2) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmpvec_omp(ipoint,3) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + tmpvec_omp(ipoint,4) += ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmp_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmp_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmp_1(ipoint,4) += tmpvec_omp(ipoint,4) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + tmp_2 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + tmp_2(ipoint,4,b,a) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) * mos_r_in_r_array_transp(ipoint,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + + allocate(tmpten_omp(n_points_final_grid,mo_num,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpten_omp(ipoint,b,a) += 2.d0 * final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,5,b,a) += tmpten_omp(ipoint,b,a) + enddo + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,7,mo_num), tmp_4(n_points_final_grid,7,mo_num)) + tmp_3 = 0.d0 + tmp_4 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_3(ipoint,2,b) = loc_1 + tmp_3(ipoint,7,b) = loc_1 + + tmp_4(ipoint,1,b) = loc_2 + tmp_4(ipoint,6,b) = loc_2 + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmpvec_1, tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,8,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmpten_omp(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmpten_omp(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmpten_omp(ipoint,4,b) += 2.d0 * loc_1 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmpten_omp(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmpten_omp(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmpten_omp(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmpten_omp(ipoint,8,b) += 2.d0 * loc_2 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,3,b) += tmpten_omp(ipoint,1,b) + tmp_3(ipoint,4,b) += tmpten_omp(ipoint,2,b) + tmp_3(ipoint,5,b) += tmpten_omp(ipoint,3,b) + tmp_3(ipoint,6,b) += tmpten_omp(ipoint,4,b) + + tmp_4(ipoint,3,b) += tmpten_omp(ipoint,5,b) + tmp_4(ipoint,4,b) += tmpten_omp(ipoint,6,b) + tmp_4(ipoint,5,b) += tmpten_omp(ipoint,7,b) + tmp_4(ipoint,7,b) += tmpten_omp(ipoint,8,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,2,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmpten_omp(ipoint,2,b) -= loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) += tmpten_omp(ipoint,1,b) + tmp_4(ipoint,2,b) += tmpten_omp(ipoint,2,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 7*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 7*n_points_final_grid & + , tmp_4(1,1,1), 7*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + ! --- + + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] + implicit none integer :: a, b, i, j double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia @@ -12,14 +362,14 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - !call wall_time(ti) + print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' + call wall_time(ti) - fock_3e_uhf_mo_cs = 0.d0 + fock_3e_uhf_mo_cs_old = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs) + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -54,7 +404,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_cs(b,a) += tmp(b,a) + fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -62,8 +412,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti END_PROVIDER diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 649d0f3e..d7780497 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -54,7 +54,10 @@ program test_ints !!PROVIDE TC_HF_energy VARTC_HF_energy !!print *, ' TC_HF_energy = ', TC_HF_energy !!print *, ' VARTC_HF_energy = ', VARTC_HF_energy - call test_old_ints +! call test_old_ints + + call test_fock_3e_uhf_mo_cs() + end ! --- @@ -1096,3 +1099,44 @@ subroutine test_int2_grad1_u12_ao_test print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 end + +! --- + +subroutine test_fock_3e_uhf_mo_cs() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_cs_old(j,i) + I_new = fock_3e_uhf_mo_cs (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_cs + + From 024d9019bb470916f55c8a599208e49e44531ac7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 31 Aug 2023 17:45:12 +0200 Subject: [PATCH 04/20] F(3e) CS optim --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 256 +++++++++----------------- 1 file changed, 92 insertions(+), 164 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 5d663480..ce343f9b 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -6,9 +6,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] implicit none integer :: a, b, i, j, ipoint double precision :: ti, tf - double precision :: loc_1, loc_2 - double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) - double precision, allocatable :: tmpval_omp(:), tmpvec_omp(:,:), tmpten_omp(:,:,:) + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) @@ -19,96 +19,81 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- - allocate(tmpvec_1(n_points_final_grid,3), tmpval_1(n_points_final_grid)) - tmpvec_1 = 0.d0 - tmpval_1 = 0.d0 + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmpval_omp, tmpvec_omp) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & !$OMP SHARED (n_points_final_grid, elec_beta_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmpval_1, tmpvec_1) + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - allocate(tmpvec_omp(n_points_final_grid,3), tmpval_omp(n_points_final_grid)) - tmpvec_omp = 0.d0 - tmpval_omp = 0.d0 + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 !$OMP DO do i = 1, elec_beta_num do ipoint = 1, n_points_final_grid - tmpvec_omp(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmpvec_omp(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmpvec_omp(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmpval_omp(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO NOWAIT !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmpvec_1(ipoint,1) += tmpvec_omp(ipoint,1) - tmpvec_1(ipoint,2) += tmpvec_omp(ipoint,2) - tmpvec_1(ipoint,3) += tmpvec_omp(ipoint,3) - tmpval_1(ipoint) += tmpval_omp(ipoint) + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) enddo !$OMP END CRITICAL - deallocate(tmpvec_omp, tmpval_omp) + deallocate(tmp_omp_d2, tmp_omp_d1) !$OMP END PARALLEL ! --- - allocate(tmp_1(n_points_final_grid,5)) - tmp_1 = 0.d0 + allocate(tmp_1(n_points_final_grid,4)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, loc_1) & - !$OMP SHARED (n_points_final_grid, tmpval_1, tmpvec_1, tmp_1) - !$OMP DO do ipoint = 1, n_points_final_grid - loc_1 = -4.d0 * tmpval_1(ipoint) + loc_1 = 2.d0 * Okappa(ipoint) - tmp_1(ipoint,1) = loc_1 * tmpvec_1(ipoint,1) - tmp_1(ipoint,2) = loc_1 * tmpvec_1(ipoint,2) - tmp_1(ipoint,3) = loc_1 * tmpvec_1(ipoint,3) + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - tmp_1(ipoint,4) = -2.d0 * ( tmpvec_1(ipoint,1) * tmpvec_1(ipoint,1) & - + tmpvec_1(ipoint,2) * tmpvec_1(ipoint,2) & - + tmpvec_1(ipoint,3) * tmpvec_1(ipoint,3) ) - - tmp_1(ipoint,5) = tmpval_1(ipoint) + tmp_1(ipoint,4) = Okappa(ipoint) enddo - !$OMP END DO - !$OMP END PARALLEL !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmpvec_omp) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & !$OMP SHARED (n_points_final_grid, elec_beta_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, tmp_1) - allocate(tmpvec_omp(n_points_final_grid,4)) - tmpvec_omp = 0.d0 + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 - !$OMP DO + !$OMP DO COLLAPSE(2) do i = 1, elec_beta_num do j = 1, elec_beta_num do ipoint = 1, n_points_final_grid loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_omp(ipoint,1) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmpvec_omp(ipoint,2) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmpvec_omp(ipoint,3) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - tmpvec_omp(ipoint,4) += ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) enddo enddo enddo @@ -116,19 +101,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmpvec_omp(ipoint,1) - tmp_1(ipoint,2) += tmpvec_omp(ipoint,2) - tmp_1(ipoint,3) += tmpvec_omp(ipoint,3) - tmp_1(ipoint,4) += tmpvec_omp(ipoint,4) + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) enddo !$OMP END CRITICAL - deallocate(tmpvec_omp) + deallocate(tmp_omp_d2) !$OMP END PARALLEL ! --- - allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) tmp_2 = 0.d0 !$OMP PARALLEL & @@ -138,14 +122,13 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmp_2) - !$OMP DO + !$OMP DO COLLAPSE(2) do a = 1, mo_num do b = 1, mo_num do ipoint = 1, n_points_final_grid tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - tmp_2(ipoint,4,b,a) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) * mos_r_in_r_array_transp(ipoint,a) enddo enddo enddo @@ -154,75 +137,56 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i, tmpten_omp) & + !$OMP PRIVATE (ipoint, a, b, i) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP tmp_2) - - allocate(tmpten_omp(n_points_final_grid,mo_num,mo_num)) - tmpten_omp = 0.d0 - - !$OMP DO + !$OMP DO COLLAPSE(2) do a = 1, mo_num do b = 1, mo_num + tmp_2(:,4,b,a) = 0.d0 do i = 1, elec_beta_num do ipoint = 1, n_points_final_grid - tmpten_omp(ipoint,b,a) += 2.d0 * final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) enddo enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,5,b,a) += tmpten_omp(ipoint,b,a) - enddo - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL ! --- - - call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) deallocate(tmp_1, tmp_2) ! --- - allocate(tmp_3(n_points_final_grid,7,mo_num), tmp_4(n_points_final_grid,7,mo_num)) - tmp_3 = 0.d0 - tmp_4 = 0.d0 + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & !$OMP SHARED (n_points_final_grid, mo_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, tmp_3, tmp_4) + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) !$OMP DO do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - - tmp_3(ipoint,2,b) = loc_1 - tmp_3(ipoint,7,b) = loc_1 - - tmp_4(ipoint,1,b) = loc_2 - tmp_4(ipoint,6,b) = loc_2 + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) enddo enddo !$OMP END DO @@ -230,15 +194,11 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, tmpten_omp) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmpvec_1, tmp_3, tmp_4) - - allocate(tmpten_omp(n_points_final_grid,8,mo_num)) - tmpten_omp = 0.d0 - + !$OMP Jkappa, tmp_3, tmp_4) !$OMP DO do b = 1, mo_num do i = 1, elec_beta_num @@ -247,57 +207,32 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) loc_2 = mos_r_in_r_array_transp(ipoint,i) - tmpten_omp(ipoint,1,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmpten_omp(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmpten_omp(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmpten_omp(ipoint,4,b) += 2.d0 * loc_1 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmpten_omp(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmpten_omp(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmpten_omp(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmpten_omp(ipoint,8,b) += 2.d0 * loc_2 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,3,b) += tmpten_omp(ipoint,1,b) - tmp_3(ipoint,4,b) += tmpten_omp(ipoint,2,b) - tmp_3(ipoint,5,b) += tmpten_omp(ipoint,3,b) - tmp_3(ipoint,6,b) += tmpten_omp(ipoint,4,b) - - tmp_4(ipoint,3,b) += tmpten_omp(ipoint,5,b) - tmp_4(ipoint,4,b) += tmpten_omp(ipoint,6,b) - tmp_4(ipoint,5,b) += tmpten_omp(ipoint,7,b) - tmp_4(ipoint,7,b) += tmpten_omp(ipoint,8,b) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL - - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, tmpten_omp) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP tmp_3, tmp_4) - - allocate(tmpten_omp(n_points_final_grid,2,mo_num)) - tmpten_omp = 0.d0 - !$OMP DO do b = 1, mo_num do i = 1, elec_beta_num @@ -305,42 +240,35 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] do ipoint = 1, n_points_final_grid loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) - tmpten_omp(ipoint,1,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - tmpten_omp(ipoint,2,b) -= loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) enddo enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) += tmpten_omp(ipoint,1,b) - tmp_4(ipoint,2,b) += tmpten_omp(ipoint,2,b) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL ! --- - call dgemm( 'T', 'N', mo_num, mo_num, 7*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 7*n_points_final_grid & - , tmp_4(1,1,1), 7*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) ! --- From e0a11ad21db49d19e8395e650e0db8c6189096a7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 1 Sep 2023 09:34:54 +0200 Subject: [PATCH 05/20] alpha part: 1st and 2nd terms of OS --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 453 +++++++++++++++++++++++++- src/tc_scf/fock_tc.irp.f | 26 -- src/tc_scf/fock_three_bi_ortho.irp.f | 2 +- src/tc_scf/test_int.irp.f | 89 ++++- 4 files changed, 525 insertions(+), 45 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index ce343f9b..3a68ffc6 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -113,7 +113,6 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - tmp_2 = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -347,7 +346,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] BEGIN_DOC ! @@ -366,17 +365,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef PROVIDE fock_3e_uhf_mo_cs - !print *, ' Providing fock_3e_uhf_mo_a ...' - !call wall_time(ti) + print *, ' Providing fock_3e_uhf_mo_a_old ...' + call wall_time(ti) o = elec_beta_num + 1 call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -460,7 +460,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_a(b,a) += tmp(b,a) + fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -468,19 +468,21 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + call wall_time(tf) + print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] BEGIN_DOC + ! ! BETA part of the Fock matrix from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used + ! END_DOC implicit none @@ -491,17 +493,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef - !print *, ' PROVIDING fock_3e_uhf_mo_b ...' - !call wall_time(ti) + print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' + call wall_time(ti) o = elec_beta_num + 1 call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b) + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -575,7 +578,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_b(b,a) += tmp(b,a) + fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -583,8 +586,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti END_PROVIDER @@ -760,5 +763,421 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] END_PROVIDER +! --- + + BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs + + print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,5)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + + tmp_1(ipoint,1) = loc_1 * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + tmp_1(ipoint,5) = -loc_1 + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + + tmp_2(:,5,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,5,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_uhf_mo_a(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.d0 * mos_r_in_r_array_transp(ipoint,b) + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jbarkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jbarkappa(ipoint,3) * Jkappa(ipoint,3) ) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,8,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,8,b) += loc_3 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += loc_4 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_a(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + call wall_time(tf) + print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + +END_PROVIDER + ! --- diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 0ae515bb..fcca29ac 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -190,30 +190,14 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_alpha - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_a - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1)) - !deallocate(tmp) - PROVIDE mo_l_coef mo_r_coef - !call wall_time(tt0) call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - !call wall_time(tt1) - !print*, ' 2-e term:', tt1-tt0 if(three_body_h_tc) then - !call wall_time(tt0) - !PROVIDE fock_a_tot_3e_bi_orth - !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth PROVIDE fock_3e_uhf_mo_a Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a - !call wall_time(tt1) - !print*, ' 3-e term:', tt1-tt0 endif else @@ -241,19 +225,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_beta - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_b - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1)) - !deallocate(tmp) - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - !PROVIDE fock_b_tot_3e_bi_orth - !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth PROVIDE fock_3e_uhf_mo_b Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index 5d2f199c..8475c387 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -34,7 +34,7 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] enddo !call wall_time(t1) - !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1-t0 + !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 END_PROVIDER diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index d7780497..b925c9df 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -57,6 +57,7 @@ program test_ints ! call test_old_ints call test_fock_3e_uhf_mo_cs() + call test_fock_3e_uhf_mo_a() end @@ -1109,6 +1110,13 @@ subroutine test_fock_3e_uhf_mo_cs() double precision :: I_old, I_new double precision :: diff_tot, diff, thr_ih, norm +! double precision :: t0, t1 +! print*, ' Providing fock_a_tot_3e_bi_orth ...' +! call wall_time(t0) +! PROVIDE fock_a_tot_3e_bi_orth +! call wall_time(t1) +! print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 + PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old thr_ih = 1d-10 @@ -1123,7 +1131,7 @@ subroutine test_fock_3e_uhf_mo_cs() diff = dabs(I_old - I_new) if(diff .gt. thr_ih) then - print *, ' problem on ', j, i + print *, ' problem in fock_3e_uhf_mo_cs on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new stop @@ -1139,4 +1147,83 @@ subroutine test_fock_3e_uhf_mo_cs() return end subroutine test_fock_3e_uhf_mo_cs +! --- + +subroutine test_fock_3e_uhf_mo_a() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_a_old(j,i) + I_new = fock_3e_uhf_mo_a (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem in fock_3e_uhf_mo_a on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_a + +! --- + +subroutine test_fock_3e_uhf_mo_b() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_b_old(j,i) + I_new = fock_3e_uhf_mo_b (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem in fock_3e_uhf_mo_b on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_b + +! --- From 8f0f4d3135c0c709d92012800e941930eb9de852 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 1 Sep 2023 10:25:12 +0200 Subject: [PATCH 06/20] alpha part: full OS --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 148 +++++++++++++------------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 3a68ffc6..bb3025f3 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -777,11 +777,9 @@ END_PROVIDER END_DOC implicit none - integer :: a, b, i, j, ipoint, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + integer :: a, b, i, j, ipoint double precision :: loc_1, loc_2, loc_3, loc_4 double precision :: ti, tf - double precision, allocatable :: tmp(:,:) double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) @@ -793,9 +791,6 @@ END_PROVIDER print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' call wall_time(ti) - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs @@ -871,13 +866,14 @@ END_PROVIDER loc_1 = -2.d0 * Okappa (ipoint) loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) - tmp_1(ipoint,1) = loc_1 * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) tmp_1(ipoint,4) = Obarkappa(ipoint) - tmp_1(ipoint,5) = -loc_1 + tmp_1(ipoint,5) = loc_3 - loc_1 enddo @@ -889,8 +885,8 @@ END_PROVIDER !$OMP int2_grad1_u12_bimo_t, tmp_1) allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 + tmp_omp_d2 = 0.d0 !$OMP DO COLLAPSE(2) do i = 1, elec_beta_num do j = elec_beta_num+1, elec_alpha_num @@ -906,12 +902,34 @@ END_PROVIDER enddo enddo !$OMP END DO NOWAIT - !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) enddo !$OMP END CRITICAL @@ -1001,10 +1019,11 @@ END_PROVIDER tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_1 = -2.d0 * mos_r_in_r_array_transp(ipoint,b) - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jbarkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jbarkappa(ipoint,3) * Jkappa(ipoint,3) ) + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) enddo @@ -1055,16 +1074,22 @@ END_PROVIDER tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,8,b) += loc_3 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += loc_4 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) enddo enddo enddo @@ -1080,6 +1105,7 @@ END_PROVIDER !$OMP tmp_3, tmp_4) !$OMP DO do b = 1, mo_num + do i = 1, elec_beta_num do j = elec_beta_num+1, elec_alpha_num do ipoint = 1, n_points_final_grid @@ -1095,6 +1121,7 @@ END_PROVIDER tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -1112,6 +1139,29 @@ END_PROVIDER enddo enddo enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo enddo !$OMP END DO !$OMP END PARALLEL @@ -1126,54 +1176,6 @@ END_PROVIDER deallocate(tmp_3, tmp_4) deallocate(Jkappa, Okappa) - ! --- - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - call wall_time(tf) print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti From 7d04a650b7f417ef6deeec34f005029e61eb7bd0 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 2 Sep 2023 16:47:24 +0200 Subject: [PATCH 07/20] CS & OS of alpha & beta --- src/tc_scf/fock_3e_bi_ortho_cs.irp.f | 280 +++++ src/tc_scf/fock_3e_bi_ortho_os.irp.f | 536 ++++++++++ src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 1168 +-------------------- src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f | 490 +++++++++ src/tc_scf/test_int.irp.f | 1 + 5 files changed, 1320 insertions(+), 1155 deletions(-) create mode 100644 src/tc_scf/fock_3e_bi_ortho_cs.irp.f create mode 100644 src/tc_scf/fock_3e_bi_ortho_os.irp.f create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f diff --git a/src/tc_scf/fock_3e_bi_ortho_cs.irp.f b/src/tc_scf/fock_3e_bi_ortho_cs.irp.f new file mode 100644 index 00000000..0b883865 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_cs.irp.f @@ -0,0 +1,280 @@ + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' + !call wall_time(ti) + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = 2.d0 * Okappa(ipoint) + + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Okappa(ipoint) + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/fock_3e_bi_ortho_os.irp.f b/src/tc_scf/fock_3e_bi_ortho_os.irp.f new file mode 100644 index 00000000..4bbce720 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_os.irp.f @@ -0,0 +1,536 @@ + +! --- + + BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_os, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_os, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' Providing fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os ...' + !call wall_time(ti) + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) + + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_b_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) + + tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 2*n_points_final_grid & + , tmp_4(1,1,1), 2*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_b_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + + + + ! --- + + fock_3e_uhf_mo_a_os = fock_3e_uhf_mo_b_os + + allocate(tmp_1(n_points_final_grid,1)) + + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) + enddo + + allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,1,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_uhf_mo_a_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_a_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index bb3025f3..baa73c8f 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -1,768 +1,4 @@ -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j, ipoint - double precision :: ti, tf - double precision :: loc_1, loc_2, loc_3 - double precision, allocatable :: Okappa(:), Jkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - - loc_1 = 2.d0 * Okappa(ipoint) - - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Okappa(ipoint) - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) - tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & - - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 5*n_points_final_grid & - , tmp_4(1,1,1), 5*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - ! --- - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' - call wall_time(ti) - - fock_3e_uhf_mo_cs_old = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = 1, elec_beta_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! ALPHA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - - print *, ' Providing fock_3e_uhf_mo_a_old ...' - call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! BETA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - - print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' - call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - print *, ' PROVIDING fock_3e_uhf_ao_a ...' - call wall_time(ti) - - fock_3e_uhf_ao_a = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_a * dm_dk_a * i_mugd_eknu & - + dm_ge_a * dm_dk_a * i_mugd_knue & - - dm_ge_a * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_a * i_mugd_kenu & - - dm_ge_a * dm_dk_a * i_mugd_nuke & - - dm_ge_b * dm_dk_b * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - print *, ' PROVIDING fock_3e_uhf_ao_b ...' - call wall_time(ti) - - fock_3e_uhf_ao_b = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_b * dm_dk_b * i_mugd_eknu & - + dm_ge_b * dm_dk_b * i_mugd_knue & - - dm_ge_b * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_b * i_mugd_kenu & - - dm_ge_b * dm_dk_b * i_mugd_nuke & - - dm_ge_a * dm_dk_a * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti - -END_PROVIDER - ! --- BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] @@ -770,414 +6,36 @@ END_PROVIDER BEGIN_DOC ! - ! Open Shell part of the Fock matrix from three-electron terms + ! Fock matrix from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used ! END_DOC implicit none - integer :: a, b, i, j, ipoint - double precision :: loc_1, loc_2, loc_3, loc_4 - double precision :: ti, tf - double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + double precision :: ti, tf PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' - call wall_time(ti) + !print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + !call wall_time(ti) + ! CLOSED-SHELL PART PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs - ! --- + if(elec_alpha_num .ne. elec_beta_num) then - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - Jbarkappa = 0.d0 - Obarkappa = 0.d0 + ! OPEN-SHELL PART + PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_b_os - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os + fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os + endif - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Obarkappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,5)) - - do ipoint = 1, n_points_final_grid - - loc_1 = -2.d0 * Okappa (ipoint) - loc_2 = -2.d0 * Obarkappa(ipoint) - loc_3 = Obarkappa(ipoint) - - tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Obarkappa(ipoint) - tmp_1(ipoint,5) = loc_3 - loc_1 - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - - tmp_2(:,5,b,a) = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,5,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 1.d0, fock_3e_uhf_mo_a(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & - + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & - + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) - - tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,8,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = 2.d0 * loc_1 - loc_2 = mos_r_in_r_array_transp(ipoint,i) - loc_4 = 2.d0 * loc_2 - - tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = mos_r_in_r_array_transp(ipoint,j) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 8*n_points_final_grid & - , tmp_4(1,1,1), 8*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_a(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - call wall_time(tf) - print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a and fock_3e_uhf_mo_b =', tf - ti END_PROVIDER diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f new file mode 100644 index 00000000..3bf6bd85 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f @@ -0,0 +1,490 @@ + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + !print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' + !call wall_time(ti) + + fock_3e_uhf_mo_cs_old = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + do j = 1, elec_beta_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - 2.d0 * I_bij_aji & + - 2.d0 * I_bij_iaj & + - 2.d0 * I_bij_jia ) + + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! ALPHA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs + + !print *, ' Providing fock_3e_uhf_mo_a_old ...' + !call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs_old + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + ! --- + + do j = o, elec_alpha_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - 2.d0 * I_bij_jia ) + + enddo + enddo + + ! --- + + do j = 1, elec_beta_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - 2.d0 * I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + ! --- + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + ! --- + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! BETA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' + !call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + ! --- + + do j = o, elec_alpha_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_iaj ) + + enddo + enddo + + ! --- + + do j = 1, elec_beta_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_jia ) + + enddo + enddo + + ! --- + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + - I_bij_aji ) + + enddo + enddo + + ! --- + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Equations (B6) and (B7) + ! + ! g <--> gamma + ! d <--> delta + ! e <--> eta + ! k <--> kappa + ! + END_DOC + + implicit none + integer :: g, d, e, k, mu, nu + double precision :: dm_ge_a, dm_ge_b, dm_ge + double precision :: dm_dk_a, dm_dk_b, dm_dk + double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu + double precision :: ti, tf + double precision, allocatable :: f_tmp(:,:) + + !print *, ' PROVIDING fock_3e_uhf_ao_a ...' + !call wall_time(ti) + + fock_3e_uhf_ao_a = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) + + allocate(f_tmp(ao_num,ao_num)) + f_tmp = 0.d0 + + !$OMP DO + do g = 1, ao_num + do e = 1, ao_num + dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) + dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) + dm_ge = dm_ge_a + dm_ge_b + do d = 1, ao_num + do k = 1, ao_num + dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) + dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) + dm_dk = dm_dk_a + dm_dk_b + do mu = 1, ao_num + do nu = 1, ao_num + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) + f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & + + dm_ge_a * dm_dk_a * i_mugd_eknu & + + dm_ge_a * dm_dk_a * i_mugd_knue & + - dm_ge_a * dm_dk * i_mugd_enuk & + - dm_ge * dm_dk_a * i_mugd_kenu & + - dm_ge_a * dm_dk_a * i_mugd_nuke & + - dm_ge_b * dm_dk_b * i_mugd_nuke ) + enddo + enddo + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do mu = 1, ao_num + do nu = 1, ao_num + fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) + enddo + enddo + !$OMP END CRITICAL + + deallocate(f_tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Equations (B6) and (B7) + ! + ! g <--> gamma + ! d <--> delta + ! e <--> eta + ! k <--> kappa + ! + END_DOC + + implicit none + integer :: g, d, e, k, mu, nu + double precision :: dm_ge_a, dm_ge_b, dm_ge + double precision :: dm_dk_a, dm_dk_b, dm_dk + double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu + double precision :: ti, tf + double precision, allocatable :: f_tmp(:,:) + + !print *, ' PROVIDING fock_3e_uhf_ao_b ...' + !call wall_time(ti) + + fock_3e_uhf_ao_b = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) + + allocate(f_tmp(ao_num,ao_num)) + f_tmp = 0.d0 + + !$OMP DO + do g = 1, ao_num + do e = 1, ao_num + dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) + dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) + dm_ge = dm_ge_a + dm_ge_b + do d = 1, ao_num + do k = 1, ao_num + dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) + dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) + dm_dk = dm_dk_a + dm_dk_b + do mu = 1, ao_num + do nu = 1, ao_num + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) + f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & + + dm_ge_b * dm_dk_b * i_mugd_eknu & + + dm_ge_b * dm_dk_b * i_mugd_knue & + - dm_ge_b * dm_dk * i_mugd_enuk & + - dm_ge * dm_dk_b * i_mugd_kenu & + - dm_ge_b * dm_dk_b * i_mugd_nuke & + - dm_ge_a * dm_dk_a * i_mugd_nuke ) + enddo + enddo + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do mu = 1, ao_num + do nu = 1, ao_num + fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) + enddo + enddo + !$OMP END CRITICAL + + deallocate(f_tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index b925c9df..f09da461 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -58,6 +58,7 @@ program test_ints call test_fock_3e_uhf_mo_cs() call test_fock_3e_uhf_mo_a() + call test_fock_3e_uhf_mo_b() end From 90ff4af1a093c3190dc9de033a9a2f56b31935ac Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 2 Sep 2023 17:17:55 +0200 Subject: [PATCH 08/20] fock(3e) DGEMM --- src/tc_scf/fock_tc.irp.f | 1 + src/tc_scf/test_int.irp.f | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index fcca29ac..282f9873 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -227,6 +227,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + if(three_body_h_tc) then PROVIDE fock_3e_uhf_mo_b Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index f09da461..4aa67d04 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -1120,7 +1120,7 @@ subroutine test_fock_3e_uhf_mo_cs() PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1135,7 +1135,7 @@ subroutine test_fock_3e_uhf_mo_cs() print *, ' problem in fock_3e_uhf_mo_cs on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) @@ -1159,7 +1159,7 @@ subroutine test_fock_3e_uhf_mo_a() PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1174,7 +1174,7 @@ subroutine test_fock_3e_uhf_mo_a() print *, ' problem in fock_3e_uhf_mo_a on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) @@ -1198,7 +1198,7 @@ subroutine test_fock_3e_uhf_mo_b() PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1213,7 +1213,7 @@ subroutine test_fock_3e_uhf_mo_b() print *, ' problem in fock_3e_uhf_mo_b on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) From 63f6404f8e3dfcb67a3827ad712713ab2f11735a Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 2 Sep 2023 18:11:05 +0200 Subject: [PATCH 09/20] added OPENMP on 1/r12 --- src/non_h_ints_mu/total_tc_int.irp.f | 37 ++++++++++++++++---- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 50 ++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 158ee2fb..58103643 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -159,25 +159,48 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num ! END_DOC - integer :: i, j, k, l - double precision :: integral - double precision, external :: get_ao_two_e_integral + integer :: i, j, k, l + double precision :: integral + double precision, allocatable :: tmp(:) + double precision, external :: get_ao_two_e_integral PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_coul, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - ! < 1:k, 2:l | 1:i, 2:j > - integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) - - ao_two_e_coul(k,i,l,j) = integral + ao_two_e_coul(k,i,l,j) = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) enddo enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL + + +! TODO +! allocate(tmp(ao_num)) +! +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,l,j,k,tmp) +! do j = 1, ao_num +! do l = 1, ao_num +! do i = 1, ao_num +! call get_ao_two_e_integrals(i, l, l, ao_num, tmp(1)) +! do k = 1, ao_num +! ao_two_e_coul(k,i,l,j) = tmp(k) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO +! +! deallocate(tmp) END_PROVIDER diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index baa73c8f..63a1e162 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -1,12 +1,11 @@ ! --- - BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] -&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] BEGIN_DOC ! - ! Fock matrix from three-electron terms + ! Fock matrix alpha from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used ! @@ -17,25 +16,60 @@ PROVIDE mo_l_coef mo_r_coef - !print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + !print *, ' Providing fock_3e_uhf_mo_a ...' !call wall_time(ti) ! CLOSED-SHELL PART PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs if(elec_alpha_num .ne. elec_beta_num) then ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_b_os + PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os - fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os endif !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a and fock_3e_uhf_mo_b =', tf - ti + !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix beta from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' Providing and fock_3e_uhf_mo_b ...' + !call wall_time(ti) + + ! CLOSED-SHELL PART + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_uhf_mo_b_os + + fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os + endif + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_b =', tf - ti END_PROVIDER From 38f8b96d414c89664ce89c3d89f2191094f0eedb Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 4 Sep 2023 22:27:18 +0200 Subject: [PATCH 10/20] numerical integrals: 1 shot --> blocks over r2 --- src/non_h_ints_mu/grad_squared.irp.f | 18 - src/non_h_ints_mu/grad_squared_manu.irp.f | 66 --- src/non_h_ints_mu/jast_deriv.irp.f | 533 ++++++++++++++++++++-- src/non_h_ints_mu/new_grad_tc.irp.f | 16 - src/non_h_ints_mu/tc_integ.irp.f | 314 ++++++++++--- src/non_h_ints_mu/test_non_h_ints.irp.f | 100 +++- src/non_h_ints_mu/total_tc_int.irp.f | 28 +- src/tc_scf/tc_scf.irp.f | 10 +- 8 files changed, 845 insertions(+), 240 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 44a6ae65..8c6d35dc 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -425,7 +425,6 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance - ! note that the factor PROVIDE int2_u2_j1b2 @@ -465,25 +464,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao ! --- deallocate(b_mat) - call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & - !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) - !!$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - !!$OMP END DO - !!$OMP END PARALLEL endif if(write_tc_integ.and.mpi_master) then diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index 66f3c693..dcfeff47 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -67,72 +67,6 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu deallocate(tmp, b_mat) call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) - !do i = 1, ao_num - ! do j = 1, ao_num - ! do k = i, ao_num - - ! do l = max(j,k), ao_num - ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! end do - - ! !if (j.eq.k) then - ! ! do l = j+1, ao_num - ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! ! end do - ! !else - ! ! do l = j, ao_num - ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! ! enddo - ! !endif - - ! enddo - ! enddo - !enddo - !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) - ! !$OMP DO SCHEDULE (static) - ! integer :: ii - ! ii = 0 - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! if((i.lt.j) .and. (k.lt.l)) cycle - ! ii = ii + 1 - ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_square_ao_test(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! print *, ' ii =', ii - ! !$OMP END DO - ! !$OMP END PARALLEL - - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) - ! !$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, j-1 - ! do k = 1, l-1 - ! ii = ii + 1 - ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! print *, ' ii =', ii - ! print *, ao_num * ao_num * ao_num * ao_num - ! !$OMP END DO - ! !$OMP END PARALLEL endif diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 859f2aa5..082412f9 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -32,7 +32,8 @@ grad1_u12_num = 0.d0 grad1_u12_squared_num = 0.d0 - if(j1b_type .eq. 100) then + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -111,42 +112,6 @@ !$OMP END DO !$OMP END PARALLEL - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, jpoint, r1, r2, grad1_u2b, dx, dy, dz) & - !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & - !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid ! r1 - - r1(1) = final_grid_points(1,ipoint) - r1(2) = final_grid_points(2,ipoint) - r1(3) = final_grid_points(3,ipoint) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = grad1_u2b(1) - dy = grad1_u2b(2) - dz = grad1_u2b(3) - - grad1_u12_num(jpoint,ipoint,1) = dx - grad1_u12_num(jpoint,ipoint,2) = dy - grad1_u12_num(jpoint,ipoint,3) = dz - - grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -158,6 +123,239 @@ END_PROVIDER ! --- +subroutine get_grad1_u12_r1_seq(r1, n_grid2, resx, resy, resz) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2) + + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + resx(:) = (gradx1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(1)) * v1b_r2(:) + resy(:) = (grady1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(2)) * v1b_r2(:) + resz(:) = (gradz1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(3)) * v1b_r2(:) + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_r1_seq + +! --- + +subroutine get_grad1_u12_squared_r1_seq(r1, n_grid2, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + + integer :: jpoint + double precision :: r2(3) + double precision :: v1b_r1, v1b_r2, u2b_r12 + double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz + double precision, external :: j12_mu, j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + res(jpoint) = dx*dx + dy*dy + dz*dz + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + v1b_r2 = j1b_nucl(r2) + u2b_r12 = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + + res(jpoint) = dx*dx + dy*dy + dz*dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_squared_r1_seq + +! --- + +subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + do jpoint = 1, n_points_extra_final_grid + res(jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + do jpoint = 1, n_points_extra_final_grid + resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) + resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) + resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) + res (jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_withsq_r1_seq + +! --- + double precision function j12_mu(r1, r2) include 'constants.include.F' @@ -190,18 +388,20 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) BEGIN_DOC -! gradient of j(mu(r1,r2),r12) form of jastrow. -! -! if mu(r1,r2) = cst ---> j1b_type < 200 and -! -! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) -! -! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and -! -! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) -! -! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! END_DOC + include 'constants.include.F' implicit none @@ -851,3 +1051,240 @@ subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) end +! --- + +subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + gradx(jpoint) = tmp * dx + grady(jpoint) = tmp * dy + gradz(jpoint) = tmp * dz + enddo + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + gradx(jpoint) = tmp * mu_der(1) + grady(jpoint) = tmp * mu_der(2) + gradz(jpoint) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + + gradx(jpoint) = gradx(jpoint) + tmp * dx + grady(jpoint) = grady(jpoint) + tmp * dy + gradz(jpoint) = gradz(jpoint) + tmp * dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu_r1_seq + +! --- + +subroutine j12_mu_r1_seq(r1, n_grid2, res) + + include 'constants.include.F' + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: mu_tmp, r12 + + PROVIDE final_grid_points_extra + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' + stop + + endif + + return +end subroutine j12_mu_r1_seq + +! --- + +subroutine j1b_nucl_r1_seq(n_grid2, res) + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(out) :: res(n_grid2) + + double precision :: r(3) + integer :: i, jpoint + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + + res(jpoint) -= dexp(-a*dsqrt(d)) + enddo + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + + res(jpoint) *= e + enddo + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) + enddo + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + res(jpoint) -= dexp(-a*d*d) + enddo + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' + stop + + endif + + return +end subroutine j1b_nucl_r1_seq + +! --- + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index dc76431d..ab3cc3be 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -149,22 +149,6 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, deallocate(b_mat) call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num) - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num) - ! !$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - ! !$OMP END PARALLEL endif diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d569b25c..dd21a67f 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -1,10 +1,11 @@ -! --- - BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! + ! TODO + ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? + ! ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) @@ -106,7 +107,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_num double precision, allocatable :: tmp(:,:,:) allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) @@ -126,39 +126,71 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL - int2_grad1_u12_ao = 0.d0 - do m = 1, 3 - !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & - ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) - enddo + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision, allocatable :: tmp_grad1_u12(:,:,:) + + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 8 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_ao) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & + , tmp_grad1_u12(1,i_blocks,2) & + , tmp_grad1_u12(1,i_blocks,3) ) + enddo - !! these dgemm are equivalent to - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & - !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & - !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! do jpoint = 1, n_points_extra_final_grid - ! w = -tmp(jpoint,i,j) - ! !w = tmp(jpoint,i,j) this work also because of the symmetry in K(1,2) - ! ! and sign compensation in L(1,2,3) - ! int2_grad1_u12_ao(i,j,ipoint,1) += w * grad1_u12_num(jpoint,ipoint,1) - ! int2_grad1_u12_ao(i,j,ipoint,2) += w * grad1_u12_num(jpoint,ipoint,2) - ! int2_grad1_u12_ao(i,j,ipoint,3) += w * grad1_u12_num(jpoint,ipoint,3) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & + , tmp_grad1_u12(1,i_rest,2) & + , tmp_grad1_u12(1,i_rest,3) ) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + + deallocate(tmp_grad1_u12) + endif deallocate(tmp) else @@ -185,6 +217,72 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot, (ao_num, ao_num, n_points_final_grid, 3)] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .ge. 100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) + enddo + + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -275,16 +373,14 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_squared_num double precision, allocatable :: tmp(:,:,:) allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (j, i, jpoint) & !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) + !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do jpoint = 1, n_points_extra_final_grid @@ -295,31 +391,63 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL - int2_grad1_u12_square_ao = 0.d0 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num) + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision, allocatable :: tmp_grad1_u12_squared(:,:) - !! this dgemm is equivalen to - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & - !!$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, & - !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! do jpoint = 1, n_points_extra_final_grid - ! w = -0.5d0 * tmp(jpoint,i,j) - ! int2_grad1_u12_square_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 16 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, tmp_grad1_u12_squared) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_square_ao) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_blocks)) + enddo + + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12_squared) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_rest)) + enddo + + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + + deallocate(tmp_grad1_u12_squared) + endif deallocate(tmp) @@ -338,3 +466,65 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: i, j, jpoint + double precision :: time0, time1 + + print*, ' providing int2_grad1_u12_square_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .ge. 100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_squared_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) + + FREE grad1_u12_squared_num + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index e7718a40..7d84e73b 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -11,12 +11,24 @@ program test_non_h my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + + !call routine_grad_squared() !call routine_fit() !call test_ipp() - call test_v_ij_u_cst_mu_j1b_an() + !call test_v_ij_u_cst_mu_j1b_an() + + call test_int2_grad1_u12_square_ao() + call test_int2_grad1_u12_ao() end ! --- @@ -583,7 +595,91 @@ subroutine test_v_ij_u_cst_mu_j1b_an() print*, ' accuracy(%) = ', 100.d0 * accu / norm return -end subroutine test_v_ij_u_cst_mu_j1b_an() +end subroutine test_v_ij_u_cst_mu_j1b_an +! --- +subroutine test_int2_grad1_u12_square_ao() + + implicit none + integer :: i, j, ipoint + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE int2_grad1_u12_square_ao + PROVIDE int2_grad1_u12_square_ao_num_1shot + + thr = 1d-8 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + I_old = int2_grad1_u12_square_ao_num_1shot(j,i,ipoint) + I_new = int2_grad1_u12_square_ao (j,i,ipoint) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_int2_grad1_u12_square_ao + +! --- + +subroutine test_int2_grad1_u12_ao() + + implicit none + integer :: i, j, ipoint, m + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE int2_grad1_u12_ao + PROVIDE int2_grad1_u12_ao_num_1shot + + thr = 1d-8 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + do m = 1, 3 + I_old = int2_grad1_u12_ao_num_1shot(j,i,ipoint,m) + I_new = int2_grad1_u12_ao (j,i,ipoint,m) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint, m + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_int2_grad1_u12_ao + +! --- diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 58103643..254defe1 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -1,7 +1,4 @@ -! TODO -! remove ao_two_e_coul and use map directly - ! --- BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, ao_num)] @@ -159,10 +156,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num ! END_DOC - integer :: i, j, k, l - double precision :: integral - double precision, allocatable :: tmp(:) - double precision, external :: get_ao_two_e_integral + integer :: i, j, k, l + double precision, external :: get_ao_two_e_integral PROVIDE ao_integrals_map @@ -183,25 +178,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num !$OMP END DO !$OMP END PARALLEL - -! TODO -! allocate(tmp(ao_num)) -! -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,l,j,k,tmp) -! do j = 1, ao_num -! do l = 1, ao_num -! do i = 1, ao_num -! call get_ao_two_e_integrals(i, l, l, ao_num, tmp(1)) -! do k = 1, ao_num -! ao_two_e_coul(k,i,l,j) = tmp(k) -! enddo -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO -! -! deallocate(tmp) - END_PROVIDER ! --- diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index e4c38741..9bcbb13b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -13,11 +13,9 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf @@ -26,6 +24,14 @@ program tc_scf print *, ' j1b_type = ', j1b_type print *, j1b_pen + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + !call create_guess() !call orthonormalize_mos() From 1f56b5d0f4eb8f6f68ef83ef6744a26807514807 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 5 Sep 2023 11:52:08 +0200 Subject: [PATCH 11/20] num integ grad & grad squared --- src/becke_numerical_grid/EZFIO.cfg | 4 + src/becke_numerical_grid/extra_grid.irp.f | 4 +- .../extra_grid_vector.irp.f | 34 +- src/becke_numerical_grid/grid_becke.irp.f | 2 +- src/non_h_ints_mu/debug_fit.irp.f | 77 +- src/non_h_ints_mu/jast_deriv.irp.f | 1165 ----------------- src/non_h_ints_mu/jast_deriv_utils.irp.f | 700 ++++++++++ src/non_h_ints_mu/jast_deriv_utils_vect.irp.f | 332 +++++ src/non_h_ints_mu/tc_integ.irp.f | 530 -------- src/non_h_ints_mu/tc_integ_an.irp.f | 244 ++++ src/non_h_ints_mu/tc_integ_num.irp.f | 190 +++ src/non_h_ints_mu/test_non_h_ints.irp.f | 6 +- src/tc_keywords/EZFIO.cfg | 11 + 13 files changed, 1584 insertions(+), 1715 deletions(-) create mode 100644 src/non_h_ints_mu/jast_deriv_utils.irp.f create mode 100644 src/non_h_ints_mu/jast_deriv_utils_vect.irp.f delete mode 100644 src/non_h_ints_mu/tc_integ.irp.f create mode 100644 src/non_h_ints_mu/tc_integ_an.irp.f create mode 100644 src/non_h_ints_mu/tc_integ_num.irp.f diff --git a/src/becke_numerical_grid/EZFIO.cfg b/src/becke_numerical_grid/EZFIO.cfg index 7861f074..e660fd6d 100644 --- a/src/becke_numerical_grid/EZFIO.cfg +++ b/src/becke_numerical_grid/EZFIO.cfg @@ -33,6 +33,10 @@ doc: Number of angular grid points given from input. Warning, this number cannot interface: ezfio,provider,ocaml default: 1202 +[n_points_extra_final_grid] +type: integer +doc: Total number of extra_grid points +interface: ezfio [extra_grid_type_sgn] type: integer diff --git a/src/becke_numerical_grid/extra_grid.irp.f b/src/becke_numerical_grid/extra_grid.irp.f index 9bd24f22..7df4dd6d 100644 --- a/src/becke_numerical_grid/extra_grid.irp.f +++ b/src/becke_numerical_grid/extra_grid.irp.f @@ -14,7 +14,7 @@ implicit none - if(.not.my_extra_grid_becke)then + if(.not. my_extra_grid_becke) then select case (extra_grid_type_sgn) case(0) n_points_extra_radial_grid = 23 @@ -33,7 +33,7 @@ stop end select else - n_points_extra_radial_grid = my_n_pt_r_extra_grid + n_points_extra_radial_grid = my_n_pt_r_extra_grid n_points_extra_integration_angular = my_n_pt_a_extra_grid endif diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index e4fc03b5..ae167282 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -23,29 +23,33 @@ BEGIN_PROVIDER [integer, n_points_extra_final_grid] enddo enddo - print*,'n_points_extra_final_grid = ',n_points_extra_final_grid - print*,'n max point = ',n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) -! call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) + print*, ' n_points_extra_final_grid = ', n_points_extra_final_grid + print*, ' n max point = ', n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) + call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_extra, (3,n_points_extra_final_grid)] -&BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_extra_reverse, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] - implicit none +&BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_extra_reverse, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num)] + BEGIN_DOC -! final_grid_points_extra(1:3,j) = (/ x, y, z /) of the jth grid point -! -! final_weight_at_r_vector_extra(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions -! -! index_final_points_extra(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point -! -! index_final_points_extra_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices + ! final_grid_points_extra(1:3,j) = (/ x, y, z /) of the jth grid point + ! + ! final_weight_at_r_vector_extra(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions + ! + ! index_final_points_extra(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point + ! + ! index_final_points_extra_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices END_DOC + + implicit none integer :: i,j,k,l,i_count double precision :: r(3) + i_count = 0 do j = 1, nucl_num do i = 1, n_points_extra_radial_grid -1 @@ -67,3 +71,5 @@ END_PROVIDER enddo END_PROVIDER + + diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 21b9f98d..91fdc563 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -14,7 +14,7 @@ implicit none - if(.not.my_grid_becke)then + if(.not. my_grid_becke) then select case (grid_type_sgn) case(0) n_points_radial_grid = 23 diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index 05d2db68..d3152836 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -13,17 +13,27 @@ program debug_fit PROVIDE mu_erf j1b_pen + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + !call test_j1b_nucl() !call test_grad_j1b_nucl() !call test_lapl_j1b_nucl() !call test_list_b2() - call test_list_b3() + !call test_list_b3() !call test_fit_u() !call test_fit_u2() !call test_fit_ugradu() + call test_grad1_u12_withsq_num() + end ! --- @@ -643,4 +653,69 @@ end subroutine test_fit_u2 ! --- +subroutine test_grad1_u12_withsq_num() + + implicit none + integer :: ipoint, jpoint, m + double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz + double precision, allocatable :: tmp_grad1_u12_squared(:,:), tmp_grad1_u12(:,:,:) + + print*, ' test_grad1_u12_withsq_num ...' + + PROVIDE grad1_u12_num grad1_u12_squared_num + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_points_final_grid)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_points_final_grid,3)) + + eps_ij = 1d-7 + acc_tot = 0.d0 + normalz = 0.d0 + + do ipoint = 1, n_points_final_grid + + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,ipoint,1) & + , tmp_grad1_u12(1,ipoint,2) & + , tmp_grad1_u12(1,ipoint,3) & + , tmp_grad1_u12_squared(1,ipoint)) + do jpoint = 1, n_points_extra_final_grid + + i_exc = grad1_u12_squared_num(jpoint,ipoint) + i_num = tmp_grad1_u12_squared(jpoint,ipoint) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad1_u12_squared_num on', ipoint, jpoint + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + stop + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + do m = 1, 3 + i_exc = grad1_u12_num(jpoint,ipoint,m) + i_num = tmp_grad1_u12(jpoint,ipoint,m) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad1_u12_num on', ipoint, jpoint, m + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + stop + endif + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + enddo + enddo + + !print*, ' acc_tot = ', acc_tot + !print*, ' normalz = ', normalz + print*, ' accuracy (%) = ', 100.d0 * acc_tot / normalz + + return +end subroutine test_grad1_u12_withsq_num + +! --- + diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 082412f9..ee01886c 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -123,1168 +123,3 @@ END_PROVIDER ! --- -subroutine get_grad1_u12_r1_seq(r1, n_grid2, resx, resy, resz) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2) - - double precision :: v1b_r1 - double precision :: grad1_v1b(3) - double precision, allocatable :: v1b_r2(:) - double precision, allocatable :: u2b_r12(:) - double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) - double precision, external :: j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - allocate(v1b_r2(n_grid2)) - allocate(u2b_r12(n_grid2)) - allocate(gradx1_u2b(n_grid2)) - allocate(grady1_u2b(n_grid2)) - allocate(gradz1_u2b(n_grid2)) - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - call j1b_nucl_r1_seq(n_grid2, v1b_r2) - call j12_mu_r1_seq(r1, n_grid2, u2b_r12) - call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) - - resx(:) = (gradx1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(1)) * v1b_r2(:) - resy(:) = (grady1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(2)) * v1b_r2(:) - resz(:) = (gradz1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(3)) * v1b_r2(:) - - deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_r1_seq - -! --- - -subroutine get_grad1_u12_squared_r1_seq(r1, n_grid2, res) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: res(n_grid2) - - - integer :: jpoint - double precision :: r2(3) - double precision :: v1b_r1, v1b_r2, u2b_r12 - double precision :: grad1_v1b(3), grad1_u2b(3) - double precision :: dx, dy, dz - double precision, external :: j12_mu, j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = grad1_u2b(1) - dy = grad1_u2b(2) - dz = grad1_u2b(3) - - res(jpoint) = dx*dx + dy*dy + dz*dz - enddo - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - v1b_r2 = j1b_nucl(r2) - u2b_r12 = j12_mu(r1, r2) - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 - dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 - dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 - - res(jpoint) = dx*dx + dy*dy + dz*dz - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_squared_r1_seq - -! --- - -subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) - - integer :: jpoint - double precision :: v1b_r1 - double precision :: grad1_v1b(3) - double precision, allocatable :: v1b_r2(:) - double precision, allocatable :: u2b_r12(:) - double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) - double precision, external :: j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) - do jpoint = 1, n_points_extra_final_grid - res(jpoint) = resx(jpoint) * resx(jpoint) & - + resy(jpoint) * resy(jpoint) & - + resz(jpoint) * resz(jpoint) - enddo - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - allocate(v1b_r2(n_grid2)) - allocate(u2b_r12(n_grid2)) - allocate(gradx1_u2b(n_grid2)) - allocate(grady1_u2b(n_grid2)) - allocate(gradz1_u2b(n_grid2)) - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - call j1b_nucl_r1_seq(n_grid2, v1b_r2) - call j12_mu_r1_seq(r1, n_grid2, u2b_r12) - call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) - - do jpoint = 1, n_points_extra_final_grid - resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) - resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) - resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) - res (jpoint) = resx(jpoint) * resx(jpoint) & - + resy(jpoint) * resy(jpoint) & - + resz(jpoint) * resz(jpoint) - enddo - - deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_withsq_r1_seq - -! --- - -double precision function j12_mu(r1, r2) - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision :: mu_tmp, r12 - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_tmp = mu_erf * r12 - - j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' - stop - - endif - - return -end function j12_mu - -! --- - -subroutine grad1_j12_mu(r1, r2, grad) - - BEGIN_DOC - ! - ! gradient of j(mu(r1,r2),r12) form of jastrow. - ! - ! if mu(r1,r2) = cst ---> j1b_type < 200 and - ! - ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) - ! - ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and - ! - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) - ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - ! - END_DOC - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: dx, dy, dz, r12, tmp - - grad = 0.d0 - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - grad(1) = tmp * dx - grad(2) = tmp * dy - grad(3) = tmp * dz - - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - double precision :: mu_val, mu_tmp, mu_der(3) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - grad(1) = tmp * mu_der(1) - grad(2) = tmp * mu_der(2) - grad(3) = tmp * mu_der(3) - - if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 - grad(1) = grad(1) + tmp * dx - grad(2) = grad(2) + tmp * dy - grad(3) = grad(3) + tmp * dz - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine grad1_j12_mu - -! --- - -double precision function j1b_nucl(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) - enddo - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - j1b_nucl = j1b_nucl * e - enddo - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) - enddo - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - j1b_nucl = j1b_nucl - dexp(-a*d*d) - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' - stop - - endif - - return -end function j1b_nucl - -! --- - -double precision function j1b_nucl_square(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - j1b_nucl_square = j1b_nucl_square * e - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' - stop - - endif - - return -end function j1b_nucl_square - -! --- - -subroutine grad1_j1b_nucl(r, grad) - - implicit none - double precision, intent(in) :: r(3) - double precision, intent(out) :: grad(3) - integer :: ipoint, i, j, phase - double precision :: x, y, z, dx, dy, dz - double precision :: a, d, e - double precision :: fact_x, fact_y, fact_z - double precision :: ax_der, ay_der, az_der, a_expo - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = dsqrt(x*x + y*y + z*z) - e = a * dexp(-a*d) / d - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = fact_x - grad(2) = fact_y - grad(3) = fact_z - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - x = r(1) - y = r(2) - z = r(3) - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, List_all_comb_b2_size - - phase = 0 - a_expo = 0.d0 - ax_der = 0.d0 - ay_der = 0.d0 - az_der = 0.d0 - do j = 1, nucl_num - a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - dx = x - nucl_coord(j,1) - dy = y - nucl_coord(j,2) - dz = z - nucl_coord(j,3) - - phase += List_all_comb_b2(j,i) - a_expo += a * (dx*dx + dy*dy + dz*dz) - ax_der += a * dx - ay_der += a * dy - az_der += a * dz - enddo - e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) - - fact_x += e * ax_der - fact_y += e * ay_der - fact_z += e * az_der - enddo - - grad(1) = fact_x - grad(2) = fact_y - grad(3) = fact_z - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - e = a * j1b_pen_coef(i) * dexp(-a*d) - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = 2.d0 * fact_x - grad(2) = 2.d0 * fact_y - grad(3) = 2.d0 * fact_z - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - e = a * d * dexp(-a*d*d) - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = 4.d0 * fact_x - grad(2) = 4.d0 * fact_y - grad(3) = 4.d0 * fact_z - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' - stop - - endif - - return -end subroutine grad1_j1b_nucl - -! --- - -subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: mu_val, mu_der(3) - double precision :: r(3) - double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) - double precision :: dm_tot, tmp1, tmp2, tmp3 - double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot - double precision :: f_rho1, f_rho2, d_drho_f_rho1 - double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume - - if(j1b_type .eq. 200) then - - ! - ! r = 0.5 (r1 + r2) - ! - ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) - ! - ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx - ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) - ! - - PROVIDE mu_r_ct mu_erf - - r(1) = 0.5d0 * (r1(1) + r2(1)) - r(2) = 0.5d0 * (r1(2) + r2(2)) - r(3) = 0.5d0 * (r1(3) + r2(3)) - - call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) - - dm_tot = dm_a(1) + dm_b(1) - tmp1 = dsqrt(dm_tot) - tmp2 = mu_erf * dexp(-dm_tot) - - mu_val = mu_r_ct * tmp1 + tmp2 - - mu_der = 0.d0 - if(dm_tot .lt. 1d-7) return - - tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 - mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) - mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) - mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - - elseif(j1b_type .eq. 201) then - - ! - ! r = 0.5 (r1 + r2) - ! - ! mu[rho(r)] = alpha rho + mu0 exp(-rho) - ! - ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx - ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) - ! - - PROVIDE mu_r_ct mu_erf - - r(1) = 0.5d0 * (r1(1) + r2(1)) - r(2) = 0.5d0 * (r1(2) + r2(2)) - r(3) = 0.5d0 * (r1(3) + r2(3)) - - call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) - - dm_tot = dm_a(1) + dm_b(1) - tmp2 = mu_erf * dexp(-dm_tot) - - mu_val = mu_r_ct * dm_tot + tmp2 - - tmp3 = 0.5d0 * (mu_r_ct - tmp2) - mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) - mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) - mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - - elseif(j1b_type .eq. 202) then - - ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO - ! - ! RHO = rho(r1) + rho(r2) - ! - ! f[rho] = alpha rho^beta + mu0 exp(-rho) - ! - ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) - ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) - call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - nume = rho1 * f_rho1 + rho2 * f_rho2 - mu_val = nume * inv_rho_tot - mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) - elseif(j1b_type .eq. 203) then - - ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO - ! - ! RHO = rho(r1) + rho(r2) - ! - ! f[rho] = alpha rho^beta + mu0 - ! - ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) - ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf - call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - nume = rho1 * f_rho1 + rho2 * f_rho2 - mu_val = nume * inv_rho_tot - mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) - elseif(j1b_type .eq. 204) then - - ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} - ! - ! f[rho] = alpha rho^beta + mu0 - ! - ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf - call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - mu_val = 0.5d0 * ( f_rho1 + f_rho2) - mu_der(1:3) = d_dx_rho_f_rho(1:3) - else - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine mu_r_val_and_grad - -! --- - -subroutine grad1_j1b_nucl_square_num(r1, grad) - - implicit none - double precision, intent(in) :: r1(3) - double precision, intent(out) :: grad(3) - double precision :: r(3), eps, tmp_eps, vp, vm - double precision, external :: j1b_nucl_square - - eps = 1d-5 - tmp_eps = 0.5d0 / eps - - r(1:3) = r1(1:3) - - r(1) = r(1) + eps - vp = j1b_nucl_square(r) - r(1) = r(1) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(1) = r(1) + eps - grad(1) = tmp_eps * (vp - vm) - - r(2) = r(2) + eps - vp = j1b_nucl_square(r) - r(2) = r(2) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(2) = r(2) + eps - grad(2) = tmp_eps * (vp - vm) - - r(3) = r(3) + eps - vp = j1b_nucl_square(r) - r(3) = r(3) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(3) = r(3) + eps - grad(3) = tmp_eps * (vp - vm) - - return -end subroutine grad1_j1b_nucl_square_num - -! --- - -subroutine grad1_j12_mu_square_num(r1, r2, grad) - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: r(3) - double precision :: eps, tmp_eps, vp, vm - double precision, external :: j12_mu_square - - eps = 1d-5 - tmp_eps = 0.5d0 / eps - - r(1:3) = r1(1:3) - - r(1) = r(1) + eps - vp = j12_mu_square(r, r2) - r(1) = r(1) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(1) = r(1) + eps - grad(1) = tmp_eps * (vp - vm) - - r(2) = r(2) + eps - vp = j12_mu_square(r, r2) - r(2) = r(2) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(2) = r(2) + eps - grad(2) = tmp_eps * (vp - vm) - - r(3) = r(3) + eps - vp = j12_mu_square(r, r2) - r(3) = r(3) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(3) = r(3) + eps - grad(3) = tmp_eps * (vp - vm) - - return -end subroutine grad1_j12_mu_square_num - -! --- - -double precision function j12_mu_square(r1, r2) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, external :: j12_mu - - j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) - - return -end function j12_mu_square - -! --- - -subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) - implicit none - BEGIN_DOC -! function giving mu as a function of rho -! -! f_mu = alpha * rho**beta + mu0 * exp(-rho) -! -! and its derivative with respect to rho d_drho_f_mu - END_DOC - double precision, intent(in) :: rho,alpha,mu0,beta - double precision, intent(out) :: f_mu,d_drho_f_mu - f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) - d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) - -end - - -subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - implicit none - BEGIN_DOC -! returns the density in r1,r2 and grad_rho at r1 - END_DOC - double precision, intent(in) :: r1(3),r2(3) - double precision, intent(out):: grad_rho1(3),rho1,rho2 - double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) - call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) - rho1 = dm_a(1) + dm_b(1) - grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) - call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) - rho2 = dm_a(1) + dm_b(1) -end - -subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) - implicit none - BEGIN_DOC -! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) - END_DOC - double precision, intent(in) :: rho1,rho2,alpha,mu0,beta - double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 - double precision :: tmp - call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) - call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) -end - - -subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) - implicit none - BEGIN_DOC -! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) - END_DOC - double precision, intent(in) :: rho1,rho2,alpha,mu0,beta - double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 - double precision :: tmp - call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) - call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) -end - -subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) - implicit none - BEGIN_DOC -! function giving mu as a function of rho -! -! f_mu = alpha * rho**beta + mu0 -! -! and its derivative with respect to rho d_drho_f_mu - END_DOC - double precision, intent(in) :: rho,alpha,mu0,beta - double precision, intent(out) :: f_mu,d_drho_f_mu - f_mu = alpha * (rho)**beta + mu0 - d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - -end - -! --- - -subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) - - BEGIN_DOC - ! - ! gradient of j(mu(r1,r2),r12) form of jastrow. - ! - ! if mu(r1,r2) = cst ---> j1b_type < 200 and - ! - ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) - ! - ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and - ! - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) - ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - ! - END_DOC - - include 'constants.include.F' - - implicit none - integer , intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: gradx(n_grid2) - double precision, intent(out) :: grady(n_grid2) - double precision, intent(out) :: gradz(n_grid2) - - integer :: jpoint - double precision :: r2(3) - double precision :: dx, dy, dz, r12, tmp - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - gradx(jpoint) = tmp * dx - grady(jpoint) = tmp * dy - gradz(jpoint) = tmp * dz - enddo - - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - double precision :: mu_val, mu_tmp, mu_der(3) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - gradx(jpoint) = tmp * mu_der(1) - grady(jpoint) = tmp * mu_der(2) - gradz(jpoint) = tmp * mu_der(3) - - if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 - - gradx(jpoint) = gradx(jpoint) + tmp * dx - grady(jpoint) = grady(jpoint) + tmp * dy - gradz(jpoint) = gradz(jpoint) + tmp * dz - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine grad1_j12_mu_r1_seq - -! --- - -subroutine j12_mu_r1_seq(r1, n_grid2, res) - - include 'constants.include.F' - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: res(n_grid2) - - integer :: jpoint - double precision :: r2(3) - double precision :: mu_tmp, r12 - - PROVIDE final_grid_points_extra - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_tmp = mu_erf * r12 - - res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' - stop - - endif - - return -end subroutine j12_mu_r1_seq - -! --- - -subroutine j1b_nucl_r1_seq(n_grid2, res) - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(out) :: res(n_grid2) - - double precision :: r(3) - integer :: i, jpoint - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - - res(jpoint) -= dexp(-a*dsqrt(d)) - enddo - enddo - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - - res(jpoint) *= e - enddo - enddo - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) - enddo - enddo - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - res(jpoint) -= dexp(-a*d*d) - enddo - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' - stop - - endif - - return -end subroutine j1b_nucl_r1_seq - -! --- - diff --git a/src/non_h_ints_mu/jast_deriv_utils.irp.f b/src/non_h_ints_mu/jast_deriv_utils.irp.f new file mode 100644 index 00000000..bcbe16af --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv_utils.irp.f @@ -0,0 +1,700 @@ + +! --- + +double precision function j12_mu(r1, r2) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision :: mu_tmp, r12 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' + stop + + endif + + return +end function j12_mu + +! --- + +subroutine grad1_j12_mu(r1, r2, grad) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: dx, dy, dz, r12, tmp + + grad = 0.d0 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + grad(1) = tmp * dx + grad(2) = tmp * dy + grad(3) = tmp * dz + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + grad(1) = tmp * mu_der(1) + grad(2) = tmp * mu_der(2) + grad(3) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + grad(1) = grad(1) + tmp * dx + grad(2) = grad(2) + tmp * dy + grad(3) = grad(3) + tmp * dz + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu + +! --- + +double precision function j1b_nucl(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl = j1b_nucl * e + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl = j1b_nucl - dexp(-a*d*d) + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' + stop + + endif + + return +end function j1b_nucl + +! --- + +double precision function j1b_nucl_square(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square * e + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' + stop + + endif + + return +end function j1b_nucl_square + +! --- + +subroutine grad1_j1b_nucl(r, grad) + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: grad(3) + integer :: ipoint, i, j, phase + double precision :: x, y, z, dx, dy, dz + double precision :: a, d, e + double precision :: fact_x, fact_y, fact_z + double precision :: ax_der, ay_der, az_der, a_expo + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = dsqrt(x*x + y*y + z*z) + e = a * dexp(-a*d) / d + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + x = r(1) + y = r(2) + z = r(3) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * j1b_pen_coef(i) * dexp(-a*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 2.d0 * fact_x + grad(2) = 2.d0 * fact_y + grad(3) = 2.d0 * fact_z + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * d * dexp(-a*d*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 4.d0 * fact_x + grad(2) = 4.d0 * fact_y + grad(3) = 4.d0 * fact_z + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' + stop + + endif + + return +end subroutine grad1_j1b_nucl + +! --- + +subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: mu_val, mu_der(3) + double precision :: r(3) + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume + + if(j1b_type .eq. 200) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp1 = dsqrt(dm_tot) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * tmp1 + tmp2 + + mu_der = 0.d0 + if(dm_tot .lt. 1d-7) return + + tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 201) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha rho + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * dm_tot + tmp2 + + tmp3 = 0.5d0 * (mu_r_ct - tmp2) + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 202) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine mu_r_val_and_grad + +! --- + +subroutine grad1_j1b_nucl_square_num(r1, grad) + + implicit none + double precision, intent(in) :: r1(3) + double precision, intent(out) :: grad(3) + double precision :: r(3), eps, tmp_eps, vp, vm + double precision, external :: j1b_nucl_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j1b_nucl_square_num + +! --- + +subroutine grad1_j12_mu_square_num(r1, r2, grad) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: r(3) + double precision :: eps, tmp_eps, vp, vm + double precision, external :: j12_mu_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j12_mu_square(r, r2) + r(1) = r(1) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j12_mu_square(r, r2) + r(2) = r(2) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j12_mu_square(r, r2) + r(3) = r(3) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j12_mu_square_num + +! --- + +double precision function j12_mu_square(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu + + j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) + + return +end function j12_mu_square + +! --- + +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + +! --- + diff --git a/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f new file mode 100644 index 00000000..f9512827 --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -0,0 +1,332 @@ + +! --- + +subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + do jpoint = 1, n_points_extra_final_grid + res(jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + do jpoint = 1, n_points_extra_final_grid + resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) + resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) + resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) + res (jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_withsq_r1_seq + +! --- + +subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + cycle + endif + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + gradx(jpoint) = tmp * dx + grady(jpoint) = tmp * dy + gradz(jpoint) = tmp * dz + enddo + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + gradx(jpoint) = tmp * mu_der(1) + grady(jpoint) = tmp * mu_der(2) + gradz(jpoint) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + cycle + endif + + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + + gradx(jpoint) = gradx(jpoint) + tmp * dx + grady(jpoint) = grady(jpoint) + tmp * dy + gradz(jpoint) = gradz(jpoint) + tmp * dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu_r1_seq + +! --- + +subroutine j12_mu_r1_seq(r1, n_grid2, res) + + include 'constants.include.F' + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: mu_tmp, r12 + + PROVIDE final_grid_points_extra + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' + stop + + endif + + return +end subroutine j12_mu_r1_seq + +! --- + +subroutine j1b_nucl_r1_seq(n_grid2, res) + + ! TODO + ! change loops order + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(out) :: res(n_grid2) + + double precision :: r(3) + integer :: i, jpoint + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + + res(jpoint) -= dexp(-a*dsqrt(d)) + enddo + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + + res(jpoint) *= e + enddo + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) + enddo + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + res(jpoint) -= dexp(-a*d*d) + enddo + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' + stop + + endif + + return +end subroutine j1b_nucl_r1_seq + +! --- + diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f deleted file mode 100644 index dd21a67f..00000000 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ /dev/null @@ -1,530 +0,0 @@ - -BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! TODO - ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12 (j1b_type .eq. 1) - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) - ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! - ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(read_tc_integ) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") - read(11) int2_grad1_u12_ao - - else - - if(j1b_type .eq. 0) then - - PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu - - int2_grad1_u12_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1) & - !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points & - !$OMP , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1)) - int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2)) - int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3)) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - - PROVIDE v_1b_grad - PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b - - int2_grad1_u12_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & - !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & - !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b_an, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b_an(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b - - elseif(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - integer :: n_blocks, n_rest, n_pass - integer :: i_blocks, i_rest, i_pass, ii - double precision, allocatable :: tmp_grad1_u12(:,:,:) - - ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 8 - n_rest = int(mod(n_points_final_grid, n_blocks)) - n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_ao) - !$OMP DO - do i_pass = 1, n_pass - ii = (i_pass-1)*n_blocks + 1 - - do i_blocks = 1, n_blocks - ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & - , tmp_grad1_u12(1,i_blocks,2) & - , tmp_grad1_u12(1,i_blocks,3) ) - enddo - - do m = 1, 3 - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate(tmp_grad1_u12) - - ! TODO - ! OPENMP - if(n_rest .ne. 0) then - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) - - ii = n_pass*n_blocks + 1 - do i_rest = 1, n_rest - ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & - , tmp_grad1_u12(1,i_rest,2) & - , tmp_grad1_u12(1,i_rest,3) ) - enddo - - do m = 1, 3 - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - - deallocate(tmp_grad1_u12) - endif - - deallocate(tmp) - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - endif - - if(write_tc_integ.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) - write(11) int2_grad1_u12_ao - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao_num_1shot ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_num - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 3 - !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & - ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) - enddo - - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao_num_1shot =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_square_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .eq. 0) then - - PROVIDE int2_grad1u2_grad2u2 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - - if(use_ipp) then - - ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance - PROVIDE u12sq_j1bsq grad12_j12 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE u12sq_j1bsq grad12_j12 - - else - - PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - - endif - - elseif(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO COLLAPSE(2) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - integer :: n_blocks, n_rest, n_pass - integer :: i_blocks, i_rest, i_pass, ii - double precision, allocatable :: tmp_grad1_u12_squared(:,:) - - ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 16 - n_rest = int(mod(n_points_final_grid, n_blocks)) - n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif - - allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, tmp_grad1_u12_squared) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_square_ao) - !$OMP DO - do i_pass = 1, n_pass - ii = (i_pass-1)*n_blocks + 1 - - do i_blocks = 1, n_blocks - ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_blocks)) - enddo - - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate(tmp_grad1_u12_squared) - - ! TODO - ! OPENMP - if(n_rest .ne. 0) then - - allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) - - ii = n_pass*n_blocks + 1 - do i_rest = 1, n_rest - ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_rest)) - enddo - - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) - - deallocate(tmp_grad1_u12_squared) - endif - - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 - ! - END_DOC - - implicit none - integer :: i, j, jpoint - double precision :: time0, time1 - - print*, ' providing int2_grad1_u12_square_ao_num_1shot ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_squared_num - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO COLLAPSE(2) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) - - FREE grad1_u12_squared_num - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_square_ao_num_1shot =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f new file mode 100644 index 00000000..e2f181a0 --- /dev/null +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -0,0 +1,244 @@ + +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] + + BEGIN_DOC + ! + ! TODO + ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! where r1 = r(ipoint) + ! + ! if J(r1,r2) = u12 (j1b_type .eq. 1) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] + ! + ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(read_tc_integ) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + + else + + if(j1b_type .eq. 0) then + + PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points & + !$OMP , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1)) + int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2)) + int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + PROVIDE v_1b_grad + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & + !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b_an, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b_an(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b + + elseif(j1b_type .ge. 100) then + + PROVIDE int2_grad1_u12_ao_num + int2_grad1_u12_ao = int2_grad1_u12_ao_num + FREE int2_grad1_u12_ao_num + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + endif + + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_square_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .eq. 0) then + + PROVIDE int2_grad1u2_grad2u2 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + if(use_ipp) then + + ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance + PROVIDE u12sq_j1bsq grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE u12sq_j1bsq grad12_j12 + + else + + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + endif + + elseif(j1b_type .ge. 100) then + + PROVIDE int2_grad1_u12_square_ao_num + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + FREE int2_grad1_u12_square_ao_num + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/tc_integ_num.irp.f b/src/non_h_ints_mu/tc_integ_num.irp.f new file mode 100644 index 00000000..707d484c --- /dev/null +++ b/src/non_h_ints_mu/tc_integ_num.irp.f @@ -0,0 +1,190 @@ + + BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num , (ao_num,ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num, (ao_num,ao_num,n_points_final_grid) ] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! int2_grad1_u12_square_ao_num = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: time0, time1 + double precision, allocatable :: tmp(:,:,:) + double precision, allocatable :: tmp_grad1_u12(:,:,:), tmp_grad1_u12_squared(:,:) + + ! TODO + ! tmp_grad1_u12_squared get be obtained from tmp_grad1_u12 + + print*, ' providing int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num ...' + call wall_time(time0) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 4 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_ao_num, & + !$OMP int2_grad1_u12_square_ao_num) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & + , tmp_grad1_u12(1,i_blocks,2) & + , tmp_grad1_u12(1,i_blocks,3) & + , tmp_grad1_u12_squared(1,i_blocks)) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num(1,1,ii,m), ao_num*ao_num) + enddo + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & + , tmp_grad1_u12(1,i_rest,2) & + , tmp_grad1_u12(1,i_rest,3) & + , tmp_grad1_u12_squared(1,i_rest)) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num(1,1,ii,m), ao_num*ao_num) + enddo + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) + + deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) + endif + + deallocate(tmp) + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot , (ao_num,ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num,ao_num,n_points_final_grid) ] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision, allocatable :: tmp(:,:,:) + + print*, ' providing int2_grad1_u12_ao_num_1shot & int2_grad1_u12_square_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num grad1_u12_squared_num + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) + enddo + FREE grad1_u12_num + + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) + FREE grad1_u12_squared_num + + deallocate(tmp) + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num_1shot & int2_grad1_u12_square_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index 7d84e73b..84674fa0 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -618,13 +618,14 @@ subroutine test_int2_grad1_u12_square_ao() I_old = int2_grad1_u12_square_ao_num_1shot(j,i,ipoint) I_new = int2_grad1_u12_square_ao (j,i,ipoint) + !I_new = int2_grad1_u12_square_ao_num (j,i,ipoint) diff = dabs(I_new-I_old) if(diff .gt. thr) then print *, ' problem on:', j, i, ipoint print *, ' old value :', I_old print *, ' new value :', I_new - stop + !stop endif accu += diff @@ -660,13 +661,14 @@ subroutine test_int2_grad1_u12_ao() do m = 1, 3 I_old = int2_grad1_u12_ao_num_1shot(j,i,ipoint,m) I_new = int2_grad1_u12_ao (j,i,ipoint,m) + !I_new = int2_grad1_u12_ao_num (j,i,ipoint,m) diff = dabs(I_new-I_old) if(diff .gt. thr) then print *, ' problem on:', j, i, ipoint, m print *, ' old value :', I_old print *, ' new value :', I_new - stop + !stop endif accu += diff diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 29c238e1..2d244586 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -274,4 +274,15 @@ doc: size of radial grid over r1 interface: ezfio,provider,ocaml default: 30 +[tc_grid2_a] +type: integer +doc: size of angular grid over r2 +interface: ezfio,provider,ocaml +default: 50 + +[tc_grid2_r] +type: integer +doc: size of radial grid over r2 +interface: ezfio,provider,ocaml +default: 30 From 7076fcd202442a94d37d3c1782a7908a0b637ffd Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 6 Sep 2023 19:21:14 +0200 Subject: [PATCH 12/20] normal order V2 FROZEN CORE --- src/becke_numerical_grid/grid_becke.irp.f | 3 + .../grid_becke_vector.irp.f | 4 +- src/non_h_ints_mu/tc_integ_an.irp.f | 2 - src/non_h_ints_mu/tc_integ_num.irp.f | 51 +- src/non_h_ints_mu/total_tc_int.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 1594 ++++------------- src/tc_bi_ortho/normal_ordered_old.irp.f | 4 +- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 63 +- src/tc_keywords/EZFIO.cfg | 8 +- src/tc_scf/tc_scf.irp.f | 7 + 10 files changed, 419 insertions(+), 1325 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 91fdc563..f72d452d 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -37,6 +37,9 @@ n_points_integration_angular = my_n_pt_a_grid endif + print*, " n_points_radial_grid = ", n_points_radial_grid + print*, " n_points_integration_angular = ", n_points_integration_angular + END_PROVIDER ! --- diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 0386f3c6..e1147ec7 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -20,8 +20,8 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo - print*,' n_points_final_grid = ', n_points_final_grid - print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + !print*,' n_points_final_grid = ', n_points_final_grid + !print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f index e2f181a0..ae7af987 100644 --- a/src/non_h_ints_mu/tc_integ_an.irp.f +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -108,7 +108,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f PROVIDE int2_grad1_u12_ao_num int2_grad1_u12_ao = int2_grad1_u12_ao_num - FREE int2_grad1_u12_ao_num else @@ -225,7 +224,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p PROVIDE int2_grad1_u12_square_ao_num int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num - FREE int2_grad1_u12_square_ao_num else diff --git a/src/non_h_ints_mu/tc_integ_num.irp.f b/src/non_h_ints_mu/tc_integ_num.irp.f index 707d484c..ee34f531 100644 --- a/src/non_h_ints_mu/tc_integ_num.irp.f +++ b/src/non_h_ints_mu/tc_integ_num.irp.f @@ -15,6 +15,7 @@ integer :: n_blocks, n_rest, n_pass integer :: i_blocks, i_rest, i_pass, ii double precision :: time0, time1 + double precision :: mem, n_double double precision, allocatable :: tmp(:,:,:) double precision, allocatable :: tmp_grad1_u12(:,:,:), tmp_grad1_u12_squared(:,:) @@ -41,31 +42,33 @@ enddo !$OMP END DO !$OMP END PARALLEL - + ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 4 + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = min(n_double / (n_points_extra_final_grid * 4), 1.d0*n_points_final_grid) n_rest = int(mod(n_points_final_grid, n_blocks)) n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_ao_num, & - !$OMP int2_grad1_u12_square_ao_num) - !$OMP DO do i_pass = 1, n_pass ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, & + !$OMP final_grid_points, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) + !$OMP DO do i_blocks = 1, n_blocks ipoint = ii - 1 + i_blocks ! r1 call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & @@ -73,6 +76,8 @@ , tmp_grad1_u12(1,i_blocks,3) & , tmp_grad1_u12_squared(1,i_blocks)) enddo + !$OMP END DO + !$OMP END PARALLEL do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & @@ -83,19 +88,23 @@ , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) enddo - !$OMP END DO - !$OMP END PARALLEL deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) - ! TODO - ! OPENMP - if(n_rest .ne. 0) then + if(n_rest .gt. 0) then allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) ii = n_pass*n_blocks + 1 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, & + !$OMP final_grid_points, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) + !$OMP DO do i_rest = 1, n_rest ipoint = ii - 1 + i_rest ! r1 call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & @@ -103,6 +112,8 @@ , tmp_grad1_u12(1,i_rest,3) & , tmp_grad1_u12_squared(1,i_rest)) enddo + !$OMP END DO + !$OMP END PARALLEL do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 254defe1..9c19e0ac 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -55,12 +55,13 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao integer :: i, j, k, l double precision :: wall1, wall0 + PROVIDE j1b_type + print *, ' providing ao_tc_int_chemist ...' call wall_time(wall0) if(test_cycle_tc) then - PROVIDE j1b_type if(j1b_type .ne. 3) then print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type stop @@ -86,6 +87,11 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul + if(j1b_type .ge. 100) then + FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num + endif + + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 call print_memory_usage() diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index ca5875c9..1a9cddda 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,1040 +1,12 @@ ! --- -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordering of the three body interaction on the HF density - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - double precision :: wall0, wall1, walli, wallf - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - - print*,' Providing normal_two_body_bi_orth_v0 ...' - call wall_time(walli) - - if(read_tc_norm_ord) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="read") - read(11) normal_two_body_bi_orth_v0 - close(11) - - else - - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) - double precision, allocatable :: tmp(:,:,:,:) - - PROVIDE N_int - - allocate( occ(N_int*bit_kind_size,2) ) - allocate( key_i_core(N_int,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - - ! --- - ! aba contraction - - print*,' Providing aba_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - - do h1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo !i - endif - - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmpval_1) - deallocate(tmpval_2) - deallocate(tmpvec_1) - deallocate(tmpvec_2) - deallocate(tmp_2d) - - tmp = -0.5d0 * tmp - call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) - - call wall_time(wall1) - print*,' Wall time for aba_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 = tmp - - ! --- - ! aab contraction - - print*,' Providing aab_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpvec_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - deallocate(tmp_2d) - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmpval_1) - deallocate(tmpvec_1) - - tmp = -0.5d0 * tmp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (tmp, mo_num) - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,' Wall time for aab_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 += tmp - - ! --- - ! aaa contraction - - if(Ne(2) .ge. 3) then - - print*,' Providing aaa_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmpvec_3(n_points_final_grid,3)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo !i - endif - - deallocate(tmp_2d) - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmp3) - deallocate(tmpval_1) - deallocate(tmpval_2) - deallocate(tmpvec_1) - deallocate(tmpvec_2) - deallocate(tmpvec_3) - - tmp = -0.5d0 * tmp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (tmp, mo_num) - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,' Wall time for aaa_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 += tmp - endif ! Ne(2) .ge. 3 - - deallocate(tmp) - - endif ! read_tc_norm_ord - - if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="write") - call ezfio_set_work_empty(.False.) - write(11) normal_two_body_bi_orth_v0 - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(wallf) - print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC + ! ! Normal ordering of the three body interaction on the HF density + ! END_DOC use bitmasks ! you need to include the bitmasks_module.f90 features @@ -1063,7 +35,45 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + allocate(int2_grad1_u12_bimo_t_tmp(n_points_final_grid,3,mo_num,mo_num)) + allocate(mos_l_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + allocate(mos_r_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, p1) & + !$OMP SHARED (mo_num, mo_class, & + !$OMP int2_grad1_u12_bimo_t, int2_grad1_u12_bimo_t_tmp, & + !$OMP mos_l_in_r_array_transp, mos_l_in_r_array_transp_tmp, & + !$OMP mos_r_in_r_array_transp, mos_r_in_r_array_transp_tmp) + !$OMP DO + do h1 = 1, mo_num + + mos_l_in_r_array_transp_tmp(:,h1) = 0.d0 + mos_r_in_r_array_transp_tmp(:,h1) = 0.d0 + + if(mo_class(h1) .ne. "Active") cycle + + mos_l_in_r_array_transp_tmp(:,h1) = mos_l_in_r_array_transp(:,h1) + mos_r_in_r_array_transp_tmp(:,h1) = mos_r_in_r_array_transp(:,h1) + + do p1 = 1, mo_num + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = int2_grad1_u12_bimo_t(:,:,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + PROVIDE mo_class PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -1087,16 +97,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aba_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) @@ -1115,82 +126,87 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO @@ -1205,14 +221,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! purely open-shell part if(Ne(2) < Ne(1)) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) @@ -1231,82 +250,86 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo !i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_3d, tmp_2d) @@ -1331,16 +354,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aab_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & - !$OMP tmpval_1, tmpvec_1) & - !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1359,71 +383,76 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) enddo do h2 = 1, mo_num + if(mo_class(h2) .ne. "Active") cycle + tmp2(:,h2) = 0.d0 + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO @@ -1441,7 +470,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PRIVATE (h1, h2, p1, p2) & !$OMP SHARED (tmp, mo_num) - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num @@ -1453,7 +482,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 2, mo_num @@ -1465,7 +494,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -1491,17 +520,18 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aaa_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & - !$OMP tmpval_1, tmpval_2, & - !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1527,20 +557,23 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ tmpvec_3 = 0.d0 !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) - do h1 = 1, mo_num + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) @@ -1548,69 +581,72 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) @@ -1618,9 +654,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) @@ -1632,35 +672,37 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & @@ -1670,15 +712,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_2d) @@ -1697,15 +737,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! purely open-shell part if(Ne(2) < Ne(1)) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & - !$OMP tmpval_1, tmpval_2, & - !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1732,20 +773,21 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) @@ -1753,69 +795,72 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) @@ -1823,9 +868,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) @@ -1837,35 +886,37 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & @@ -1875,15 +926,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo !i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_2d) @@ -1907,7 +956,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PRIVATE (h1, h2, p1, p2) & !$OMP SHARED (tmp, mo_num) - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num @@ -1919,7 +968,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 2, mo_num @@ -1931,7 +980,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -1951,6 +1000,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif ! Ne(2) .ge. 3 deallocate(tmp) + deallocate(int2_grad1_u12_bimo_t_tmp, mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp) endif ! read_tc_norm_ord diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 417580dd..e5e63355 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, if(read_tc_norm_ord) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") read(11) normal_two_body_bi_orth_old close(11) @@ -103,7 +103,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, endif if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") call ezfio_set_work_empty(.False.) write(11) normal_two_body_bi_orth_old close(11) diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index b6beb65b..3301fcc2 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,14 +11,17 @@ program tc_bi_ortho print *, 'Hello world' - my_grid_becke = .True. + my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - read_wf = .True. - touch read_wf + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + +! read_wf = .True. +! touch read_wf ! call test_h_u0 ! call test_slater_tc_opt @@ -27,10 +30,12 @@ program tc_bi_ortho ! call timing_single ! call timing_double - call test_no() !call test_no_aba() !call test_no_aab() !call test_no_aaa() + + call test_no() + end subroutine test_h_u0 @@ -272,9 +277,9 @@ subroutine test_no() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm - print*, ' testing normal_two_body_bi_orth ...' + print*, ' test_no ...' thr = 1d-8 @@ -282,6 +287,7 @@ subroutine test_no() PROVIDE normal_two_body_bi_orth accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -289,8 +295,8 @@ subroutine test_no() new = normal_two_body_bi_orth (l,k,j,i) ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on normal_two_body_bi_orth' print*, l, k, j, i @@ -298,14 +304,17 @@ subroutine test_no() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 - return -end + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end subroutine test_no ! --- @@ -313,7 +322,7 @@ subroutine test_no_aba() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aba_contraction ...' @@ -323,6 +332,7 @@ subroutine test_no_aba() PROVIDE no_aba_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -331,7 +341,6 @@ subroutine test_no_aba() new = no_aba_contraction (l,k,j,i) ref = no_aba_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aba_contraction' print*, l, k, j, i @@ -339,13 +348,16 @@ subroutine test_no_aba() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aba_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- @@ -355,7 +367,7 @@ subroutine test_no_aab() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aab_contraction ...' @@ -365,6 +377,7 @@ subroutine test_no_aab() PROVIDE no_aab_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -373,7 +386,6 @@ subroutine test_no_aab() new = no_aab_contraction (l,k,j,i) ref = no_aab_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aab_contraction' print*, l, k, j, i @@ -381,13 +393,16 @@ subroutine test_no_aab() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aab_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- @@ -396,7 +411,7 @@ subroutine test_no_aaa() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aaa_contraction ...' @@ -406,6 +421,7 @@ subroutine test_no_aaa() PROVIDE no_aaa_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -414,7 +430,6 @@ subroutine test_no_aaa() new = no_aaa_contraction (l,k,j,i) ref = no_aaa_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' print*, l, k, j, i @@ -422,13 +437,17 @@ subroutine test_no_aaa() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aaa_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- + diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 2d244586..a70ccc63 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -110,13 +110,13 @@ default: False type: Threshold doc: Threshold on the convergence of the Hartree Fock energy. interface: ezfio,provider,ocaml -default: 1.e-10 +default: 1.e-8 [n_it_tcscf_max] type: Strictly_positive_int doc: Maximum number of SCF iterations interface: ezfio,provider,ocaml -default: 100 +default: 50 [selection_tc] type: integer @@ -278,11 +278,11 @@ default: 30 type: integer doc: size of angular grid over r2 interface: ezfio,provider,ocaml -default: 50 +default: 194 [tc_grid2_r] type: integer doc: size of radial grid over r2 interface: ezfio,provider,ocaml -default: 30 +default: 50 diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 9bcbb13b..22f66484 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -18,6 +18,10 @@ program tc_scf my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + PROVIDE mu_erf print *, ' mu = ', mu_erf PROVIDE j1b_type @@ -30,6 +34,9 @@ program tc_scf my_n_pt_r_extra_grid = tc_grid2_r my_n_pt_a_extra_grid = tc_grid2_a touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif !call create_guess() From 8739c26509e07efa0586dc543f61f3920bd2e28a Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 6 Sep 2023 21:03:22 +0200 Subject: [PATCH 13/20] OPTIMZATIONS IN 4-ind integ --- src/bi_ort_ints/bi_ort_ints.irp.f | 35 +++-- src/bi_ort_ints/three_body_ijmk.irp.f | 183 +++++++++++++------------- 2 files changed, 116 insertions(+), 102 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index cac46b18..0349c731 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -18,10 +18,11 @@ program bi_ort_ints ! call test_5idx ! call test_5idx2 call test_4idx() - call test_4idx_n4() + !call test_4idx_n4() !call test_4idx2() !call test_5idx2 !call test_5idx + end subroutine test_5idx2 @@ -340,7 +341,7 @@ subroutine test_4idx() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm thr = 1d-10 @@ -348,6 +349,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_direct_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -356,7 +358,6 @@ subroutine test_4idx() new = three_e_4_idx_direct_bi_ort (l,k,j,i) ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_direct_bi_ort' print*, l, k, j, i @@ -364,11 +365,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_direct_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -376,6 +380,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_exch13_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -384,7 +389,6 @@ subroutine test_4idx() new = three_e_4_idx_exch13_bi_ort (l,k,j,i) ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_exch13_bi_ort' print*, l, k, j, i @@ -392,11 +396,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_exch13_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -404,6 +411,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_cycle_1_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -412,7 +420,6 @@ subroutine test_4idx() new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_cycle_1_bi_ort' print*, l, k, j, i @@ -420,11 +427,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_cycle_1_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -432,6 +442,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_exch23_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -440,7 +451,6 @@ subroutine test_4idx() new = three_e_4_idx_exch23_bi_ort (l,k,j,i) ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_exch23_bi_ort' print*, l, k, j, i @@ -448,13 +458,18 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_exch23_bi_ort (%) = ', 100.d0 * accu / norm ! --- return end + + diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 669861b7..742d5a80 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -64,120 +64,117 @@ !$OMP END DO !$OMP END PARALLEL + + + + + ! loops approach to break the O(N^4) scaling in memory + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2, tmp_2d, tmp1, tmp2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_1, tmp_aux_2, & + !$OMP three_e_4_idx_direct_bi_ort, three_e_4_idx_exch13_bi_ort, & + !$OMP three_e_4_idx_exch23_bi_ort, three_e_4_idx_cycle_1_bi_ort) + allocate(tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,4,mo_num)) allocate(tmp2(n_points_final_grid,4,mo_num)) - ! loops approach to break the O(N^4) scaling in memory + !$OMP DO do k = 1, mo_num + + ! --- + do i = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, i, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_aux_2, tmp1) - !$OMP DO - do n = 1, mo_num - do ipoint = 1, n_points_final_grid + ! --- - tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) - tmp_loc_2 = tmp_aux_2(ipoint,n) + do n = 1, mo_num + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 - tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 - tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 - tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & - + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) + tmp_loc_2 = tmp_aux_2(ipoint,n) + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO + ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, i, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2) - !$OMP DO - do n = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) - tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) - - tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 - tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 - tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 - tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & - + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & - + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) - - tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) - tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) - tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) - tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) + + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) + tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) + tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + ! --- - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + ! --- - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO - enddo ! i + ! --- + + enddo ! i + + ! --- do j = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, j, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2) - !$OMP DO do n = 1, mo_num do ipoint = 1, n_points_final_grid @@ -197,31 +194,33 @@ tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(i,m) do i = 1, mo_num do m = 1, mo_num three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i) enddo enddo - !$OMP END PARALLEL DO enddo ! j + + ! --- + enddo !k + !$OMP END DO deallocate(tmp_2d) deallocate(tmp1) deallocate(tmp2) + + !$OMP END PARALLEL + deallocate(tmp_aux_1) deallocate(tmp_aux_2) - call wall_time(wall1) print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 call print_memory_usage() From 3270602419ece18431da2f0eebd4ec74faea10a8 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 7 Sep 2023 21:13:57 +0200 Subject: [PATCH 14/20] REDUCED NB OF DGEMM IN NORMAL_OREDER TABLE --- .../grid_becke_vector.irp.f | 4 +- src/tc_bi_ortho/normal_ordered.irp.f | 844 ++++++-------- src/tc_bi_ortho/normal_ordered_v0.irp.f | 1022 +++++++++++++++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 49 +- 4 files changed, 1439 insertions(+), 480 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_v0.irp.f diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index e1147ec7..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -20,8 +20,8 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo - !print*,' n_points_final_grid = ', n_points_final_grid - !print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 1a9cddda..e65df450 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -13,13 +13,18 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: i, ii, ipoint + integer :: h1, p1, h2, p2 integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) + PROVIDE mo_class + PROVIDE list_act n_act_orb + PROVIDE N_int + print*,' Providing normal_two_body_bi_orth ...' call wall_time(walli) @@ -31,8 +36,8 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) double precision, allocatable :: tmp(:,:,:,:) double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) @@ -72,10 +77,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP END DO !$OMP END PARALLEL - - PROVIDE mo_class - PROVIDE N_int - allocate( occ(N_int*bit_kind_size,2) ) allocate( key_i_core(N_int,2) ) @@ -97,120 +98,98 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aba_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, h1, p1, h2, p2, i, ii, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - tmp_3d = 0.d0 - tmp_2d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - !$OMP DO + do hh1 = 1, n_act_orb + h1 = list_act(hh1) - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle - + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp1 = 1, n_act_orb + p1 = list_act(pp1) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo - do h2 = 1, mo_num - tmp2(:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d, tmp_2d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -225,114 +204,93 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, h1, p1, h2, p2, i, ii, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - Tmp_3d = 0.d0 - Tmp_2d = 0.d0 - Tmp1 = 0.d0 - Tmp2 = 0.d0 - Tmpval_1 = 0.d0 - Tmpval_2 = 0.d0 - Tmpvec_1 = 0.d0 - Tmpvec_2 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo - do h2 = 1, mo_num - tmp2(:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d, tmp_2d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -354,109 +312,89 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aab_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, ii, i, h1, p1, h2, p2, tmp1, tmp2, & !$OMP tmpval_1, tmpvec_1) & - !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmpval_1 = 0.d0 - tmpvec_1 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle - - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo - do h2 = 1, mo_num - if(mo_class(h2) .ne. "Active") cycle - tmp2(:,h2) = 0.d0 - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1) deallocate(tmpvec_1) @@ -520,212 +458,189 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aaa_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP PRIVATE (ipoint, i, ii, hh1, hh2, pp1, pp2, h1, h2, p1, p2, tmp1, tmp2, tmp_3d, & !$OMP tmpval_1, tmpval_2, & !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) allocate(tmpvec_3(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmp3 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - tmpvec_3 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) enddo enddo - enddo - do p2 = 1, mo_num - tmp1(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + enddo ! ii - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + tmp1 = 0.d0 + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) enddo - do h2 = 1, mo_num - tmp2( :,h2) = 0.d0 - tmp1(:,:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + tmp2(ipoint,p2) = tmp2(ipoint,p2) + int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,p2) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,p2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,p2) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,h1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do p2 = 1, mo_num - tmp2( :,p2) = 0.d0 - tmp3(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_2d) deallocate(tmp_3d) deallocate(tmp1) deallocate(tmp2) - deallocate(tmp3) deallocate(tmpval_1) deallocate(tmpval_2) deallocate(tmpvec_1) @@ -741,41 +656,29 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP PRIVATE (ipoint, i, ii, hh1, hh2, pp1, pp2, h1, h2, p1, p2, tmp_3d, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) allocate(tmpvec_3(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmp3 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - tmpvec_3 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) @@ -783,163 +686,150 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + tmp1 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) enddo enddo - enddo - do p2 = 1, mo_num - tmp1(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + enddo ! ii - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + tmp1 = 0.d0 + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) enddo - do h2 = 1, mo_num - tmp2( :,h2) = 0.d0 - tmp1(:,:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,p2) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,p2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,p2) + tmp2(ipoint,p2) = tmp2(ipoint,p2) + int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do p2 = 1, mo_num - tmp2( :,p2) = 0.d0 - tmp3(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,h1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_2d) deallocate(tmp_3d) deallocate(tmp1) deallocate(tmp2) - deallocate(tmp3) deallocate(tmpval_1) deallocate(tmpval_2) deallocate(tmpvec_1) diff --git a/src/tc_bi_ortho/normal_ordered_v0.irp.f b/src/tc_bi_ortho/normal_ordered_v0.irp.f new file mode 100644 index 00000000..784af9db --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_v0.irp.f @@ -0,0 +1,1022 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! Normal ordering of the three body interaction on the HF density + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: wall0, wall1, walli, wallf + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + PROVIDE mo_class + PROVIDE N_int + + print*,' Providing normal_two_body_bi_orth_v0 ...' + call wall_time(walli) + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") + read(11) normal_two_body_bi_orth_v0 + close(11) + + else + + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + allocate(int2_grad1_u12_bimo_t_tmp(n_points_final_grid,3,mo_num,mo_num)) + allocate(mos_l_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + allocate(mos_r_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, p1) & + !$OMP SHARED (mo_num, mo_class, & + !$OMP int2_grad1_u12_bimo_t, int2_grad1_u12_bimo_t_tmp, & + !$OMP mos_l_in_r_array_transp, mos_l_in_r_array_transp_tmp, & + !$OMP mos_r_in_r_array_transp, mos_r_in_r_array_transp_tmp) + !$OMP DO + do h1 = 1, mo_num + + mos_l_in_r_array_transp_tmp(:,h1) = 0.d0 + mos_r_in_r_array_transp_tmp(:,h1) = 0.d0 + + if(mo_class(h1) .ne. "Active") cycle + + mos_l_in_r_array_transp_tmp(:,h1) = mos_l_in_r_array_transp(:,h1) + mos_r_in_r_array_transp_tmp(:,h1) = mos_r_in_r_array_transp(:,h1) + + do p1 = 1, mo_num + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = int2_grad1_u12_bimo_t(:,:,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + ! --- + ! aba contraction + + print*,' Providing aba_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + + ! TODO: active electrons + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) + enddo + + do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) + enddo + + do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + if(mo_class(h2) .ne. "Active") cycle + tmp2(:,h2) = 0.d0 + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aaa_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + deallocate(int2_grad1_u12_bimo_t_tmp, mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_v0 + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli + +END_PROVIDER + +! --- + diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 3301fcc2..d25a1f70 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -34,7 +34,8 @@ program tc_bi_ortho !call test_no_aab() !call test_no_aaa() - call test_no() + !call test_no() + call test_no_v0() end @@ -273,6 +274,52 @@ end ! --- +subroutine test_no_v0() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' test_no_v0 ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_v0 + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_v0(l,k,j,i) + + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end subroutine test_no + +! --- + + subroutine test_no() implicit none From a88730f10f90795e69253dd57255a234a69501ae Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 11:57:38 +0200 Subject: [PATCH 15/20] NO-L0 V0 --- src/tc_bi_ortho/no_dressing_naive.irp.f | 92 +++++++++++++++++++++ src/tc_bi_ortho/no_dressing_v0.irp.f | 105 ++++++++++++++++++++++++ src/tc_bi_ortho/tc_utils.irp.f | 22 +++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 26 +++++- 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 src/tc_bi_ortho/no_dressing_naive.irp.f create mode 100644 src/tc_bi_ortho/no_dressing_v0.irp.f diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f new file mode 100644 index 00000000..10c80cec --- /dev/null +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -0,0 +1,92 @@ + +! --- + +BEGIN_PROVIDER [double precision, no_0_naive] + + implicit none + integer :: ii, jj, kk + integer :: i, j, k + double precision :: sigma_i, sigma_j, sigma_k + double precision :: tmp + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj + double precision :: t0, t1 + logical, external :: is_same_spin + + print*, " Providing no_0_naive ..." + call wall_time(t0) + + + tmp = 0.d0 + do ii = 1, elec_num + + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1.d0 + endif + + do kk = 1, elec_num + + if(kk .le. elec_beta_num) then + k = kk + sigma_k = -1.d0 + else + k = kk - elec_beta_num + sigma_k = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , i, sigma_i, j, sigma_j, k, sigma_k & + , I_ijk_ijk) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , k, sigma_k, i, sigma_i, j, sigma_j & + , I_ijk_kij) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , j, sigma_j, k, sigma_k, i, sigma_i & + , I_ijk_jki) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , j, sigma_j, i, sigma_i, k, sigma_k & + , I_ijk_jik) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , k, sigma_k, j, sigma_j, i, sigma_i & + , I_ijk_kji) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , i, sigma_i, k, sigma_k, j, sigma_j & + , I_ijk_ikj) + + + tmp = tmp + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj + !tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo + enddo + enddo + + no_0_naive = -1.d0 * (-tmp) / 6.d0 + + call wall_time(t1) + print*, " Wall time for no_0_naive (sec) = ", (t1 - t0)/60.d0 + + print*, " no_0_naive = ", no_0_naive + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f new file mode 100644 index 00000000..9a070dab --- /dev/null +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -0,0 +1,105 @@ + +! --- + +BEGIN_PROVIDER [double precision, no_0_v0] + + implicit none + integer :: i, j, k + double precision :: tmp + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_0_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + tmp = 0.d0 + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + + no_0_v0 = -1.d0 * (-tmp) / 6.d0 + + else + + tmp = 0.d0 + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp = tmp + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + enddo + + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp = tmp + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + enddo + + enddo + enddo + + no_0_v0 = -1.d0 * (-tmp) / 6.d0 + + endif + + call wall_time(t1) + print*, " Wall time for no_0_v0 (sec) = ", (t1 - t0)/60.d0 + + print*, " no_0_v0 = ", no_0_v0 + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 53fe5884..ba5ffff9 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -66,3 +66,25 @@ end ! --- + +logical function is_same_spin(sigma_1, sigma_2) + + BEGIN_DOC + ! + ! true if sgn(sigma_1) = sgn(sigma_2) + ! + END_DOC + + implicit none + double precision, intent(in) :: sigma_1, sigma_2 + + if((sigma_1 * sigma_2) .gt. 0.d0) then + is_same_spin = .true. + else + is_same_spin = .false. + endif + +end function is_same_spin + +! --- + diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index d25a1f70..226854ed 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -35,7 +35,9 @@ program tc_bi_ortho !call test_no_aaa() !call test_no() - call test_no_v0() + !call test_no_v0() + + call test_no_0() end @@ -315,7 +317,7 @@ subroutine test_no_v0() print*, ' accu (%) = ', 100.d0*accu/norm return -end subroutine test_no +end subroutine test_no_0 ! --- @@ -498,3 +500,23 @@ end ! --- +subroutine test_no_0() + + implicit none + double precision :: accu, norm + + print*, ' testing test_no_0 ...' + + PROVIDE no_0_naive + PROVIDE no_0_v0 + + accu = dabs(no_0_naive - no_0_v0) + norm = dabs(no_0_naive) + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + From e288c40d8b117b17b87db4f75e1aff902a7fa0c7 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 16:52:58 +0200 Subject: [PATCH 16/20] NO: L1 & L2 added and tested --- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 54 +++ src/tc_bi_ortho/no_dressing_naive.irp.f | 450 ++++++++++++++++++- src/tc_bi_ortho/no_dressing_v0.irp.f | 277 +++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 89 +++- 4 files changed, 834 insertions(+), 36 deletions(-) diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index d8145c3e..cb5c08cf 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -68,6 +68,60 @@ END_PROVIDER ! --- +subroutine give_integrals_3_body_bi_ort_spin( n, sigma_n, l, sigma_l, k, sigma_k & + , m, sigma_m, j, sigma_j, i, sigma_i & + , integral) + + BEGIN_DOC + ! + ! < n l k | -L | m j i > with a BI-ORTHONORMAL SPIN-ORBITALS + ! + END_DOC + + implicit none + integer, intent(in) :: n, l, k, m, j, i + double precision, intent(in) :: sigma_n, sigma_l, sigma_k, sigma_m, sigma_j, sigma_i + double precision, intent(out) :: integral + integer :: ipoint + double precision :: weight, tmp + logical, external :: is_same_spin + + integral = 0.d0 + + if( is_same_spin(sigma_n, sigma_m) .and. & + is_same_spin(sigma_l, sigma_j) .and. & + is_same_spin(sigma_k, sigma_i) ) then + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_t + + do ipoint = 1, n_points_final_grid + + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + + integral = integral + tmp * final_weight_at_r_vector(ipoint) + enddo + + endif + + return +end subroutine give_integrals_3_body_bi_ort_spin + +! --- + subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f index 10c80cec..a57b1723 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -4,19 +4,26 @@ BEGIN_PROVIDER [double precision, no_0_naive] implicit none - integer :: ii, jj, kk - integer :: i, j, k - double precision :: sigma_i, sigma_j, sigma_k - double precision :: tmp - double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj - double precision :: t0, t1 - logical, external :: is_same_spin + integer :: ii, jj, kk + integer :: i, j, k + double precision :: sigma_i, sigma_j, sigma_k + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj + double precision :: t0, t1 + double precision, allocatable :: tmp(:) print*, " Providing no_0_naive ..." call wall_time(t0) + allocate(tmp(elec_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, kk, k, sigma_k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, & + !$OMP I_ijk_kji, I_ijk_ikj) & + !$OMP SHARED (elec_beta_num, elec_num, tmp) + !$OMP DO - tmp = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then @@ -27,6 +34,8 @@ BEGIN_PROVIDER [double precision, no_0_naive] sigma_i = +1.d0 endif + tmp(ii) = 0.d0 + do jj = 1, elec_num if(jj .le. elec_beta_num) then @@ -72,16 +81,20 @@ BEGIN_PROVIDER [double precision, no_0_naive] , I_ijk_ikj) - tmp = tmp + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj - !tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + tmp(ii) = tmp(ii) + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj + ! = tmp(ii) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - no_0_naive = -1.d0 * (-tmp) / 6.d0 + no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + + deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (sec) = ", (t1 - t0)/60.d0 + print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 print*, " no_0_naive = ", no_0_naive @@ -89,4 +102,417 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! + END_DOC + + implicit none + integer :: ii, jj + integer :: i, j, p, s + double precision :: sigma_i, sigma_j, sigma_p, sigma_s + double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi + double precision :: t0, t1 + + print*, " Providing no_1_naive ..." + call wall_time(t0) + + ! ---- + ! up-up part + + sigma_p = +1.d0 + sigma_s = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, & + !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & + !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_s, no_1_naive) + + !$OMP DO COLLAPSE (2) + + do s = 1, mo_num + do p = 1, mo_num + + no_1_naive(p,s) = 0.d0 + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1d0 + endif + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, j, sigma_j, i, sigma_i & + , I_pij_sji) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, i, sigma_i, j, sigma_j & + , I_pij_sij) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, i, sigma_i, s, sigma_s & + , I_pij_jis) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, j, sigma_j, s, sigma_s & + , I_pij_ijs) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, s, sigma_s, j, sigma_j & + , I_pij_isj) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, s, sigma_s, i, sigma_i & + , I_pij_jsi) + + ! x (-1) because integrals are over -L + ! x 0.5 because we consider 0.5 (up + down) + no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + enddo ! j + enddo ! i + enddo ! s + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + + ! ---- + ! down-down part + + sigma_p = -1.d0 + sigma_s = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, & + !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & + !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_s, no_1_naive) + + !$OMP DO COLLAPSE (2) + + do s = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1d0 + endif + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, j, sigma_j, i, sigma_i & + , I_pij_sji) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, i, sigma_i, j, sigma_j & + , I_pij_sij) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, i, sigma_i, s, sigma_s & + , I_pij_jis) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, j, sigma_j, s, sigma_s & + , I_pij_ijs) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, s, sigma_s, j, sigma_j & + , I_pij_isj) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, s, sigma_s, i, sigma_i & + , I_pij_jsi) + + ! x (-1) because integrals are over -L + ! x 0.5 because we consider 0.5 (up + down) + no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + enddo ! j + enddo ! i + enddo ! s + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call wall_time(t1) + print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! + END_DOC + + implicit none + integer :: ii + integer :: i, p, q, s, t + double precision :: sigma_i, sigma_p, sigma_q, sigma_s, sigma_t + double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi + double precision :: t0, t1 + + print*, " Providing no_2_naive ..." + call wall_time(t0) + + ! ---- + ! up-up & up-up part + + sigma_p = +1.d0 + sigma_s = +1.d0 + sigma_q = +1.d0 + sigma_t = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_naive(p,q,s,t) = 0.d0 + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! up-up & down-down part + + sigma_p = +1.d0 + sigma_s = +1.d0 + sigma_q = -1.d0 + sigma_t = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! down-down & up-up part + + sigma_p = -1.d0 + sigma_s = -1.d0 + sigma_q = +1.d0 + sigma_t = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! down-down & down-down part + + sigma_p = -1.d0 + sigma_s = -1.d0 + sigma_q = -1.d0 + sigma_t = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(t1) + print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f index 9a070dab..4d40c76f 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -4,19 +4,28 @@ BEGIN_PROVIDER [double precision, no_0_v0] implicit none - integer :: i, j, k - double precision :: tmp - double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji - double precision :: t0, t1 + integer :: i, j, k + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji + double precision :: t0, t1 + double precision, allocatable :: tmp(:) call wall_time(t0) print*, " Providing no_0_v0 ..." if(elec_alpha_num .eq. elec_beta_num) then - tmp = 0.d0 + allocate(tmp(elec_beta_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik) & + !$OMP SHARED (elec_beta_num, tmp) + + !$OMP DO do i = 1, elec_beta_num + + tmp(i) = 0.d0 do j = 1, elec_beta_num do k = 1, elec_beta_num @@ -24,18 +33,32 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-tmp) / 6.d0 + no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + + deallocate(tmp) else - tmp = 0.d0 + allocate(tmp(elec_alpha_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik, & + !$OMP I_ijk_jki, I_ijk_ikj, I_ijk_kji) & + !$OMP SHARED (elec_beta_num, elec_alpha_num, tmp) + + !$OMP DO do i = 1, elec_beta_num + + tmp(i) = 0.d0 do j = 1, elec_beta_num do k = 1, elec_beta_num @@ -43,12 +66,16 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) enddo enddo enddo + !$OMP END DO + !$OMP DO do i = elec_beta_num+1, elec_alpha_num + + tmp(i) = 0.d0 do j = elec_beta_num+1, elec_alpha_num do k = elec_beta_num+1, elec_alpha_num @@ -56,14 +83,11 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik - enddo - enddo - enddo + tmp(i) = tmp(i) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo ! k + enddo ! j - do i = elec_beta_num+1, elec_alpha_num do j = 1, elec_beta_num - do k = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) @@ -72,8 +96,8 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp = tmp + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) - enddo + tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + enddo ! k do k = elec_beta_num+1, elec_alpha_num @@ -83,18 +107,21 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp = tmp + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) - enddo + tmp(i) = tmp(i) + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + enddo ! k + enddo ! j + enddo ! i + !$OMP END DO + !$OMP END PARALLEL - enddo - enddo + no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 - no_0_v0 = -1.d0 * (-tmp) / 6.d0 + deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (sec) = ", (t1 - t0)/60.d0 + print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 print*, " no_0_v0 = ", no_0_v0 @@ -102,4 +129,208 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! x (-1) because integrals are over -L + ! + END_DOC + + implicit none + integer :: p, s, i, j + double precision :: I_pij_sij, I_pij_isj, I_pij_ijs, I_pij_sji, I_pij_jsi, I_pij_jis + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_1_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji) & + !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + no_1_v0(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + no_1_v0(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + call give_integrals_3_body_bi_ort(p, i, j, j, s, i, I_pij_jsi) + call give_integrals_3_body_bi_ort(p, i, j, j, i, s, I_pij_jis) + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + + no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + enddo ! j + + do j = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! x (-1) because integrals are over -L + ! + END_DOC + + implicit none + integer :: p, q, s, t, i + double precision :: I_ipq_sit, I_ipq_tsi, I_ipq_ist + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_2_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 226854ed..9cbf7748 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -38,6 +38,8 @@ program tc_bi_ortho !call test_no_v0() call test_no_0() + call test_no_1() + call test_no_2() end @@ -505,7 +507,7 @@ subroutine test_no_0() implicit none double precision :: accu, norm - print*, ' testing test_no_0 ...' + print*, ' testing no_0 ...' PROVIDE no_0_naive PROVIDE no_0_v0 @@ -520,3 +522,88 @@ end ! --- +subroutine test_no_1() + + implicit none + integer :: i, j + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' testing no_1 ...' + + PROVIDE no_1_naive + PROVIDE no_1_v0 + + thr = 1d-8 + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + + new = no_1_v0 (j,i) + ref = no_1_naive(j,i) + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + +subroutine test_no_2() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' testing no_2 ...' + + PROVIDE no_2_naive + PROVIDE no_2_v0 + + thr = 1d-8 + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_2_v0 (l,k,j,i) + ref = no_2_naive(l,k,j,i) + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + + From 19b9976d134ceee49dc3afe180d3de8ae0dede96 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 20:43:54 +0200 Subject: [PATCH 17/20] noL dredding --- src/bi_ort_ints/one_e_bi_ort.irp.f | 7 ++- src/bi_ort_ints/total_twoe_pot.irp.f | 6 +++ src/tc_bi_ortho/no_dressing_naive.irp.f | 54 ++++++++++++------------ src/tc_bi_ortho/no_dressing_v0.irp.f | 54 ++++++++++++------------ src/tc_bi_ortho/slater_tc_opt.irp.f | 6 ++- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 10 +++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 24 +++++------ src/tc_keywords/EZFIO.cfg | 6 +++ 8 files changed, 98 insertions(+), 69 deletions(-) diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 49181182..7c2ac860 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -29,7 +29,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] BEGIN_DOC ! @@ -41,6 +41,11 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num) + if(noL_standard) then + PROVIDE noL_1e + mo_bi_ortho_tc_one_e = mo_bi_ortho_tc_one_e + noL_1e + endif + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f03e8a34..49f613b5 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -256,6 +256,12 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, FREE mo_bi_ortho_tc_two_e_chemist + if(noL_standard) then + PROVIDE noL_2e + mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + noL_2e + FREE noL_2e + endif + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f index a57b1723..a0c488b3 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_naive] +BEGIN_PROVIDER [double precision, noL_0e_naive] implicit none integer :: ii, jj, kk @@ -11,7 +11,7 @@ BEGIN_PROVIDER [double precision, no_0_naive] double precision :: t0, t1 double precision, allocatable :: tmp(:) - print*, " Providing no_0_naive ..." + print*, " Providing noL_0e_naive ..." call wall_time(t0) allocate(tmp(elec_num)) @@ -89,24 +89,24 @@ BEGIN_PROVIDER [double precision, no_0_naive] !$OMP END DO !$OMP END PARALLEL - no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e_naive = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e_naive (min) = ", (t1 - t0)/60.d0 - print*, " no_0_naive = ", no_0_naive + print*, " noL_0e_naive = ", noL_0e_naive END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] BEGIN_DOC ! - ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! < p | H(1) | s > is dressed with noL_1e_naive(p,s) ! END_DOC @@ -117,7 +117,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi double precision :: t0, t1 - print*, " Providing no_1_naive ..." + print*, " Providing noL_1e_naive ..." call wall_time(t0) ! ---- @@ -132,14 +132,14 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) do s = 1, mo_num do p = 1, mo_num - no_1_naive(p,s) = 0.d0 + noL_1e_naive(p,s) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -184,7 +184,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -205,7 +205,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) @@ -256,7 +256,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -267,17 +267,17 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! --- call wall_time(t1) - print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! - ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! < p q | H(2) | s t > is dressed with noL_2e_naive(p,q,s,t) ! END_DOC @@ -288,7 +288,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi double precision :: t0, t1 - print*, " Providing no_2_naive ..." + print*, " Providing noL_2e_naive ..." call wall_time(t0) ! ---- @@ -305,7 +305,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -313,7 +313,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_naive(p,q,s,t) = 0.d0 + noL_2e_naive(p,q,s,t) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -337,7 +337,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -360,7 +360,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -391,7 +391,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -414,7 +414,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -445,7 +445,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -468,7 +468,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -509,7 +509,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP END PARALLEL call wall_time(t1) - print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f index 4d40c76f..efcf51db 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_v0] +BEGIN_PROVIDER [double precision, noL_0e] implicit none integer :: i, j, k @@ -10,7 +10,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] double precision, allocatable :: tmp(:) call wall_time(t0) - print*, " Providing no_0_v0 ..." + print*, " Providing noL_0e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) @@ -114,22 +114,22 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - print*, " no_0_v0 = ", no_0_v0 + print*, " noL_0e = ", noL_0e END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] BEGIN_DOC ! @@ -143,7 +143,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_1_v0 ..." + print*, " Providing noL_1e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -152,13 +152,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -167,7 +167,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -182,13 +182,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -197,7 +197,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -211,7 +211,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -221,7 +221,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -233,13 +233,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -253,7 +253,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_2_v0 ..." + print*, " Providing noL_2e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -261,7 +261,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -269,14 +269,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo enddo enddo @@ -291,7 +291,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -299,14 +299,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i do i = elec_beta_num+1, elec_alpha_num @@ -315,7 +315,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i enddo ! p @@ -328,7 +328,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..f12b83e3 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -34,7 +34,9 @@ end ! --- subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) - implicit none + + implicit none + BEGIN_DOC ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -82,7 +84,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hmono = 0.d0 htwoe = 0.d0 htot = 0.d0 - hthree = 0.D0 + hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index d95c87b1..367d90dd 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -29,6 +29,11 @@ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + ref_tc_energy_tot += noL_0e + endif + END_PROVIDER ! --- @@ -107,6 +112,11 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot = hmono + htwoe + hthree + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + htot += noL_0e + endif + end ! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 9cbf7748..b55419a8 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -509,11 +509,11 @@ subroutine test_no_0() print*, ' testing no_0 ...' - PROVIDE no_0_naive - PROVIDE no_0_v0 + PROVIDE noL_0e_naive + PROVIDE noL_0e - accu = dabs(no_0_naive - no_0_v0) - norm = dabs(no_0_naive) + accu = dabs(noL_0e_naive - noL_0e) + norm = dabs(noL_0e_naive) print*, ' accu (%) = ', 100.d0*accu/norm @@ -530,8 +530,8 @@ subroutine test_no_1() print*, ' testing no_1 ...' - PROVIDE no_1_naive - PROVIDE no_1_v0 + PROVIDE noL_1e_naive + PROVIDE noL_1e thr = 1d-8 @@ -540,8 +540,8 @@ subroutine test_no_1() do i = 1, mo_num do j = 1, mo_num - new = no_1_v0 (j,i) - ref = no_1_naive(j,i) + new = noL_1e (j,i) + ref = noL_1e_naive(j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' @@ -570,8 +570,8 @@ subroutine test_no_2() print*, ' testing no_2 ...' - PROVIDE no_2_naive - PROVIDE no_2_v0 + PROVIDE noL_2e_naive + PROVIDE noL_2e thr = 1d-8 @@ -582,8 +582,8 @@ subroutine test_no_2() do k = 1, mo_num do l = 1, mo_num - new = no_2_v0 (l,k,j,i) - ref = no_2_naive(l,k,j,i) + new = noL_2e (l,k,j,i) + ref = noL_2e_naive(l,k,j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a70ccc63..fee492b4 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: If |true|, contracted double excitation three-body terms are included interface: ezfio,provider,ocaml default: False +[noL_standard] +type: logical +doc: If |true|, standard normal-ordering for L +interface: ezfio,provider,ocaml +default: False + [core_tc_op] type: logical doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) From 677b58ae61e8733ec28068ffdf4167e53950a120 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 20:43:54 +0200 Subject: [PATCH 18/20] noL dredding --- .../no_dressing_naive.irp.f | 54 +++++++++---------- .../no_dressing_v0.irp.f | 54 +++++++++---------- src/bi_ort_ints/one_e_bi_ort.irp.f | 7 ++- src/bi_ort_ints/total_twoe_pot.irp.f | 6 +++ src/tc_bi_ortho/slater_tc_opt.irp.f | 6 ++- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 10 ++++ src/tc_bi_ortho/tc_utils.irp.f | 22 -------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 24 ++++----- src/tc_keywords/EZFIO.cfg | 6 +++ src/utils/util.irp.f | 25 +++++++++ 10 files changed, 123 insertions(+), 91 deletions(-) rename src/{tc_bi_ortho => bi_ort_ints}/no_dressing_naive.irp.f (90%) rename src/{tc_bi_ortho => bi_ort_ints}/no_dressing_v0.irp.f (84%) diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/bi_ort_ints/no_dressing_naive.irp.f similarity index 90% rename from src/tc_bi_ortho/no_dressing_naive.irp.f rename to src/bi_ort_ints/no_dressing_naive.irp.f index a57b1723..a0c488b3 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/bi_ort_ints/no_dressing_naive.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_naive] +BEGIN_PROVIDER [double precision, noL_0e_naive] implicit none integer :: ii, jj, kk @@ -11,7 +11,7 @@ BEGIN_PROVIDER [double precision, no_0_naive] double precision :: t0, t1 double precision, allocatable :: tmp(:) - print*, " Providing no_0_naive ..." + print*, " Providing noL_0e_naive ..." call wall_time(t0) allocate(tmp(elec_num)) @@ -89,24 +89,24 @@ BEGIN_PROVIDER [double precision, no_0_naive] !$OMP END DO !$OMP END PARALLEL - no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e_naive = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e_naive (min) = ", (t1 - t0)/60.d0 - print*, " no_0_naive = ", no_0_naive + print*, " noL_0e_naive = ", noL_0e_naive END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] BEGIN_DOC ! - ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! < p | H(1) | s > is dressed with noL_1e_naive(p,s) ! END_DOC @@ -117,7 +117,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi double precision :: t0, t1 - print*, " Providing no_1_naive ..." + print*, " Providing noL_1e_naive ..." call wall_time(t0) ! ---- @@ -132,14 +132,14 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) do s = 1, mo_num do p = 1, mo_num - no_1_naive(p,s) = 0.d0 + noL_1e_naive(p,s) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -184,7 +184,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -205,7 +205,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) @@ -256,7 +256,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -267,17 +267,17 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! --- call wall_time(t1) - print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! - ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! < p q | H(2) | s t > is dressed with noL_2e_naive(p,q,s,t) ! END_DOC @@ -288,7 +288,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi double precision :: t0, t1 - print*, " Providing no_2_naive ..." + print*, " Providing noL_2e_naive ..." call wall_time(t0) ! ---- @@ -305,7 +305,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -313,7 +313,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_naive(p,q,s,t) = 0.d0 + noL_2e_naive(p,q,s,t) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -337,7 +337,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -360,7 +360,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -391,7 +391,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -414,7 +414,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -445,7 +445,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -468,7 +468,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -509,7 +509,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP END PARALLEL call wall_time(t1) - print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/bi_ort_ints/no_dressing_v0.irp.f similarity index 84% rename from src/tc_bi_ortho/no_dressing_v0.irp.f rename to src/bi_ort_ints/no_dressing_v0.irp.f index 4d40c76f..efcf51db 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/bi_ort_ints/no_dressing_v0.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_v0] +BEGIN_PROVIDER [double precision, noL_0e] implicit none integer :: i, j, k @@ -10,7 +10,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] double precision, allocatable :: tmp(:) call wall_time(t0) - print*, " Providing no_0_v0 ..." + print*, " Providing noL_0e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) @@ -114,22 +114,22 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - print*, " no_0_v0 = ", no_0_v0 + print*, " noL_0e = ", noL_0e END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] BEGIN_DOC ! @@ -143,7 +143,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_1_v0 ..." + print*, " Providing noL_1e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -152,13 +152,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -167,7 +167,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -182,13 +182,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -197,7 +197,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -211,7 +211,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -221,7 +221,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -233,13 +233,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -253,7 +253,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_2_v0 ..." + print*, " Providing noL_2e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -261,7 +261,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -269,14 +269,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo enddo enddo @@ -291,7 +291,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -299,14 +299,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i do i = elec_beta_num+1, elec_alpha_num @@ -315,7 +315,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i enddo ! p @@ -328,7 +328,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 49181182..7c2ac860 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -29,7 +29,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] BEGIN_DOC ! @@ -41,6 +41,11 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num) + if(noL_standard) then + PROVIDE noL_1e + mo_bi_ortho_tc_one_e = mo_bi_ortho_tc_one_e + noL_1e + endif + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f03e8a34..49f613b5 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -256,6 +256,12 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, FREE mo_bi_ortho_tc_two_e_chemist + if(noL_standard) then + PROVIDE noL_2e + mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + noL_2e + FREE noL_2e + endif + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..f12b83e3 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -34,7 +34,9 @@ end ! --- subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) - implicit none + + implicit none + BEGIN_DOC ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -82,7 +84,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hmono = 0.d0 htwoe = 0.d0 htot = 0.d0 - hthree = 0.D0 + hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index d95c87b1..367d90dd 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -29,6 +29,11 @@ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + ref_tc_energy_tot += noL_0e + endif + END_PROVIDER ! --- @@ -107,6 +112,11 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot = hmono + htwoe + hthree + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + htot += noL_0e + endif + end ! --- diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index ba5ffff9..53fe5884 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -66,25 +66,3 @@ end ! --- - -logical function is_same_spin(sigma_1, sigma_2) - - BEGIN_DOC - ! - ! true if sgn(sigma_1) = sgn(sigma_2) - ! - END_DOC - - implicit none - double precision, intent(in) :: sigma_1, sigma_2 - - if((sigma_1 * sigma_2) .gt. 0.d0) then - is_same_spin = .true. - else - is_same_spin = .false. - endif - -end function is_same_spin - -! --- - diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 9cbf7748..b55419a8 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -509,11 +509,11 @@ subroutine test_no_0() print*, ' testing no_0 ...' - PROVIDE no_0_naive - PROVIDE no_0_v0 + PROVIDE noL_0e_naive + PROVIDE noL_0e - accu = dabs(no_0_naive - no_0_v0) - norm = dabs(no_0_naive) + accu = dabs(noL_0e_naive - noL_0e) + norm = dabs(noL_0e_naive) print*, ' accu (%) = ', 100.d0*accu/norm @@ -530,8 +530,8 @@ subroutine test_no_1() print*, ' testing no_1 ...' - PROVIDE no_1_naive - PROVIDE no_1_v0 + PROVIDE noL_1e_naive + PROVIDE noL_1e thr = 1d-8 @@ -540,8 +540,8 @@ subroutine test_no_1() do i = 1, mo_num do j = 1, mo_num - new = no_1_v0 (j,i) - ref = no_1_naive(j,i) + new = noL_1e (j,i) + ref = noL_1e_naive(j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' @@ -570,8 +570,8 @@ subroutine test_no_2() print*, ' testing no_2 ...' - PROVIDE no_2_naive - PROVIDE no_2_v0 + PROVIDE noL_2e_naive + PROVIDE noL_2e thr = 1d-8 @@ -582,8 +582,8 @@ subroutine test_no_2() do k = 1, mo_num do l = 1, mo_num - new = no_2_v0 (l,k,j,i) - ref = no_2_naive(l,k,j,i) + new = noL_2e (l,k,j,i) + ref = noL_2e_naive(l,k,j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a70ccc63..fee492b4 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: If |true|, contracted double excitation three-body terms are included interface: ezfio,provider,ocaml default: False +[noL_standard] +type: logical +doc: If |true|, standard normal-ordering for L +interface: ezfio,provider,ocaml +default: False + [core_tc_op] type: logical doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index a9f1a438..ebb13781 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -556,3 +556,28 @@ subroutine sub_A_At(A, N) !$OMP END PARALLEL end + +! --- + +logical function is_same_spin(sigma_1, sigma_2) + + BEGIN_DOC + ! + ! true if sgn(sigma_1) = sgn(sigma_2) + ! + END_DOC + + implicit none + double precision, intent(in) :: sigma_1, sigma_2 + + if((sigma_1 * sigma_2) .gt. 0.d0) then + is_same_spin = .true. + else + is_same_spin = .false. + endif + +end function is_same_spin + +! --- + + From bb155c0dfd0b12dadf34b321f0aaa6eb25ac3055 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 15 Sep 2023 11:30:10 +0200 Subject: [PATCH 19/20] J_qmckl en & ee --- src/non_h_ints_mu/NEED | 1 + src/non_h_ints_mu/jast_deriv.irp.f | 95 ++++++++++++++++++++++++++ src/non_h_ints_mu/qmckl.irp.f | 102 ++++++++++++++++++++++++++++ src/non_h_ints_mu/tc_integ_an.irp.f | 14 ++-- src/qmckl/README.md | 4 ++ src/qmckl/qmckl.F90 | 1 + 6 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 src/non_h_ints_mu/qmckl.irp.f create mode 100644 src/qmckl/README.md create mode 100644 src/qmckl/qmckl.F90 diff --git a/src/non_h_ints_mu/NEED b/src/non_h_ints_mu/NEED index d09ab4a5..ecde6390 100644 --- a/src/non_h_ints_mu/NEED +++ b/src/non_h_ints_mu/NEED @@ -1,2 +1,3 @@ +qmckl ao_tc_eff_map bi_ortho_mos diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index ee01886c..6b8445b1 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -24,11 +24,15 @@ double precision :: v1b_r1, v1b_r2, u2b_r12 double precision :: grad1_v1b(3), grad1_u2b(3) double precision :: dx, dy, dz + double precision :: time0, time1 double precision, external :: j12_mu, j1b_nucl PROVIDE j1b_type PROVIDE final_grid_points_extra + print*, ' providing grad1_u12_num & grad1_u12_squared_num ...' + call wall_time(time0) + grad1_u12_num = 0.d0 grad1_u12_squared_num = 0.d0 @@ -112,6 +116,94 @@ !$OMP END DO !$OMP END PARALLEL + elseif (j1b_type .eq. 1000) then + + double precision :: f + f = 1.d0 / dble(elec_num - 1) + + double precision, allocatable :: rij(:,:,:) + allocate( rij(3, 2, n_points_extra_final_grid) ) + + use qmckl + integer(qmckl_exit_code) :: rc + + integer*8 :: npoints + npoints = n_points_extra_final_grid + + double precision, allocatable :: gl(:,:,:) + allocate( gl(2,4,n_points_extra_final_grid) ) + + do ipoint = 1, n_points_final_grid ! r1 + + do jpoint = 1, n_points_extra_final_grid ! r2 + rij(1:3, 1, jpoint) = final_grid_points (1:3, ipoint) + rij(1:3, 2, jpoint) = final_grid_points_extra(1:3, jpoint) + enddo + + + rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', npoints, rij, npoints*6_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_coord' + stop -1 + endif + + + ! --- + ! e-e term + + rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_ee_gl' + stop -1 + endif + + do jpoint = 1, n_points_extra_final_grid ! r2 + grad1_u12_num(jpoint,ipoint,1) = gl(1,1,jpoint) + grad1_u12_num(jpoint,ipoint,2) = gl(1,2,jpoint) + grad1_u12_num(jpoint,ipoint,3) = gl(1,3,jpoint) + enddo + + ! --- + ! e-e-n term + +! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) +! if (rc /= QMCKL_SUCCESS) then +! print *, irp_here, 'qmckl error in fact_een_gl' +! stop -1 +! endif +! +! do jpoint = 1, n_points_extra_final_grid ! r2 +! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,jpoint) +! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,jpoint) +! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,jpoint) +! enddo + + ! --- + ! e-n term + + rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_en_gl' + stop -1 + endif + + do jpoint = 1, n_points_extra_final_grid ! r2 + grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,jpoint) + grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,jpoint) + grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,jpoint) + enddo + + do jpoint = 1, n_points_extra_final_grid ! r2 + dx = grad1_u12_num(jpoint,ipoint,1) + dy = grad1_u12_num(jpoint,ipoint,2) + dz = grad1_u12_num(jpoint,ipoint,3) + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + + enddo + + deallocate(gl, rij) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -119,6 +211,9 @@ endif + call wall_time(time1) + print*, ' Wall time for grad1_u12_num & grad1_u12_squared_num (min) =', (time1-time0)/60.d0 + END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f new file mode 100644 index 00000000..d83de4dc --- /dev/null +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -0,0 +1,102 @@ +BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] + use qmckl + implicit none + BEGIN_DOC + ! Context for the QMCKL library + END_DOC + integer(qmckl_exit_code) :: rc + + qmckl_ctx_jastrow = qmckl_context_create() + + rc = qmckl_set_nucleus_num(qmckl_ctx_jastrow, nucl_num*1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_nucleus_charge(qmckl_ctx_jastrow, nucl_charge, nucl_num*1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_nucleus_coord(qmckl_ctx_jastrow, 'T', nucl_coord, nucl_num*3_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_electron_num(qmckl_ctx_jastrow, 1_8, 1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + + ! Jastrow parameters + rc = qmckl_set_jastrow_champ_type_nucl_num (qmckl_ctx_jastrow, 2_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_type_nucl_vector (qmckl_ctx_jastrow, (/0_8,1_8,1_8/), 1_8*nucl_num) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_rescale_factor_ee (qmckl_ctx_jastrow, 0.6d0) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_rescale_factor_en (qmckl_ctx_jastrow, (/0.6d0, 0.6d0 /), 2_8 ) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_aord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_bord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_cord_num (qmckl_ctx_jastrow, 0_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + +! double precision :: a_vector(12) = dble(& +! (/ 0.00000000, 0.00000000, -0.71168405, -0.44415699, -0.13865109, 0.07002267 , & +! 0.00000000, 0.00000000, -0.11379992, 0.04542846, 0.01696997, -0.01809299 /) ) + +! double precision :: b_vector(6) = dble(& +! (/ 0.00000000, 0.65603311, 0.14581988, 0.03138163, 0.00153156, -0.00447302 /) ) + +! double precision :: c_vector(46) = & +! (/ 1.06384279d0, -1.44303973d0, -0.92409833d0, 0.11845356d0, -0.02980776d0, & +! 1.07048863d0, 0.06009623d0, -0.01854872d0, -0.00915398d0, 0.01324198d0, & +! -0.00504959d0, -0.01202497d0, -0.00531644d0, 0.15101629d0, -0.00723831d0, & +! -0.00384182d0, -0.00295036d0, -0.00114583d0, 0.00158107d0, -0.00078107d0, & +! -0.00080000d0, -0.14140576d0, -0.00237271d0, -0.03006706d0, 0.01537009d0, & +! -0.02327226d0, 0.16502789d0, -0.01458259d0, -0.09946065d0, 0.00850029d0, & +! -0.02969361d0, -0.01159547d0, 0.00516313d0, 0.00405247d0, -0.02200886d0, & +! 0.03376709d0, 0.01277767d0, -0.01523013d0, -0.00739224d0, -0.00463953d0, & +! 0.00003174d0, -0.01421128d0, 0.00808140d0, 0.00612988d0, -0.00610632d0, & +! 0.01926215d0 /) + +! a_vector = 0.d0 +! b_vector = 0.d0 +! c_vector = 0.d0 + + double precision :: a_vector(12) = dble(& + (/ 0.00000000 , 0.00000000, -0.45105821, -0.23519218, -0.03825391, 0.10072866, & + 0.00000000 , 0.00000000, -0.06930592, -0.02909224, -0.00134650, 0.01477242 /) ) + + double precision :: b_vector(6) = dble(& + (/ 0.00000000, 0.00000000, 0.29217862, -0.00450671, -0.02925982, -0.01381532 /) ) + + double precision :: c_vector(46) + c_vector = 0.d0 + + rc = qmckl_set_jastrow_champ_a_vector(qmckl_ctx_jastrow, a_vector, 12_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_b_vector(qmckl_ctx_jastrow, b_vector, 6_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + +! rc = qmckl_set_jastrow_champ_c_vector(qmckl_ctx_jastrow, c_vector, 46_8) +! rc = qmckl_check(qmckl_ctx_jastrow, rc) +! if (rc /= QMCKL_SUCCESS) stop -1 + +END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f index ae7af987..a6459761 100644 --- a/src/non_h_ints_mu/tc_integ_an.irp.f +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -106,8 +106,11 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(j1b_type .ge. 100) then - PROVIDE int2_grad1_u12_ao_num - int2_grad1_u12_ao = int2_grad1_u12_ao_num +! PROVIDE int2_grad1_u12_ao_num +! int2_grad1_u12_ao = int2_grad1_u12_ao_num + + PROVIDE int2_grad1_u12_ao_num_1shot + int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot else @@ -222,8 +225,11 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p elseif(j1b_type .ge. 100) then - PROVIDE int2_grad1_u12_square_ao_num - int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + ! PROVIDE int2_grad1_u12_square_ao_num + ! int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + + PROVIDE int2_grad1_u12_square_ao_num_1shot + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot else diff --git a/src/qmckl/README.md b/src/qmckl/README.md new file mode 100644 index 00000000..ebc4b089 --- /dev/null +++ b/src/qmckl/README.md @@ -0,0 +1,4 @@ +#QMCkl + +Info related to the QMCkl library. + diff --git a/src/qmckl/qmckl.F90 b/src/qmckl/qmckl.F90 new file mode 100644 index 00000000..94ac962f --- /dev/null +++ b/src/qmckl/qmckl.F90 @@ -0,0 +1 @@ +#include From b26f7e7fe68ce696a28d074d7f37ea70d36f051d Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 15 Sep 2023 11:37:11 +0200 Subject: [PATCH 20/20] Fixed Conflict --- .github/workflows/configuration.yml | 2 +- RELEASE_NOTES.org | 5 +- config/gfortran_armpl.cfg | 2 +- config/ifort_2021_debug.cfg | 66 + etc/qp.rc | 13 +- external/ezfio | 2 +- external/irpf90 | 2 +- src/ao_two_e_ints/EZFIO.cfg | 6 + src/ao_two_e_ints/cholesky.irp.f | 516 ++++-- src/ao_two_e_ints/two_e_integrals.irp.f | 511 +++++- src/casscf_cipsi/50.casscf.bats | 49 + src/casscf_cipsi/EZFIO.cfg | 75 + src/casscf_cipsi/NEED | 5 + src/casscf_cipsi/README.rst | 5 + src/casscf_cipsi/bavard.irp.f | 6 + src/casscf_cipsi/bielec.irp.f | 155 ++ src/casscf_cipsi/bielec_natorb.irp.f | 369 +++++ src/casscf_cipsi/casscf.irp.f | 110 ++ src/casscf_cipsi/class.irp.f | 12 + src/casscf_cipsi/dav_sx_mat.irp.f | 45 + src/casscf_cipsi/densities.irp.f | 67 + src/casscf_cipsi/densities_peter.irp.f | 150 ++ src/casscf_cipsi/det_manip.irp.f | 125 ++ src/casscf_cipsi/driver_optorb.irp.f | 3 + src/casscf_cipsi/get_energy.irp.f | 51 + src/casscf_cipsi/grad_old.irp.f | 74 + src/casscf_cipsi/gradient.irp.f | 215 +++ src/casscf_cipsi/hessian.irp.f | 539 ++++++ src/casscf_cipsi/hessian_old.irp.f | 310 ++++ src/casscf_cipsi/mcscf_fock.irp.f | 80 + src/casscf_cipsi/natorb.irp.f | 231 +++ src/casscf_cipsi/neworbs.irp.f | 253 +++ src/casscf_cipsi/reorder_orb.irp.f | 70 + src/casscf_cipsi/save_energy.irp.f | 9 + src/casscf_cipsi/superci_dm.irp.f | 207 +++ src/casscf_cipsi/swap_orb.irp.f | 132 ++ src/casscf_cipsi/tot_en.irp.f | 101 ++ src/casscf_tc_bi/det_manip.irp.f | 125 ++ src/casscf_tc_bi/grad_dm.irp.f | 263 +++ src/casscf_tc_bi/grad_old.irp.f | 134 ++ src/casscf_tc_bi/gradient.irp.f | 94 ++ src/casscf_tc_bi/test_tc_casscf.irp.f | 252 +++ src/ccsd/ccsd_space_orb_sub.irp.f | 232 ++- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 1458 +++++++++++++++++ src/ccsd/ccsd_spin_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 21 +- src/cipsi/pt2_stoch_routines.irp.f | 2 +- src/cipsi_tc_bi_ortho/get_d0_good.irp.f | 8 +- src/cipsi_tc_bi_ortho/get_d1_good.irp.f | 36 +- src/cipsi_tc_bi_ortho/get_d2_good.irp.f | 28 +- src/cipsi_tc_bi_ortho/selection.irp.f | 35 +- src/cisd/EZFIO.cfg | 8 + src/cisd/NEED | 1 + src/cisd/lccsd.irp.f | 95 ++ src/cisd/lccsd_prov.irp.f | 50 + .../dav_diag_dressed_ext_rout.irp.f | 2 +- .../dav_double_dress_ext_rout.irp.f | 2 +- .../dav_dressed_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_general.irp.f | 2 +- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 2 +- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/davidson_keywords/usef.irp.f | 2 + src/determinants/density_matrix.irp.f | 4 +- src/determinants/dipole_moments.irp.f | 6 +- src/determinants/h_apply.irp.f | 2 +- src/determinants/s2.irp.f | 2 +- src/ezfio_files/NEED | 1 + src/ezfio_files/ezfio.irp.f | 9 +- src/fci_tc_bi/selectors.irp.f | 2 + src/hartree_fock/fock_matrix_hf.irp.f | 90 +- src/mo_optimization/EZFIO.cfg | 4 +- ...optimization.irp.f => cipsi_orb_opt.irp.f} | 2 +- src/mo_optimization/first_gradient_opt.irp.f | 2 +- .../state_average_energy.irp.f | 17 +- src/mo_two_e_ints/cholesky.irp.f | 60 +- src/mo_two_e_ints/integrals_3_index.irp.f | 14 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 113 +- src/tc_bi_ortho/h_mat_triple.irp.f | 391 +++++ src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 93 ++ src/tc_bi_ortho/normal_ordered_old.irp.f | 10 +- src/tc_bi_ortho/print_tc_dump.irp.f | 10 +- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 2 +- src/tc_bi_ortho/slater_tc_opt.irp.f | 31 +- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 19 +- src/tc_bi_ortho/tc_natorb.irp.f | 17 +- src/tc_bi_ortho/tc_prop.irp.f | 8 +- src/tc_bi_ortho/test_normal_order.irp.f | 43 +- src/tc_bi_ortho/test_s2_tc.irp.f | 100 +- src/tc_bi_ortho/test_tc_fock.irp.f | 9 +- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 67 + src/tc_bi_ortho/two_rdm_naive.irp.f | 166 ++ src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/molden_lr_mos.irp.f | 14 +- src/tc_scf/routines_rotates.irp.f | 16 +- src/tools/molden.irp.f | 4 +- src/tools/print_ci_vectors.irp.f | 2 +- src/two_body_rdm/state_av_act_2rdm.irp.f | 10 +- .../state_av_full_orb_2_rdm.irp.f | 320 ++-- src/two_body_rdm/two_e_dm_mo.irp.f | 5 +- src/utils/block_diag_degen_core.irp.f | 244 +++ src/utils/c_functions.f90 | 7 +- src/utils/fast_mkl.c | 5 + src/utils/format_w_error.irp.f | 2 +- src/utils/fortran_mmap.c | 15 +- src/utils/linear_algebra.irp.f | 12 +- src/utils/map_functions.irp.f | 4 + src/utils/memory.irp.f | 49 +- src/utils/mmap.f90 | 22 +- src/utils_cc/mo_integrals_cc.irp.f | 427 ++++- .../rotation_matrix_iterative.irp.f | 4 +- .../trust_region_optimal_lambda.irp.f | 6 +- 115 files changed, 9231 insertions(+), 678 deletions(-) create mode 100644 config/ifort_2021_debug.cfg create mode 100644 src/casscf_cipsi/50.casscf.bats create mode 100644 src/casscf_cipsi/EZFIO.cfg create mode 100644 src/casscf_cipsi/NEED create mode 100644 src/casscf_cipsi/README.rst create mode 100644 src/casscf_cipsi/bavard.irp.f create mode 100644 src/casscf_cipsi/bielec.irp.f create mode 100644 src/casscf_cipsi/bielec_natorb.irp.f create mode 100644 src/casscf_cipsi/casscf.irp.f create mode 100644 src/casscf_cipsi/class.irp.f create mode 100644 src/casscf_cipsi/dav_sx_mat.irp.f create mode 100644 src/casscf_cipsi/densities.irp.f create mode 100644 src/casscf_cipsi/densities_peter.irp.f create mode 100644 src/casscf_cipsi/det_manip.irp.f create mode 100644 src/casscf_cipsi/driver_optorb.irp.f create mode 100644 src/casscf_cipsi/get_energy.irp.f create mode 100644 src/casscf_cipsi/grad_old.irp.f create mode 100644 src/casscf_cipsi/gradient.irp.f create mode 100644 src/casscf_cipsi/hessian.irp.f create mode 100644 src/casscf_cipsi/hessian_old.irp.f create mode 100644 src/casscf_cipsi/mcscf_fock.irp.f create mode 100644 src/casscf_cipsi/natorb.irp.f create mode 100644 src/casscf_cipsi/neworbs.irp.f create mode 100644 src/casscf_cipsi/reorder_orb.irp.f create mode 100644 src/casscf_cipsi/save_energy.irp.f create mode 100644 src/casscf_cipsi/superci_dm.irp.f create mode 100644 src/casscf_cipsi/swap_orb.irp.f create mode 100644 src/casscf_cipsi/tot_en.irp.f create mode 100644 src/casscf_tc_bi/det_manip.irp.f create mode 100644 src/casscf_tc_bi/grad_dm.irp.f create mode 100644 src/casscf_tc_bi/grad_old.irp.f create mode 100644 src/casscf_tc_bi/gradient.irp.f create mode 100644 src/casscf_tc_bi/test_tc_casscf.irp.f create mode 100644 src/ccsd/ccsd_space_orb_sub_chol.irp.f create mode 100644 src/cisd/lccsd.irp.f create mode 100644 src/cisd/lccsd_prov.irp.f rename src/mo_optimization/{optimization.irp.f => cipsi_orb_opt.irp.f} (96%) create mode 100644 src/tc_bi_ortho/h_mat_triple.irp.f create mode 100644 src/tc_bi_ortho/test_tc_two_rdm.irp.f create mode 100644 src/tc_bi_ortho/two_rdm_naive.irp.f create mode 100644 src/utils/block_diag_degen_core.irp.f create mode 100644 src/utils/fast_mkl.c diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index ba37f5dd..178b394e 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config libhdf5-dev - name: zlib run: | ./configure -i zlib || echo OK diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 3bd02898..a0e6d104 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -10,7 +10,8 @@ - Added many types of integrals - Accelerated four-index transformation - Added transcorrelated SCF - - Added transcorrelated CIPSI + - Added bi-orthonormal transcorrelated CIPSI + - Added Cholesky decomposition of AO integrals - Added CCSD and CCSD(T) - Added MO localization - Changed coupling parameters for ROHF @@ -20,7 +21,7 @@ - Removed cryptokit dependency in OCaml - Using now standard convention in RDM - Added molecular properties - - [ ] Added GTOs with complex exponent + - Added GTOs with complex exponent *** TODO: take from dev - Updated version of f77-zmq diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg index fb5ee1cc..370e396e 100644 --- a/config/gfortran_armpl.cfg +++ b/config/gfortran_armpl.cfg @@ -14,7 +14,7 @@ # [COMMON] FC : gfortran -g -ffree-line-length-none -I . -fPIC -march=native -LAPACK_LIB : -larmpl_lp64 +LAPACK_LIB : -larmpl_lp64_mp IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg new file mode 100644 index 00000000..d70b1465 --- /dev/null +++ b/config/ifort_2021_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/etc/qp.rc b/etc/qp.rc index 9eec4570..d316faf5 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,18 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + # Array to store directory names + dirs="" + + # Find directories containing "ezfio/.version" file recursively + for i in $(find . -name ezfio | sed 's/ezfio$/.version/') + do + dir_name=${i%/.version} # Remove the ".version" suffix + dir_name=${dir_name#./} # Remove the leading "./" + dirs+="./$dir_name " + done + + COMPREPLY=( $(compgen -W "$dirs" -- ${cur} ) ) return 0 ;; plugins) diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 9f523fca..9c017813 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,12 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[io_ao_cholesky] +type: Disk_access +doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [ao_integrals_threshold] type: Threshold doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 77eb6ddc..2977f0f4 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,121 +1,3 @@ -BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] - implicit none - BEGIN_DOC - ! Number of Cholesky vectors in AO basis - END_DOC - - cholesky_ao_num_guess = ao_num*ao_num -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num ] -&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] - use mmap_module - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - END_DOC - - type(c_ptr) :: ptr - integer :: fd, i,j,k,l,m,rank - double precision, pointer :: ao_integrals(:,:,:,:) - double precision, external :: ao_two_e_integral - - ! Store AO integrals in a memory mapped file - call mmap(trim(ezfio_work_dir)//'ao_integrals', & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, .False., ptr) - call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - - print*, 'Providing the AO integrals (Cholesky)' - call wall_time(wall_1) - call cpu_time(cpu_1) - - ao_integrals = 0.d0 - - double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 - logical, external :: ao_two_e_integral_zero - double precision, external :: get_ao_two_e_integral - - if (read_ao_two_e_integrals) then - PROVIDE ao_two_e_integrals_in_map - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,ao_num - do k=1,ao_num - do i=1,ao_num - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) - ao_integrals(i,k,j,l) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - else - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - - endif - - ! Call Lapack - cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - - ! Remove mmap - double precision, external :: getUnitAndOpen - call munmap( & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, ptr) - open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') - close(99, status='delete') - -END_PROVIDER - BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -131,3 +13,401 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, enddo END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num + END_DOC + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + + + double precision :: s + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) + logical, allocatable :: computed(:) + + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + double precision, external :: ao_two_e_integral + integer :: block_size, iblock, ierr + + double precision :: mem + double precision, external :: memory_of_double, memory_of_int + + integer, external :: getUnitAndOpen + integer :: iunit + + ndim = ao_num*ao_num + deallocate(cholesky_ao) + + if (read_ao_cholesky) then + print *, 'Reading Cholesky vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') + read(iunit) rank + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + read(iunit) cholesky_ao + close(iunit) + cholesky_ao_num = rank + + else + + PROVIDE nucl_coord + + if (do_direct_integrals) then + if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then + ! Trigger providers inside ao_two_e_integral + continue + endif + else + PROVIDE ao_two_e_integrals_in_map + endif + + tau = ao_cholesky_threshold + + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + call check_mem(mem, irp_here) + + call print_memory_usage() + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + + + rank = 0 + + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + allocate( addr(3,ndim) ) + + ! 1. + k=0 + do j=1,ao_num + do i=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j + enddo + enddo + + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif + + Dmax = maxval(D) + + ! 2. + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo + + ! 3. + N = 0 + + ! 4. + i = 0 + + ! 5. + do while ( (Dmax > tau).and.(rank < ndim) ) + ! a. + i = i+1 + + s = 0.01d0 + + ! Inrease s until the arrays fit in memory + do while (.True.) + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + endif + enddo + + call total_memory(mem) + mem = mem & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + + if (mem > qp_max_mem) then + s = s*2.d0 + else + exit + endif + + if ((s > 1.d0).or.(nq == 0)) then + call print_memory_usage() + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + + enddo + + ! d., e. + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k,j) + do k=1,rank + do j=1,ndim + L(j,k) = L_old(j,k) + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif + + + allocate(computed(nq)) + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + + !$OMP DO + do q=1,nq + do j=1,np + Delta(j,q) = 0.d0 + enddo + computed(q) = .False. + enddo + !$OMP ENDDO NOWAIT + + !$OMP DO + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + ! g. + + iblock = 0 + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + ! i. + rank = N+j + + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo + + L(1:ndim, rank) = 0.d0 + + if (.not.computed(dj)) then + m = dj + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + endif + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. + endif + + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& + Ltmp_p(1,iblock), 1) + endif + + ! iii. + f = 1.d0/dsqrt(Qmax) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP DO + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) + enddo + !$OMP END DO + + !$OMP DO + do q=1,nq + Ltmp_q(q,iblock) = L(Dset(q), rank) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + enddo + + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(computed) + deallocate(Delta) + deallocate(Ltmp_p) + deallocate(Ltmp_q) + + ! i. + N = rank + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo + + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo + + enddo + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + + if (write_ao_cholesky) then + print *, 'Writing Cholesky vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') + write(iunit) rank + write(iunit) cholesky_ao + close(iunit) + call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read') + endif + + endif + + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + +END_PROVIDER + diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 85ff5bcf..148ebb62 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(dynamic) + !$OMP SCHEDULE(guided) do i=1,ao_num do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) @@ -951,7 +951,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib ASSERT (a>2) !DIR$ LOOP COUNT(8) @@ -974,8 +974,43 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if (nx >= 0) then + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -996,9 +1031,47 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif + ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + endif ny=0 @@ -1016,8 +1089,45 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1034,7 +1144,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib if( (c<0).or.(nd<0) )then nd = -1 @@ -1056,8 +1166,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 @@ -1068,8 +1214,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end @@ -1087,7 +1269,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib !DIR$ LOOP COUNT(8) do ix=0,n_pt_in @@ -1097,8 +1279,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1117,8 +1335,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1129,8 +1383,45 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1147,7 +1438,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) integer :: nx, ix,ny double precision :: X(0:max_dim),Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y - integer :: i + integer :: i, ib select case (c) case (0) @@ -1178,8 +1469,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(2) = D_00(2) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + return @@ -1198,8 +1526,44 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_01,2,d,nd) - call multiply_poly_c2(X,nx,B_01,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(2) * X(0) + + case (1) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(2) * X(1) + + case (2) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(0) * X(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(1) * X(2) + B_01(2) * X(1) + d(4) = d(4) + B_01(2) * X(2) + + case default + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_01(0) * X(ib) + B_01(1) * X(ib-1) + B_01(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_01(1) * X(nx) + B_01(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_01(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1209,8 +1573,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end select end @@ -1232,7 +1633,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) logical, external :: ao_two_e_integral_zero integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + double precision, external :: ao_two_e_integral + double precision :: cpu_1,cpu_2, wall_1, wall_2 double precision :: integral, wall_0 double precision :: thr integer :: kk, m, j1, i1 @@ -1299,3 +1701,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd) end +!DIR$ FORCEINLINE +subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + diff --git a/src/casscf_cipsi/50.casscf.bats b/src/casscf_cipsi/50.casscf.bats new file mode 100644 index 00000000..a0db725d --- /dev/null +++ b/src/casscf_cipsi/50.casscf.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run_stoch() { + thresh=$2 + test_exe casscf || skip + qp set perturbation do_pt2 True + qp set determinants n_det_max $3 + qp set davidson threshold_davidson 1.e-10 + qp set davidson n_states_diag 4 + qp run casscf | tee casscf.out + energy1="$(ezfio get casscf energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" + eq $energy1 $1 $thresh +} + +@test "F2" { # 18.0198s + rm -rf f2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/f2.zmt -o f2_casscf + qp set_file f2_casscf + qp run scf + qp set_mo_class --core="[1-6,8-9]" --act="[7,10]" --virt="[11-46]" + run_stoch -198.773366970 1.e-4 100000 +} + +@test "N2" { # 18.0198s + rm -rf n2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/n2.xyz -o n2_casscf + qp set_file n2_casscf + qp run scf + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + run_stoch -109.0961643162 1.e-4 100000 +} + +@test "N2_stretched" { + rm -rf n2_stretched_casscf + qp_create_ezfio -b aug-cc-pvdz -m 7 ../input/n2_stretched.xyz -o n2_stretched_casscf + qp set_file n2_stretched_casscf + qp run scf | tee scf.out + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + qp set electrons elec_alpha_num 7 + qp set electrons elec_beta_num 7 + run_stoch -108.7860471300 1.e-4 100000 +# + +} + diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg new file mode 100644 index 00000000..2a1f1926 --- /dev/null +++ b/src/casscf_cipsi/EZFIO.cfg @@ -0,0 +1,75 @@ +[energy] +type: double precision +doc: Calculated Selected |FCI| energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated |FCI| energy + |PT2| +interface: ezfio +size: (determinants.n_states) + +[state_following_casscf] +type: logical +doc: If |true|, the CASSCF will try to follow the guess CI vector and orbitals +interface: ezfio,provider,ocaml +default: False + + +[diag_hess_cas] +type: logical +doc: If |true|, only the DIAGONAL part of the hessian is retained for the CASSCF +interface: ezfio,provider,ocaml +default: False + +[hess_cv_cv] +type: logical +doc: If |true|, the core-virtual - core-virtual part of the hessian is computed +interface: ezfio,provider,ocaml +default: True + + +[level_shift_casscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve SCF convergence +interface: ezfio,provider,ocaml +default: 0.005 + + +[fast_2rdm] +type: logical +doc: If true, the two-rdm are computed with a fast algo +interface: ezfio,provider,ocaml +default: True + +[criterion_casscf] +type: character*(32) +doc: choice of the criterion for the convergence of the casscf: can be energy or gradients or e_pt2 +interface: ezfio, provider, ocaml +default: e_pt2 + +[thresh_casscf] +type: Threshold +doc: Threshold on the convergence of the CASCF energy. +interface: ezfio,provider,ocaml +default: 1.e-06 + + +[pt2_min_casscf] +type: Threshold +doc: Minimum value of the pt2_max parameter for the CIPSI in the CASSCF iterations. +interface: ezfio,provider,ocaml +default: 1.e-04 + +[n_big_act_orb] +type: integer +doc: Number of active orbitals from which the active space is considered as large, and therefore pt2_min_casscf is activated. +interface: ezfio,provider,ocaml +default: 16 + +[adaptive_pt2_max] +type: logical +doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder +interface: ezfio,provider,ocaml +default: True diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED new file mode 100644 index 00000000..dd91c7bd --- /dev/null +++ b/src/casscf_cipsi/NEED @@ -0,0 +1,5 @@ +cipsi +selectors_full +generators_cas +two_body_rdm +dav_general_mat diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst new file mode 100644 index 00000000..08bfd95b --- /dev/null +++ b/src/casscf_cipsi/README.rst @@ -0,0 +1,5 @@ +====== +casscf +====== + +|CASSCF| program with the CIPSI algorithm. diff --git a/src/casscf_cipsi/bavard.irp.f b/src/casscf_cipsi/bavard.irp.f new file mode 100644 index 00000000..463c3ea4 --- /dev/null +++ b/src/casscf_cipsi/bavard.irp.f @@ -0,0 +1,6 @@ +! -*- F90 -*- +BEGIN_PROVIDER [logical, bavard] +! bavard=.true. + bavard=.false. +END_PROVIDER + diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f new file mode 100644 index 00000000..0a44f994 --- /dev/null +++ b/src/casscf_cipsi/bielec.irp.f @@ -0,0 +1,155 @@ +BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + real*8 :: mo_two_e_integral + + bielec_PQxx(:,:,:,:) = 0.d0 + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) + bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + end do + end do + !$OMP END DO + + + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + double precision, allocatable :: integrals_array(:,:) + real*8 :: mo_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + bielec_PxxQ = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + allocate(integrals_array(mo_num,mo_num)) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + end do + end do + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + + ! (ip|qj) + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(integrals_array) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,p,t,u,v + double precision, external :: mo_two_e_integral + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,p,t,u,v) & + !$OMP SHARED(mo_num,n_act_orb,list_act,bielecCI) + do p=1,mo_num + do j=1,n_act_orb + u=list_act(j) + do k=1,n_act_orb + v=list_act(k) + do i=1,n_act_orb + t=list_act(i) + bielecCI(i,k,j,p) = mo_two_e_integral(t,u,v,p) + end do + end do + end do + end do + !$OMP END PARALLEL DO + +END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f new file mode 100644 index 00000000..9968530c --- /dev/null +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -0,0 +1,369 @@ + BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! integral (pq|xx) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + end do + end do + + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + do j=1,mo_num + bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(mo_num,mo_num,n_act_orb),d(mo_num,mo_num,n_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate (f,d) + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! integral (px|xq) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + + + allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & + d(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)) + + !$OMP DO + do j=1,mo_num + bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_core_inact_act_orb**2,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do k=1,mo_num + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate(f,d) + + allocate(f(mo_num,n_core_inact_act_orb,n_act_orb), & + d(mo_num,n_core_inact_act_orb,n_act_orb) ) + + !$OMP DO + do k=1,mo_num + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + deallocate(f,d) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! integrals (tu|vp) in the basis of natural MOs + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielecCI_no,bielecCI,list_act,natorbsCI) + + allocate (f(n_act_orb,n_act_orb,mo_num), & + d(n_act_orb,n_act_orb,mo_num)) + + !$OMP DO + do l=1,mo_num + bielecCI_no(:,:,:,l) = bielecCI(:,:,:,l) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(p,j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + bielecCI_no(p,j,k,l)=d(p,j,k) + end do + end do + + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(j,p,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do p=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,p,k,l)=d(p,j,k) + end do + end do + end do + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,p,l) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO + do l=1,n_act_orb + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,l,list_act(p)) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,l,list_act(p))=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(d,f) + !$OMP END PARALLEL + + +END_PROVIDER + diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f new file mode 100644 index 00000000..02954ebf --- /dev/null +++ b/src/casscf_cipsi/casscf.irp.f @@ -0,0 +1,110 @@ +program casscf + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + call reorder_orbitals_for_casscf +! no_vvvv_integrals = .True. +! touch no_vvvv_integrals + n_det_max_full = 500 + touch n_det_max_full + pt2_relative_error = 0.04 + touch pt2_relative_error +! call run_stochastic_cipsi + call run +end + +subroutine run + implicit none + double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E + logical :: converged,state_following_casscf_cipsi_save + integer :: iteration + converged = .False. + + energy = 0.d0 + mo_label = "MCSCF" + iteration = 1 + state_following_casscf_cipsi_save = state_following_casscf + state_following_casscf = .True. + touch state_following_casscf + ept2_before = 0.d0 + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif + do while (.not.converged) + print*,'pt2_max = ',pt2_max + call run_stochastic_cipsi + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'CAS-SCF energy = ') + if(n_states == 1)then + double precision :: E_PT2, PT2 + call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) + call ezfio_get_casscf_cipsi_energy(PT2) + PT2 -= E_PT2 + call write_double(6,E_PT2,'E + PT2 energy = ') + call write_double(6,PT2,' PT2 = ') + call write_double(6,pt2_max,' PT2_MAX = ') + endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = dabs(E_PT2 - ept2_before) + converged = dabs(delta_E) < thresh_casscf + endif + ept2_before = E_PT2 + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif + endif + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + call save_mos + if(.not.converged)then + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_cipsi_save + soft_touch state_following_casscf + endif + endif + + enddo + +end + + diff --git a/src/casscf_cipsi/class.irp.f b/src/casscf_cipsi/class.irp.f new file mode 100644 index 00000000..7360a661 --- /dev/null +++ b/src/casscf_cipsi/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the CAS case, all those are always false except do_only_cas + END_DOC + do_only_cas = .True. + do_only_1h1p = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/casscf_cipsi/dav_sx_mat.irp.f b/src/casscf_cipsi/dav_sx_mat.irp.f new file mode 100644 index 00000000..1e24f0e2 --- /dev/null +++ b/src/casscf_cipsi/dav_sx_mat.irp.f @@ -0,0 +1,45 @@ + + +subroutine davidson_diag_sx_mat(N_st, u_in, energies) + implicit none + integer, intent(in) :: N_st + double precision, intent(out) :: u_in(nMonoEx+1,n_states_diag), energies(N_st) + integer :: i,j,N_st_tmp, dim_in, sze, N_st_diag_in + integer, allocatable :: list_guess(:) + double precision, allocatable :: H_jj(:) + logical :: converged + N_st_diag_in = n_states_diag + provide SXmatrix + sze = nMonoEx+1 + dim_in = sze + allocate(H_jj(sze), list_guess(sze)) + H_jj(1) = 0.d0 + N_st_tmp = 1 + list_guess(1) = 1 + do j = 2, nMonoEx+1 + H_jj(j) = SXmatrix(j,j) + if(H_jj(j).lt.0.d0)then + list_guess(N_st_tmp) = j + N_st_tmp += 1 + endif + enddo + if(N_st_tmp .ne. N_st)then + print*,'Pb in davidson_diag_sx_mat' + print*,'N_st_tmp .ne. N_st' + print*,N_st_tmp, N_st + stop + endif + print*,'Number of possibly interesting states = ',N_st + print*,'Corresponding diagonal elements of the SX matrix ' + u_in = 0.d0 + do i = 1, min(N_st, N_st_diag_in) +! do i = 1, N_st + j = list_guess(i) + print*,'i,j',i,j + print*,'SX(i,i) = ',H_jj(j) + u_in(j,i) = 1.d0 + enddo + call davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,SXmatrix) + print*,'energies = ',energies + +end diff --git a/src/casscf_cipsi/densities.irp.f b/src/casscf_cipsi/densities.irp.f new file mode 100644 index 00000000..bebcf5d7 --- /dev/null +++ b/src/casscf_cipsi/densities.irp.f @@ -0,0 +1,67 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, D0tu, (n_act_orb,n_act_orb) ] + implicit none + BEGIN_DOC + ! the first-order density matrix in the basis of the starting MOs. + ! matrix is state averaged. + END_DOC + integer :: t,u + + do u=1,n_act_orb + do t=1,n_act_orb + D0tu(t,u) = one_e_dm_mo_alpha_average( list_act(t), list_act(u) ) + & + one_e_dm_mo_beta_average ( list_act(t), list_act(u) ) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [real*8, P0tuvx, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! The second-order density matrix in the basis of the starting MOs ONLY IN THE RANGE OF ACTIVE MOS + ! The values are state averaged + ! + ! We use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + ! P0tuvx(p,q,r,s) = chemist notation : 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + END_DOC + implicit none + integer :: t,u,v,x + integer :: tt,uu,vv,xx + integer :: mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing the 2 body RDM on the active part' + endif + + P0tuvx= 0.d0 + if(fast_2rdm)then + do istate=1,N_states + do x = 1, n_act_orb + do v = 1, n_act_orb + do u = 1, n_act_orb + do t = 1, n_act_orb + ! 1 1 2 2 1 2 1 2 + P0tuvx(t,u,v,x) = 0.5d0 * state_av_act_2_rdm_spin_trace_mo(t,v,u,x) + enddo + enddo + enddo + enddo + enddo + else + P0tuvx = P0tuvx_peter + endif + +END_PROVIDER diff --git a/src/casscf_cipsi/densities_peter.irp.f b/src/casscf_cipsi/densities_peter.irp.f new file mode 100644 index 00000000..ee7414da --- /dev/null +++ b/src/casscf_cipsi/densities_peter.irp.f @@ -0,0 +1,150 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, P0tuvx_peter, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! the second-order density matrix in the basis of the starting MOs + ! matrices are state averaged + ! + ! we use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + END_DOC + implicit none + integer :: t,u,v,x,mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing density matrix P0' + endif + + P0tuvx_peter = 0.d0 + + ! first loop: we apply E_tu, once for D_tu, once for -P_tvvu + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do t=1,n_act_orb + ipart=list_act(t) + do u=1,n_act_orb + ihole=list_act(u) + ! apply E_tu + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! det_mu_ex1 is in the list + if (nu1.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu1,istate)*phase1 + ! and we fill P0_tvvu + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + ! det_mu_ex2 is in the list + if (nu2.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu2,istate)*phase2 + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + end do + end do + end do + ! now we do the double excitation E_tu E_vx |0> + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do v=1,n_act_orb + ipart=list_act(v) + do x=1,n_act_orb + ihole=list_act(x) + ! apply E_vx + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! we apply E_tu to the first resultant determinant, thus E_tu E_vx |0> + if (ierr1.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex1,det_mu_ex11,N_int) + call det_copy(det_mu_ex1,det_mu_ex12,N_int) + call do_spinfree_mono_excitation(det_mu_ex1,det_mu_ex11& + ,det_mu_ex12,nu11,nu12,jhole,jpart,phase11,phase12,ierr11,ierr12) + if (nu11.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu11,istate)& + *phase11*phase1 + end do + end if + if (nu12.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu12,istate)& + *phase12*phase1 + end do + end if + end do + end do + end if + + ! we apply E_tu to the second resultant determinant + if (ierr2.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex2,det_mu_ex21,N_int) + call det_copy(det_mu_ex2,det_mu_ex22,N_int) + call do_spinfree_mono_excitation(det_mu_ex2,det_mu_ex21& + ,det_mu_ex22,nu21,nu22,jhole,jpart,phase21,phase22,ierr21,ierr22) + if (nu21.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu21,istate)& + *phase21*phase2 + end do + end if + if (nu22.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu22,istate)& + *phase22*phase2 + end do + end if + end do + end do + end if + + end do + end do + end do + + ! we average by just dividing by the number of states + do x=1,n_act_orb + do v=1,n_act_orb + do u=1,n_act_orb + do t=1,n_act_orb + P0tuvx_peter(t,u,v,x)*=0.5D0/dble(N_states) + end do + end do + end do + end do + +END_PROVIDER diff --git a/src/casscf_cipsi/det_manip.irp.f b/src/casscf_cipsi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_cipsi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_cipsi/driver_optorb.irp.f b/src/casscf_cipsi/driver_optorb.irp.f new file mode 100644 index 00000000..2e3e02dc --- /dev/null +++ b/src/casscf_cipsi/driver_optorb.irp.f @@ -0,0 +1,3 @@ +subroutine driver_optorb + implicit none +end diff --git a/src/casscf_cipsi/get_energy.irp.f b/src/casscf_cipsi/get_energy.irp.f new file mode 100644 index 00000000..cfb26b59 --- /dev/null +++ b/src/casscf_cipsi/get_energy.irp.f @@ -0,0 +1,51 @@ +program print_2rdm + implicit none + BEGIN_DOC + ! get the active part of the bielectronic energy on a given wave function. + ! + ! useful to test the active part of the spin trace 2 rdms + END_DOC +!no_vvvv_integrals = .True. + read_wf = .True. +!touch read_wf no_vvvv_integrals +!call routine +!call routine_bis + call print_grad +end + +subroutine print_grad + implicit none + integer :: i + do i = 1, nMonoEx + if(dabs(gradvec2(i)).gt.1.d-5)then + print*,'' + print*,i,gradvec2(i),excit(:,i) + endif + enddo +end + +subroutine routine + integer :: i,j,k,l + integer :: ii,jj,kk,ll + double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral + thr = 1.d-10 + + + accu = 0.d0 + do ll = 1, n_act_orb + l = list_act(ll) + do kk = 1, n_act_orb + k = list_act(kk) + do jj = 1, n_act_orb + j = list_act(jj) + do ii = 1, n_act_orb + i = list_act(ii) + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + accu(1) += state_av_act_2_rdm_spin_trace_mo(ii,jj,kk,ll) * integral + enddo + enddo + enddo + enddo + print*,'accu = ',accu(1) + +end diff --git a/src/casscf_cipsi/grad_old.irp.f b/src/casscf_cipsi/grad_old.irp.f new file mode 100644 index 00000000..d60a60c8 --- /dev/null +++ b/src/casscf_cipsi/grad_old.irp.f @@ -0,0 +1,74 @@ + +BEGIN_PROVIDER [real*8, gradvec_old, (nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate + real*8 :: res + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + call calc_grad_elem(ihole,ipart,res) + gradvec_old(indx)=res + end do + + real*8 :: norm_grad + norm_grad=0.d0 + do indx=1,nMonoEx + norm_grad+=gradvec_old(indx)*gradvec_old(indx) + end do + norm_grad=sqrt(norm_grad) + if (bavard) then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via <0|EH|0>) : ', norm_grad + write(6,*) + endif + + +END_PROVIDER + +subroutine calc_grad_elem(ihole,ipart,res) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 , q=hole, p=particle + END_DOC + implicit none + integer :: ihole,ipart,mu,iii,ispin,ierr,nu,istate + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_psi_array(N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res=0.D0 + + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + call i_H_psi(det_mu_ex,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase + end do + end if + end do + end do + + ! state-averaged gradient + res*=2.D0/dble(N_states) + +end subroutine calc_grad_elem + diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f new file mode 100644 index 00000000..a1c5e947 --- /dev/null +++ b/src/casscf_cipsi/gradient.irp.f @@ -0,0 +1,215 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + + if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)] +&BEGIN_PROVIDER [real*8, norm_grad_vec2] +&BEGIN_PROVIDER [real*8, norm_grad_vec2_tab, (3)] + BEGIN_DOC + ! calculate the orbital gradient from density + ! matrices and integrals; Siegbahn et al, Phys Scr 1980 + ! eqs 14 a,b,c + END_DOC + implicit none + integer :: i,t,a,indx + real*8 :: gradvec_it,gradvec_ia,gradvec_ta + + indx=0 + norm_grad_vec2_tab = 0.d0 + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx+=1 + gradvec2(indx)=gradvec_it(i,t) + norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do i=1,n_core_inact_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ia(i,a) + norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ta(t,a) + norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx) + end do + end do + + norm_grad_vec2=0.d0 + do indx=1,nMonoEx + norm_grad_vec2+=gradvec2(indx)*gradvec2(indx) + end do + do i = 1, 3 + norm_grad_vec2_tab(i) = dsqrt(norm_grad_vec2_tab(i)) + enddo + norm_grad_vec2=sqrt(norm_grad_vec2) + if(bavard)then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2 + write(6,*) + endif + +END_PROVIDER + +real*8 function gradvec_it(i,t) + BEGIN_DOC + ! the orbital gradient core/inactive -> active + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t + + integer :: ii,tt,v,vv,x,y + integer :: x3,y3 + + ii=list_core_inact(i) + tt=list_act(t) + gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii)) + gradvec_it-=occnum(tt)*Fipq(ii,tt) + do v=1,n_act_orb ! active + vv=list_act(v) + do x=1,n_act_orb ! active + x3=x+n_core_inact_orb ! list_act(x) + do y=1,n_act_orb ! active + y3=y+n_core_inact_orb ! list_act(y) + ! Gamma(2) a a a a 1/r12 i a a a + gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) + end do + end do + end do + gradvec_it*=2.D0 +end function gradvec_it + +real*8 function gradvec_ia(i,a) + BEGIN_DOC + ! the orbital gradient core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,ii,aa + + ii=list_core_inact(i) + aa=list_virt(a) + gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii)) + gradvec_ia*=2.D0 + +end function gradvec_ia + +real*8 function gradvec_ta(t,a) + BEGIN_DOC + ! the orbital gradient active -> virtual + ! we assume natural orbitals + END_DOC + implicit none + integer :: t,a,tt,aa,v,vv,x,y + + tt=list_act(t) + aa=list_virt(a) + gradvec_ta=0.D0 + gradvec_ta+=occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + gradvec_ta+=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + gradvec_ta*=2.D0 + +end function gradvec_ta + diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f new file mode 100644 index 00000000..458c6aa6 --- /dev/null +++ b/src/casscf_cipsi/hessian.irp.f @@ -0,0 +1,539 @@ +use bitmasks + +real*8 function hessmat_itju(i,t,j,u) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> active + ! i, t, j, u are list indices, the corresponding orbitals are ii,tt,jj,uu + ! + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj + real*8 :: term,t2 + + ii=list_core_inact(i) + tt=list_act(t) + if (i.eq.j) then + if (t.eq.u) then + ! diagonal element + term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & + -2.D0*(Fipq(ii,ii)+Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i)) + term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt) & + -bielec_pqxx_no(tt,tt,i,i)) + term-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + else + ! it/iu, t != u + uu=list_act(u) + term=2.D0*(Fipq(tt,uu)+Fapq(tt,uu)) + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=occnum(tt)*Fipq(uu,tt) + term-=(occnum(tt)+occnum(uu)) & + *(3.D0*bielec_PxxQ_no(tt,i,i,uu)-bielec_PQxx_no(uu,tt,i,i)) + do v=1,n_act_orb + vv=list_act(v) + ! term-=D0tu(u,v)*Fipq(tt,vv) ! published, but inverting t and u seems more correct + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(u,v,y,xx) + end do + end do + end do + end if + else + ! it/ju + jj=list_core_inact(j) + uu=list_act(u) + if (t.eq.u) then + term=occnum(tt)*Fipq(ii,jj) + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + else + term=0.D0 + end if + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=(occnum(tt)+occnum(uu))* & + (4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(uu,tt,i,j)) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,j) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,j,xx)) + end do + end do + end if + + term*=2.D0 + hessmat_itju=term + +end function hessmat_itju + +real*8 function hessmat_itja(i,t,j,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> virtual + END_DOC + implicit none + integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y + real*8 :: term + + ! it/ja + ii=list_core_inact(i) + tt=list_act(t) + jj=list_core_inact(j) + aa=list_virt(a) + term=2.D0*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + term-=occnum(tt)*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + if (i.eq.j) then + term+=2.D0*(Fipq(aa,tt)+Fapq(aa,tt)) + term-=0.5D0*occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + end if + term*=2.D0 + hessmat_itja=term + +end function hessmat_itja + +real*8 function hessmat_itua(i,t,u,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, active -> virtual + END_DOC + implicit none + integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 + real*8 :: term + + ii=list_core_inact(i) + tt=list_act(t) + t3=t+n_core_inact_orb + uu=list_act(u) + u3=u+n_core_inact_orb + aa=list_virt(a) + if (t.eq.u) then + term=-occnum(tt)*Fipq(aa,ii) + else + term=0.D0 + end if + term-=occnum(uu)*(bielec_pqxx_no(aa,ii,t3,u3)-4.D0*bielec_pqxx_no(aa,uu,t3,i)& + +bielec_pxxq_no(aa,t3,u3,ii)) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + integer :: x3 + xx=list_act(x) + x3=x+n_core_inact_orb + term-=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,ii,v3,x3) & + +(P0tuvx_no(t,v,u,x)+P0tuvx_no(t,v,x,u)) & + *bielec_pqxx_no(aa,xx,v3,i)) + end do + end do + if (t.eq.u) then + term+=Fipq(aa,ii)+Fapq(aa,ii) + end if + term*=2.D0 + hessmat_itua=term + +end function hessmat_itua + +real*8 function hessmat_iajb(i,a,j,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,j,b,ii,aa,jj,bb + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + if (i.eq.j) then + if (a.eq.b) then + ! ia/ia + term=2.D0*(Fipq(aa,aa)+Fapq(aa,aa)-Fipq(ii,ii)-Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,aa)-bielec_pqxx_no(aa,aa,i,i)) + else + bb=list_virt(b) + ! ia/ib + term=2.D0*(Fipq(aa,bb)+Fapq(aa,bb)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,bb)-bielec_pqxx_no(aa,bb,i,i)) + end if + else + ! ia/jb + jj=list_core_inact(j) + bb=list_virt(b) + term=2.D0*(4.D0*bielec_pxxq_no(aa,i,j,bb)-bielec_pqxx_no(aa,bb,i,j) & + -bielec_pxxq_no(aa,j,i,bb)) + if (a.eq.b) then + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + end if + end if + term*=2.D0 + hessmat_iajb=term + +end function hessmat_iajb + +real*8 function hessmat_iatb(i,a,t,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, active -> virtual + END_DOC + implicit none + integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + tt=list_act(t) + bb=list_virt(b) + t3=t+n_core_inact_orb + term=occnum(tt)*(4.D0*bielec_pxxq_no(aa,i,t3,bb)-bielec_pxxq_no(aa,t3,i,bb)& + -bielec_pqxx_no(aa,bb,i,t3)) + if (a.eq.b) then + term-=Fipq(tt,ii)+Fapq(tt,ii) + term-=0.5D0*occnum(tt)*Fipq(tt,ii) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,ii) + end do + end do + end do + end if + term*=2.D0 + hessmat_iatb=term + +end function hessmat_iatb + +real*8 function hessmat_taub(t,a,u,b) + BEGIN_DOC + ! the orbital hessian for act->virt,act->virt + END_DOC + implicit none + integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y + integer :: v3,x3 + real*8 :: term,t1,t2,t3 + + tt=list_act(t) + aa=list_virt(a) + if (t == u) then + if (a == b) then + ! ta/ta + t1=occnum(tt)*Fipq(aa,aa) + t2=0.D0 + t3=0.D0 + t1-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(aa,x3,v3,aa)) + do y=1,n_act_orb + t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + term=t1+t2+t3 + else + bb=list_virt(b) + ! ta/tb b/=a + term=occnum(tt)*Fipq(aa,bb) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + end if + else + ! ta/ub t/=u + uu=list_act(u) + bb=list_virt(b) + term=0.D0 + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + if (a.eq.b) then + term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) + do v=1,n_act_orb + do y=1,n_act_orb + do x=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) + term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) + end do + end do + end do + end if + + end if + + term*=2.D0 + hessmat_taub=term + +end function hessmat_taub + +BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] + BEGIN_DOC + ! the diagonal of the Hessian, needed for the Davidson procedure + END_DOC + implicit none + integer :: i,t,a,indx,indx_shift + real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,t,a,indx_shift) + + !$OMP DO + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + hessdiag(indx)=hessmat_itju(i,t,i,t) + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_iajb(i,a,i,a) + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_taub(t,a,t,a) + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] + implicit none + integer :: i,j,t,u,a,b + integer :: indx,indx_tmp, jndx, jndx_tmp + integer :: ustart,bstart + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + ! c-a c-v a-v + ! c-a | X X X + ! c-v | X X + ! a-v | X + + provide mo_two_e_integrals_in_map + + hessmat = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,list_idx_c_a,n_core_inact_orb,n_act_orb,mat_idx_c_a) & + !$OMP PRIVATE(indx_tmp,indx,i,t,j,u,ustart,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-active > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-active excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + jndx = mat_idx_c_a(j,u) + hessmat(jndx,indx) = hessmat_itju(i,t,j,u) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_c_v_prov,list_idx_c_a,list_idx_c_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,j,a,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-VIRTUAL excitations + do jndx_tmp = 1, n_c_v_prov + jndx = list_idx_c_v(1,jndx_tmp) + j = list_idx_c_v(2,jndx_tmp) + a = list_idx_c_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itja(i,t,j,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_a_v_prov,list_idx_c_a,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,u,a,jndx) + + !$OMP DO +!!!! < Core-active| H |ACTIVE-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! ACTIVE-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + u = list_idx_a_v(2,jndx_tmp) + a = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itua(i,t,u,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + if(hess_cv_cv)then + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,list_idx_c_v,n_core_inact_orb,n_virt_orb,mat_idx_c_v) & + !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx) + !$OMP DO +!!!!! < Core-VIRTUAL | H |Core-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Core-VIRTUAL excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_c_v(j,b) + hessmat(jndx,indx) = hessmat_iajb(i,a,j,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + endif + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,n_a_v_prov,list_idx_c_v,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,a,t,b,jndx) + + !$OMP DO +!!!! < Core-VIRTUAL | H |Active-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + t = list_idx_a_v(2,jndx_tmp) + b = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_iatb(i,a,t,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_a_v_prov,list_idx_a_v,n_act_orb,n_virt_orb,mat_idx_a_v) & + !$OMP PRIVATE(indx_tmp,indx,t,a,u,b,bstart,jndx) + + !$OMP DO +!!!! < Active-VIRTUAL | H |Active-VIRTUAL > + ! Active-VIRTUAL excitations + do indx_tmp = 1, n_a_v_prov + indx = list_idx_a_v(1,indx_tmp) + t = list_idx_a_v(2,indx_tmp) + a = list_idx_a_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_a_v(u,b) + hessmat(jndx,indx) = hessmat_taub(t,a,u,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + +END_PROVIDER diff --git a/src/casscf_cipsi/hessian_old.irp.f b/src/casscf_cipsi/hessian_old.irp.f new file mode 100644 index 00000000..d17f1f0a --- /dev/null +++ b/src/casscf_cipsi/hessian_old.irp.f @@ -0,0 +1,310 @@ + +use bitmasks +BEGIN_PROVIDER [real*8, hessmat_old, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! calculate the orbital hessian 2 + ! + + by hand, + ! determinant per determinant, as for the gradient + ! + ! we assume that we have natural active orbitals + END_DOC + implicit none + integer :: indx,ihole,ipart + integer :: jndx,jhole,jpart + character*3 :: iexc,jexc + real*8 :: res + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_old ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + + do indx=1,nMonoEx + do jndx=1,nMonoEx + hessmat_old(indx,jndx)=0.D0 + end do + end do + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + iexc=excit_class(indx) + do jndx=indx,nMonoEx + jhole=excit(1,jndx) + jpart=excit(2,jndx) + jexc=excit_class(jndx) + call calc_hess_elem(ihole,ipart,jhole,jpart,res) + hessmat_old(indx,jndx)=res + hessmat_old(jndx,indx)=res + end do + end do + +END_PROVIDER + +subroutine calc_hess_elem(ihole,ipart,jhole,jpart,res) + BEGIN_DOC + ! eq 19 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 + ! + + + ! average over all states is performed. + ! no transition between states. + END_DOC + implicit none + integer :: ihole,ipart,ispin,mu,istate + integer :: jhole,jpart,jspin + integer :: mu_pq, mu_pqrs, mu_rs, mu_rspq, nu_rs,nu + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:) + integer(bit_kind), allocatable :: det_nu(:,:) + integer(bit_kind), allocatable :: det_mu_pq(:,:) + integer(bit_kind), allocatable :: det_mu_rs(:,:) + integer(bit_kind), allocatable :: det_nu_rs(:,:) + integer(bit_kind), allocatable :: det_mu_pqrs(:,:) + integer(bit_kind), allocatable :: det_mu_rspq(:,:) + real*8 :: i_H_psi_array(N_states),phase,phase2,phase3 + real*8 :: i_H_j_element + allocate(det_mu(N_int,2)) + allocate(det_nu(N_int,2)) + allocate(det_mu_pq(N_int,2)) + allocate(det_mu_rs(N_int,2)) + allocate(det_nu_rs(N_int,2)) + allocate(det_mu_pqrs(N_int,2)) + allocate(det_mu_rspq(N_int,2)) + integer :: mu_pq_possible + integer :: mu_rs_possible + integer :: nu_rs_possible + integer :: mu_pqrs_possible + integer :: mu_rspq_possible + + res=0.D0 + + ! the terms <0|E E H |0> + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation pq on it + call det_copy(det_mu,det_mu_pq,N_int) + call do_signed_mono_excitation(det_mu,det_mu_pq,mu_pq & + ,ihole,ipart,ispin,phase,mu_pq_possible) + if (mu_pq_possible.eq.1) then + ! possible, but not necessarily in the list + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jhole,jpart,jspin,phase2,mu_pqrs_possible) + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + ! try the de-excitation with opposite sign + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jpart,jhole,jspin,phase2,mu_pqrs_possible) + phase2=-phase2 + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + end do + end if + ! exchange the notion of pq and rs + ! do the monoexcitation rs on the initial determinant + call det_copy(det_mu,det_mu_rs,N_int) + call do_signed_mono_excitation(det_mu,det_mu_rs,mu_rs & + ,jhole,jpart,ispin,phase2,mu_rs_possible) + if (mu_rs_possible.eq.1) then + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ihole,ipart,jspin,phase3,mu_rspq_possible) + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + ! we may try the de-excitation, with opposite sign + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ipart,ihole,jspin,phase3,mu_rspq_possible) + phase3=-phase3 + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + end do + end if + ! + ! the operator E H E, we have to do a double loop over the determinants + ! we still have the determinant mu_pq and the phase in memory + if (mu_pq_possible.eq.1) then + do nu=1,N_det + call det_extract(det_nu,nu,N_int) + do jspin=1,2 + call det_copy(det_nu,det_nu_rs,N_int) + call do_signed_mono_excitation(det_nu,det_nu_rs,nu_rs & + ,jhole,jpart,jspin,phase2,nu_rs_possible) + ! excitation possible ? + if (nu_rs_possible.eq.1) then + call i_H_j(det_mu_pq,det_nu_rs,N_int,i_H_j_element) + do istate=1,N_states + res+=2.D0*i_H_j_element*psi_coef(mu,istate) & + *psi_coef(nu,istate)*phase*phase2 + end do + end if + end do + end do + end if + end do + end do + + ! state-averaged Hessian + res*=1.D0/dble(N_states) + +end subroutine calc_hess_elem + +BEGIN_PROVIDER [real*8, hessmat_peter, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! explicit hessian matrix from density matrices and integrals + ! of course, this will be used for a direct Davidson procedure later + ! we will not store the matrix in real life + ! formulas are broken down as functions for the 6 classes of matrix elements + ! + END_DOC + implicit none + integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart,indx_shift + + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_peter ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + provide mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat_peter,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,jndx,j,ustart,t,u,a,bstart,indx_shift) + + !$OMP DO + ! (DOUBLY OCCUPIED ---> ACT ) + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + jndx=indx + ! (DOUBLY OCCUPIED ---> ACT ) + do j=i,n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + hessmat_peter(jndx,indx)=hessmat_itju(i,t,j,u) + jndx+=1 + end do + end do + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=1,n_core_inact_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itja(i,t,j,a) + jndx+=1 + end do + end do + ! (ACTIVE ---> VIRTUAL) + do u=1,n_act_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itua(i,t,u,a) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + jndx=indx + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=i,n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iajb(i,a,j,b) + jndx+=1 + end do + end do + ! (ACT ---> VIRTUAL) + do t=1,n_act_orb + do b=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iatb(i,a,t,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + ! (ACT ---> VIRTUAL) + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + jndx=indx + ! (ACT ---> VIRTUAL) + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_taub(t,a,u,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + + do jndx=1,nMonoEx + do indx=1,jndx-1 + hessmat_peter(indx,jndx) = hessmat_peter(jndx,indx) + enddo + enddo + + +END_PROVIDER + diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f new file mode 100644 index 00000000..e4568405 --- /dev/null +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -0,0 +1,80 @@ +BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] + BEGIN_DOC + ! the inactive Fock matrix, in molecular orbitals + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)=one_ints_no(p,q) + end do + end do + + ! the inactive Fock matrix + do k=1,n_core_inact_orb + kk=list_core_inact(k) + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] + BEGIN_DOC + ! the active active Fock matrix, in molecular orbitals + ! we create them in MOs, quite expensive + ! + ! for an implementation in AOs we need first the natural orbitals + ! for forming an active density matrix in AOs + ! + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + Fapq = 0.d0 + + ! the active Fock matrix, D0tu is diagonal + do t=1,n_act_orb + tt=list_act(t) + do q=1,mo_num + do p=1,mo_num + Fapq(p,q)+=occnum(tt) & + *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q)) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the effective Fock matrix over MOs' + write(6,*) + + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + write(6,*) + write(6,*) ' the diagonal of the active Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f new file mode 100644 index 00000000..9ce90304 --- /dev/null +++ b/src/casscf_cipsi/natorb.irp.f @@ -0,0 +1,231 @@ + BEGIN_PROVIDER [real*8, occnum, (mo_num)] + implicit none + BEGIN_DOC + ! MO occupation numbers + END_DOC + + integer :: i + occnum=0.D0 + do i=1,n_core_inact_orb + occnum(list_core_inact(i))=2.D0 + end do + + do i=1,n_act_orb + occnum(list_act(i))=occ_act(i) + end do + + if (bavard) then + write(6,*) ' occupation numbers ' + do i=1,mo_num + write(6,*) i,occnum(i) + end do + endif + +END_PROVIDER + + + BEGIN_PROVIDER [ real*8, natorbsCI, (n_act_orb,n_act_orb) ] +&BEGIN_PROVIDER [ real*8, occ_act, (n_act_orb) ] + implicit none + BEGIN_DOC + ! Natural orbitals of CI + END_DOC + integer :: i, j + double precision :: Vt(n_act_orb,n_act_orb) + +! call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) + call svd(D0tu, size(D0tu,1), natorbsCI,size(natorbsCI,1), occ_act, Vt, size(Vt,1),n_act_orb,n_act_orb) + + if (bavard) then + write(6,*) ' found occupation numbers as ' + do i=1,n_act_orb + write(6,*) i,occ_act(i) + end do + + integer :: nmx + real*8 :: xmx + do i=1,n_act_orb + ! largest element of the eigenvector should be positive + xmx=0.D0 + nmx=0 + do j=1,n_act_orb + if (abs(natOrbsCI(j,i)).gt.xmx) then + nmx=j + xmx=abs(natOrbsCI(j,i)) + end if + end do + xmx=sign(1.D0,natOrbsCI(nmx,i)) + do j=1,n_act_orb + natOrbsCI(j,i)*=xmx + end do + + write(6,*) ' Eigenvector No ',i + write(6,'(5(I3,F12.5))') (j,natOrbsCI(j,i),j=1,n_act_orb) + end do + end if + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! 4-index transformation of 2part matrices + END_DOC + integer :: i,j,k,l,p,q + real*8 :: d(n_act_orb) + + ! index per index + ! first quarter + P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:) + + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(p,j,k,l)=d(p) + end do + end do + end do + end do + ! 2nd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,p,k,l)=d(p) + end do + end do + end do + end do + ! 3rd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,p,l)=d(p) + end do + end do + end do + end do + ! 4th quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,l,p)=d(p) + end do + end do + end do + end do + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Transformed one-e integrals + END_DOC + integer :: i,j, p, q + real*8 :: d(n_act_orb) + one_ints_no(:,:)=mo_one_e_integrals(:,:) + + ! 1st half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(list_act(p),j)=d(p) + end do + end do + + ! 2nd half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(j,list_act(p))=d(p) + end do + end do +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] + implicit none + BEGIN_DOC +! FCI natural orbitals + END_DOC + + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) +END_PROVIDER + diff --git a/src/casscf_cipsi/neworbs.irp.f b/src/casscf_cipsi/neworbs.irp.f new file mode 100644 index 00000000..a7cebbb2 --- /dev/null +++ b/src/casscf_cipsi/neworbs.irp.f @@ -0,0 +1,253 @@ + BEGIN_PROVIDER [real*8, SXmatrix, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [integer, n_guess_sx_mat ] + implicit none + BEGIN_DOC + ! Single-excitation matrix + END_DOC + + integer :: i,j + + do i=1,nMonoEx+1 + do j=1,nMonoEx+1 + SXmatrix(i,j)=0.D0 + end do + end do + + do i=1,nMonoEx + SXmatrix(1,i+1)=gradvec2(i) + SXmatrix(1+i,1)=gradvec2(i) + end do + if(diag_hess_cas)then + do i = 1, nMonoEx + SXmatrix(i+1,i+1) = hessdiag(i) + enddo + else + do i=1,nMonoEx + do j=1,nMonoEx + SXmatrix(i+1,j+1)=hessmat(i,j) + SXmatrix(j+1,i+1)=hessmat(i,j) + end do + end do + endif + + do i = 1, nMonoEx + SXmatrix(i+1,i+1) += level_shift_casscf + enddo + n_guess_sx_mat = 1 + do i = 1, nMonoEx + if(SXmatrix(i+1,i+1).lt.0.d0 )then + n_guess_sx_mat += 1 + endif + enddo + if (bavard) then + do i=2,nMonoEx + write(6,*) ' diagonal of the Hessian : ',i,hessmat(i,i) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, SXeigenvec, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [real*8, SXeigenval, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Eigenvectors/eigenvalues of the single-excitation matrix + END_DOC + if(nMonoEx+1.gt.n_det_max_full)then + if(bavard)then + print*,'Using the Davidson algorithm to diagonalize the SXmatrix' + endif + double precision, allocatable :: u_in(:,:),energies(:) + allocate(u_in(nMonoEx+1,n_states_diag),energies(n_guess_sx_mat)) + call davidson_diag_sx_mat(n_guess_sx_mat, u_in, energies) + integer :: i,j + SXeigenvec = 0.d0 + SXeigenval = 0.d0 + do i = 1, n_guess_sx_mat + SXeigenval(i) = energies(i) + do j = 1, nMonoEx+1 + SXeigenvec(j,i) = u_in(j,i) + enddo + enddo + else + if(bavard)then + print*,'Diagonalize the SXmatrix with Jacobi' + endif + call lapack_diag(SXeigenval,SXeigenvec,SXmatrix,nMonoEx+1,nMonoEx+1) + endif + if (bavard) then + write(6,*) ' SXdiag : lowest eigenvalues ' + write(6,*) ' 1 - ',SXeigenval(1),SXeigenvec(1,1) + if(n_guess_sx_mat.gt.0)then + write(6,*) ' 2 - ',SXeigenval(2),SXeigenvec(1,2) + write(6,*) ' 3 - ',SXeigenval(3),SXeigenvec(1,3) + write(6,*) ' 4 - ',SXeigenval(4),SXeigenvec(1,4) + write(6,*) ' 5 - ',SXeigenval(5),SXeigenvec(1,5) + endif + write(6,*) + write(6,*) ' SXdiag : lowest eigenvalue = ',SXeigenval(1) + endif +END_PROVIDER + + BEGIN_PROVIDER [real*8, energy_improvement] + implicit none + if(state_following_casscf)then + energy_improvement = SXeigenval(best_vector_ovrlp_casscf) + else + energy_improvement = SXeigenval(1) + endif + END_PROVIDER + + + + BEGIN_PROVIDER [ integer, best_vector_ovrlp_casscf ] +&BEGIN_PROVIDER [ double precision, best_overlap_casscf ] + implicit none + integer :: i + double precision :: c0 + best_overlap_casscf = 0.D0 + best_vector_ovrlp_casscf = -1000 + do i=1,nMonoEx+1 + if (SXeigenval(i).lt.0.D0) then + if (dabs(SXeigenvec(1,i)).gt.best_overlap_casscf) then + best_overlap_casscf=dabs(SXeigenvec(1,i)) + best_vector_ovrlp_casscf = i + end if + end if + end do + if(best_vector_ovrlp_casscf.lt.0)then + best_vector_ovrlp_casscf = minloc(SXeigenval,nMonoEx+1) + endif + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + if (bavard) then + write(6,*) ' SXdiag : eigenvalue for best overlap with ' + write(6,*) ' previous orbitals = ',SXeigenval(best_vector_ovrlp_casscf) + write(6,*) ' weight of the 1st element ',c0 + endif + END_PROVIDER + + BEGIN_PROVIDER [double precision, SXvector, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Best eigenvector of the single-excitation matrix + END_DOC + integer :: i + double precision :: c0 + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + do i=1,nMonoEx+1 + SXvector(i)=SXeigenvec(i,best_vector_ovrlp_casscf)/c0 + end do + END_PROVIDER + + +BEGIN_PROVIDER [double precision, NewOrbs, (ao_num,mo_num) ] + implicit none + BEGIN_DOC + ! Updated orbitals + END_DOC + integer :: i,j,ialph + + if(state_following_casscf)then + print*,'Using the state following casscf ' + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + else + if(best_vector_ovrlp_casscf.ne.1.and.n_orb_swap.ne.0)then + print*,'Taking the lowest root for the CASSCF' + print*,'!!! SWAPPING MOS !!!!!!' + level_shift_casscf *= 2.D0 + level_shift_casscf = min(level_shift_casscf,0.5d0) + print*,'level_shift_casscf = ',level_shift_casscf + NewOrbs = switch_mo_coef + !mo_coef = switch_mo_coef + !soft_touch mo_coef + !call save_mos_no_occ + !stop + else + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + endif + endif + +END_PROVIDER + +BEGIN_PROVIDER [real*8, Umat, (mo_num,mo_num) ] + implicit none + BEGIN_DOC + ! Orbital rotation matrix + END_DOC + integer :: i,j,indx,k,iter,t,a,ii,tt,aa + logical :: converged + + real*8 :: Tpotmat (mo_num,mo_num), Tpotmat2 (mo_num,mo_num) + real*8 :: Tmat(mo_num,mo_num) + real*8 :: f + + ! the orbital rotation matrix T + Tmat(:,:)=0.D0 + indx=1 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx+=1 + Tmat(ii,tt)= SXvector(indx) + Tmat(tt,ii)=-SXvector(indx) + end do + end do + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(ii,aa)= SXvector(indx) + Tmat(aa,ii)=-SXvector(indx) + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(tt,aa)= SXvector(indx) + Tmat(aa,tt)=-SXvector(indx) + end do + end do + + ! Form the exponential + + Tpotmat(:,:)=0.D0 + Umat(:,:) =0.D0 + do i=1,mo_num + Tpotmat(i,i)=1.D0 + Umat(i,i) =1.d0 + end do + iter=0 + converged=.false. + do while (.not.converged) + iter+=1 + f = 1.d0 / dble(iter) + Tpotmat2(:,:) = Tpotmat(:,:) * f + call dgemm('N','N', mo_num,mo_num,mo_num,1.d0, & + Tpotmat2, size(Tpotmat2,1), & + Tmat, size(Tmat,1), 0.d0, & + Tpotmat, size(Tpotmat,1)) + Umat(:,:) = Umat(:,:) + Tpotmat(:,:) + + converged = ( sum(abs(Tpotmat(:,:))) < 1.d-6).or.(iter>30) + end do +END_PROVIDER + + + diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f new file mode 100644 index 00000000..3cb90522 --- /dev/null +++ b/src/casscf_cipsi/reorder_orb.irp.f @@ -0,0 +1,70 @@ +subroutine reorder_orbitals_for_casscf + implicit none + BEGIN_DOC +! routine that reorders the orbitals of the CASSCF in terms block of core, active and virtual + END_DOC + integer :: i,j,iorb + integer, allocatable :: iorder(:),array(:) + allocate(iorder(mo_num),array(mo_num)) + do i = 1, n_core_orb + iorb = list_core(i) + array(iorb) = i + enddo + + do i = 1, n_inact_orb + iorb = list_inact(i) + array(iorb) = mo_num + i + enddo + + do i = 1, n_act_orb + iorb = list_act(i) + array(iorb) = 2 * mo_num + i + enddo + + do i = 1, n_virt_orb + iorb = list_virt(i) + array(iorb) = 3 * mo_num + i + enddo + + do i = 1, mo_num + iorder(i) = i + enddo + call isort(array,iorder,mo_num) + double precision, allocatable :: mo_coef_new(:,:) + allocate(mo_coef_new(ao_num,mo_num)) + do i = 1, mo_num + mo_coef_new(:,i) = mo_coef(:,iorder(i)) + enddo + mo_coef = mo_coef_new + touch mo_coef + + list_core_reverse = 0 + do i = 1, n_core_orb + list_core(i) = i + list_core_reverse(i) = i + mo_class(i) = "Core" + enddo + + list_inact_reverse = 0 + do i = 1, n_inact_orb + list_inact(i) = i + n_core_orb + list_inact_reverse(i+n_core_orb) = i + mo_class(i+n_core_orb) = "Inactive" + enddo + + list_act_reverse = 0 + do i = 1, n_act_orb + list_act(i) = n_core_inact_orb + i + list_act_reverse(n_core_inact_orb + i) = i + mo_class(n_core_inact_orb + i) = "Active" + enddo + + list_virt_reverse = 0 + do i = 1, n_virt_orb + list_virt(i) = n_core_inact_orb + n_act_orb + i + list_virt_reverse(n_core_inact_orb + n_act_orb + i) = i + mo_class(n_core_inact_orb + n_act_orb + i) = "Virtual" + enddo + touch list_core_reverse list_core list_inact list_inact_reverse list_act list_act_reverse list_virt list_virt_reverse + +end diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f new file mode 100644 index 00000000..18750c3d --- /dev/null +++ b/src/casscf_cipsi/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_casscf_cipsi_energy(E(1:N_states)) + call ezfio_set_casscf_cipsi_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/casscf_cipsi/superci_dm.irp.f b/src/casscf_cipsi/superci_dm.irp.f new file mode 100644 index 00000000..ee831c35 --- /dev/null +++ b/src/casscf_cipsi/superci_dm.irp.f @@ -0,0 +1,207 @@ + BEGIN_PROVIDER [double precision, super_ci_dm, (mo_num,mo_num)] + implicit none + BEGIN_DOC +! density matrix of the super CI matrix, in the basis of NATURAL ORBITALS OF THE CASCI WF +! +! This is obtained from annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 +! +! WARNING ::: in the equation B3.d there is a TYPO with a forgotten MINUS SIGN (see variable mat_tmp_dm_super_ci ) + END_DOC + super_ci_dm = 0.d0 + integer :: i,j,iorb,jorb + integer :: a,aorb,b,borb + integer :: t,torb,v,vorb,u,uorb,x,xorb + double precision :: c0,ci + c0 = SXeigenvec(1,1) + ! equation B3.a of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! loop over the core/inact + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(iorb,iorb) = 2.d0 ! first term of B3.a + ! loop over the core/inact + do j = 1, n_core_inact_orb + jorb = list_core_inact(j) + ! loop over the virtual + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(jorb,iorb) += -2.d0 * lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,jorb) ! second term in B3.a + enddo + do t = 1, n_act_orb + torb = list_act(t) + ! thrid term of the B3.a + super_ci_dm(jorb,iorb) += - lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(jorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.b of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(iorb,torb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,iorb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(iorb,torb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,iorb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + enddo + enddo + enddo + + ! equation B3.c of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,iorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + super_ci_dm(iorb,aorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + enddo + enddo + + ! equation B3.d of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(torb,torb) = occ_act(t) ! first term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + super_ci_dm(torb,torb) += - occ_act(x) * occ_act(t)* mat_tmp_dm_super_ci(x,x) ! second term involving the ONE-rdm + enddo + do u = 1, n_act_orb + uorb = list_act(u) + + ! second term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + do v = 1, n_act_orb + vorb = list_act(v) + super_ci_dm(torb,uorb) += 2.d0 * P0tuvx_no(v,x,t,u) * mat_tmp_dm_super_ci(v,x) ! second term involving the TWO-rdm + enddo + enddo + + ! third term of equation B3.d + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(torb,uorb) += lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(iorb,uorb) * (2.d0 - occ_act(t) - occ_act(u)) + enddo + + enddo + enddo + + ! equation B3.e of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,torb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,aorb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(aorb,torb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,aorb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.f of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do b = 1, n_virt_orb + borb= list_virt(b) + + ! First term of equation B3.f + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(borb,aorb) += 2.d0 * lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,borb) + enddo + + ! Second term of equation B3.f + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(borb,aorb) += lowest_super_ci_coef_mo(torb,aorb) * lowest_super_ci_coef_mo(torb,borb) * occ_act(t) + enddo + enddo + enddo + + END_PROVIDER + + BEGIN_PROVIDER [double precision, superci_natorb, (ao_num,mo_num) +&BEGIN_PROVIDER [double precision, superci_nat_occ, (mo_num) + implicit none + call general_mo_coef_new_as_svd_vectors_of_mo_matrix_eig(super_ci_dm,mo_num,mo_num,mo_num,NatOrbsFCI,superci_nat_occ,superci_natorb) + +END_PROVIDER + + BEGIN_PROVIDER [double precision, mat_tmp_dm_super_ci, (n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! computation of the term in [ ] in the equation B3.d of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! + ! !!!!! WARNING !!!!!! there is a TYPO: a MINUS SIGN SHOULD APPEAR in that term + END_DOC + integer :: a,aorb,i,iorb + integer :: x,xorb,v,vorb + mat_tmp_dm_super_ci = 0.d0 + do v = 1, n_act_orb + vorb = list_act(v) + do x = 1, n_act_orb + xorb = list_act(x) + do a = 1, n_virt_orb + aorb = list_virt(a) + mat_tmp_dm_super_ci(x,v) += lowest_super_ci_coef_mo(aorb,vorb) * lowest_super_ci_coef_mo(aorb,xorb) + enddo + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + ! MARK THE MINUS SIGN HERE !!!!!!!!!!! BECAUSE OF TYPO IN THE ORIGINAL PAPER + mat_tmp_dm_super_ci(x,v) -= lowest_super_ci_coef_mo(iorb,vorb) * lowest_super_ci_coef_mo(iorb,xorb) + enddo + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, lowest_super_ci_coef_mo, (mo_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + integer :: a, aorb,t, torb + double precision :: sqrt2 + + sqrt2 = 1.d0/dsqrt(2.d0) + do i = 1, nMonoEx + iorb = excit(1,i) + jorb = excit(2,i) + lowest_super_ci_coef_mo(iorb,jorb) = SXeigenvec(i+1,1) + lowest_super_ci_coef_mo(jorb,iorb) = SXeigenvec(i+1,1) + enddo + + ! a_{it} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,iorb) *= (2.d0 - occ_act(t))**(-0.5d0) + lowest_super_ci_coef_mo(iorb,torb) *= (2.d0 - occ_act(t))**(-0.5d0) + enddo + enddo + + ! a_{ia} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + lowest_super_ci_coef_mo(aorb,iorb) *= sqrt2 + lowest_super_ci_coef_mo(iorb,aorb) *= sqrt2 + enddo + enddo + + ! a_{ta} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,aorb) *= occ_act(t)**(-0.5d0) + lowest_super_ci_coef_mo(aorb,torb) *= occ_act(t)**(-0.5d0) + enddo + enddo + + END_PROVIDER + diff --git a/src/casscf_cipsi/swap_orb.irp.f b/src/casscf_cipsi/swap_orb.irp.f new file mode 100644 index 00000000..49af207c --- /dev/null +++ b/src/casscf_cipsi/swap_orb.irp.f @@ -0,0 +1,132 @@ + BEGIN_PROVIDER [double precision, SXvector_lowest, (nMonoEx)] + implicit none + integer :: i + do i=2,nMonoEx+1 + SXvector_lowest(i-1)=SXeigenvec(i,1) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, thresh_overlap_switch] + implicit none + thresh_overlap_switch = 0.5d0 + END_PROVIDER + + BEGIN_PROVIDER [integer, max_overlap, (nMonoEx)] +&BEGIN_PROVIDER [integer, n_max_overlap] +&BEGIN_PROVIDER [integer, dim_n_max_overlap] + implicit none + double precision, allocatable :: vec_tmp(:) + integer, allocatable :: iorder(:) + allocate(vec_tmp(nMonoEx),iorder(nMonoEx)) + integer :: i + do i = 1, nMonoEx + iorder(i) = i + vec_tmp(i) = -dabs(SXvector_lowest(i)) + enddo + call dsort(vec_tmp,iorder,nMonoEx) + n_max_overlap = 0 + do i = 1, nMonoEx + if(dabs(vec_tmp(i)).gt.thresh_overlap_switch)then + n_max_overlap += 1 + max_overlap(n_max_overlap) = iorder(i) + endif + enddo + dim_n_max_overlap = max(1,n_max_overlap) + END_PROVIDER + + BEGIN_PROVIDER [integer, orb_swap, (2,dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, index_orb_swap, (dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, n_orb_swap ] + implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features + integer :: i,imono,iorb,jorb,j + n_orb_swap = 0 + do i = 1, n_max_overlap + imono = max_overlap(i) + iorb = excit(1,imono) + jorb = excit(2,imono) + if (excit_class(imono) == "c-a" .and.hessmat(imono,imono).gt.0.d0)then ! core --> active rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = iorb ! core + orb_swap(2,n_orb_swap) = jorb ! active + index_orb_swap(n_orb_swap) = imono + else if (excit_class(imono) == "a-v" .and.hessmat(imono,imono).gt.0.d0)then ! active --> virtual rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = jorb ! virtual + orb_swap(2,n_orb_swap) = iorb ! active + index_orb_swap(n_orb_swap) = imono + endif + enddo + + integer,allocatable :: orb_swap_tmp(:,:) + allocate(orb_swap_tmp(2,dim_n_max_overlap)) + do i = 1, n_orb_swap + orb_swap_tmp(1,i) = orb_swap(1,i) + orb_swap_tmp(2,i) = orb_swap(2,i) + enddo + + integer(bit_kind), allocatable :: det_i(:),det_j(:) + allocate(det_i(N_int),det_j(N_int)) + logical, allocatable :: good_orb_rot(:) + allocate(good_orb_rot(n_orb_swap)) + integer, allocatable :: index_orb_swap_tmp(:) + allocate(index_orb_swap_tmp(dim_n_max_overlap)) + index_orb_swap_tmp = index_orb_swap + good_orb_rot = .True. + integer :: icount,k + do i = 1, n_orb_swap + if(.not.good_orb_rot(i))cycle + det_i = 0_bit_kind + call set_bit_to_integer(orb_swap(1,i),det_i,N_int) + call set_bit_to_integer(orb_swap(2,i),det_i,N_int) + do j = i+1, n_orb_swap + det_j = 0_bit_kind + call set_bit_to_integer(orb_swap(1,j),det_j,N_int) + call set_bit_to_integer(orb_swap(2,j),det_j,N_int) + icount = 0 + do k = 1, N_int + icount += popcnt(ior(det_i(k),det_j(k))) + enddo + if (icount.ne.4)then + good_orb_rot(i) = .False. + good_orb_rot(j) = .False. + exit + endif + enddo + enddo + icount = n_orb_swap + n_orb_swap = 0 + do i = 1, icount + if(good_orb_rot(i))then + n_orb_swap += 1 + index_orb_swap(n_orb_swap) = index_orb_swap_tmp(i) + orb_swap(1,n_orb_swap) = orb_swap_tmp(1,i) + orb_swap(2,n_orb_swap) = orb_swap_tmp(2,i) + endif + enddo + + if(n_orb_swap.gt.0)then + print*,'n_orb_swap = ',n_orb_swap + endif + do i = 1, n_orb_swap + print*,'imono = ',index_orb_swap(i) + print*,orb_swap(1,i),'-->',orb_swap(2,i) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, switch_mo_coef, (ao_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + switch_mo_coef = NatOrbsFCI + do i = 1, n_orb_swap + iorb = orb_swap(1,i) + jorb = orb_swap(2,i) + do j = 1, ao_num + switch_mo_coef(j,jorb) = NatOrbsFCI(j,iorb) + enddo + do j = 1, ao_num + switch_mo_coef(j,iorb) = NatOrbsFCI(j,jorb) + enddo + enddo + + END_PROVIDER diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f new file mode 100644 index 00000000..1d70e087 --- /dev/null +++ b/src/casscf_cipsi/tot_en.irp.f @@ -0,0 +1,101 @@ + BEGIN_PROVIDER [real*8, etwo] +&BEGIN_PROVIDER [real*8, eone] +&BEGIN_PROVIDER [real*8, eone_bis] +&BEGIN_PROVIDER [real*8, etwo_bis] +&BEGIN_PROVIDER [real*8, etwo_ter] +&BEGIN_PROVIDER [real*8, ecore] +&BEGIN_PROVIDER [real*8, ecore_bis] + implicit none + integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 + real*8 :: e_one_all,e_two_all + e_one_all=0.D0 + e_two_all=0.D0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + e_one_all+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + e_two_all+=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + end do + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + e_two_all+=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + end do + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do u=1,n_act_orb + uu=list_act(u) + e_one_all+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do v=1,n_act_orb + v3=v+n_core_inact_orb + do x=1,n_act_orb + x3=x+n_core_inact_orb + e_two_all +=P0tuvx(t,u,v,x)*bielec_PQxx(tt,uu,v3,x3) + end do + end do + end do + end do + ecore =nuclear_repulsion + ecore_bis=nuclear_repulsion + do i=1,n_core_inact_orb + ii=list_core_inact(i) + ecore +=2.D0*mo_one_e_integrals(ii,ii) + ecore_bis+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + ecore +=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + ecore_bis+=2.D0*bielec_PxxQ(ii,i,j,jj)-bielec_PxxQ(ii,j,j,ii) + end do + end do + eone =0.D0 + eone_bis=0.D0 + etwo =0.D0 + etwo_bis=0.D0 + etwo_ter=0.D0 + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + eone +=D0tu(t,u)*mo_one_e_integrals(tt,uu) + eone_bis+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do i=1,n_core_inact_orb + ii=list_core_inact(i) + eone +=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + eone_bis+=D0tu(t,u)*(2.D0*bielec_PxxQ(tt,u3,i,ii) & + -bielec_PxxQ(tt,i,i,uu)) + end do + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + real*8 :: h1,h2,h3 + h1=bielec_PQxx(tt,uu,v3,x3) + h2=bielec_PxxQ(tt,u3,v3,xx) + h3=bielecCI(t,u,v,xx) + etwo +=P0tuvx(t,u,v,x)*h1 + etwo_bis+=P0tuvx(t,u,v,x)*h2 + etwo_ter+=P0tuvx(t,u,v,x)*h3 + if ((h1.ne.h2).or.(h1.ne.h3)) then + write(6,9901) t,u,v,x,h1,h2,h3 + 9901 format('aie: ',4I4,3E20.12) + end if + end do + end do + end do + end do + +END_PROVIDER + + diff --git a/src/casscf_tc_bi/det_manip.irp.f b/src/casscf_tc_bi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_tc_bi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f new file mode 100644 index 00000000..047b5718 --- /dev/null +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -0,0 +1,263 @@ + BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + BEGIN_DOC +! gradvec_tc_r(0:3,i) = +! +! gradvec_tc_l(0:3,i) = +! +! where the indices "i" corresponds to E_q^p(i) +! +! i = mat_idx_c_a(q,p) +! +! and gradvec_tc_r/l(0) = full matrix element +! +! gradvec_tc_r/l(1) = one-body part + +! gradvec_tc_r/l(2) = two-body part + +! gradvec_tc_r/l(3) = three-body part + END_DOC + implicit none + integer :: ii,tt,aa,indx + integer :: i,t,a,fff + double precision :: res_l(0:3), res_r(0:3) + gradvec_tc_l = 0.d0 + gradvec_tc_r = 0.d0 + ! computing the core/inactive --> virtual orbitals gradients + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx = mat_idx_c_a(i,t) + call gradvec_tc_it(ii,tt,res_l,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + indx = mat_idx_c_v(i,a) + aa=list_virt(a) + call gradvec_tc_ia(ii,aa,res_l,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + +! print*,'DM grad' + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx = mat_idx_a_v(t,a) +! print*,indx,t,a + call gradvec_tc_ta(tt,aa,res_l, res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do +END_PROVIDER + +subroutine gradvec_tc_ia(i,a,res_l, res_r) + implicit none + BEGIN_DOC +! doubly occupied --> virtual TC gradient +! +! Corresponds to res_r = , +! +! res_l = + END_DOC + integer, intent(in) :: i,a + double precision, intent(out) :: res_l(0:3), res_r(0:3) + res_l = 0.d0 + res_r = 0.d0 + res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) + res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + integer :: j,t,r,jj,tt,rr + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) + res_l(2) -= -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + enddo + do tt = 1, n_act_orb + t = list_act(tt) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & + ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + ) + enddo + enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) +end + +subroutine gradvec_tc_it(i,t,res_l, res_r) + implicit none + BEGIN_DOC +! doubly occupied --> active TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: i,t + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,j,jj,u,uu,v,vv + res_r = 0.d0 + res_l = 0.d0 + res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) + res_l(1) -= -2.D0 * mo_bi_ortho_tc_one_e(t,i) + + do rr = 1, n_act_orb + r = list_act(rr) + res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) + res_l(1) -= mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) + enddo + + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_l(2) -= -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(t,j,i,j) - mo_bi_ortho_tc_two_e(t,j,j,i)) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + res_l(2) -= tc_transition_matrix_mo(r,t,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,j,i,j) - mo_bi_ortho_tc_two_e(j,r,j,i)) + enddo + enddo + do rr = 1, n_act_orb + r = list_act(rr) + do uu = 1, n_act_orb + u = list_act(uu) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & + ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,t,u,i) - mo_bi_ortho_tc_two_e(t,r,u,i)) & + + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(t,u,i,r) - mo_bi_ortho_tc_two_e(u,t,i,r)) & + ) + do vv = 1, n_act_orb + v = list_act(vv) + res_r(2) += 0.5d0 * ( & + mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + res_l(2) -= 0.5d0 * ( & + mo_bi_ortho_tc_two_e(v,u,i,r) * tc_two_rdm(v,u,t,r) + mo_bi_ortho_tc_two_e(v,u,r,i) * tc_two_rdm(v,u,r,t) ) + enddo + enddo + enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) +end + +subroutine gradvec_tc_ta(t,a,res_l, res_r) + implicit none + BEGIN_DOC +! active --> virtual TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: t,a + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,j,jj,u,uu,v,vv + double precision :: res_r_inact_test, res_r_act_test + double precision :: res_l_inact_test, res_l_act_test + res_r = 0.d0 + res_l = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) + res_r(1) -= mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) + enddo + + res_r_inact_test = 0.d0 + res_l_inact_test = 0.d0 + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + do rr = 1, n_act_orb + r = list_act(rr) + res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) + res_l_inact_test -= -tc_transition_matrix_mo(t,r,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) + enddo + enddo + res_r_act_test = 0.d0 + res_l_act_test = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + do vv = 1, n_act_orb + v = list_act(vv) + do uu = 1, n_act_orb + u = list_act(uu) + res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & + +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) + res_l_act_test -= - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) + enddo + enddo + enddo + res_r_act_test *= 0.5d0 + res_l_act_test *= 0.5d0 + res_r(2) = res_r_inact_test + res_r_act_test + res_l(2) = res_l_inact_test + res_l_act_test + + integer :: m,x,y + double precision :: res_r_inact, res_r_act + if(.False.)then + ! test quantities + res_r_inact = 0.d0 + res_r_act = 0.d0 + do m = 1, mo_num + do x = 1, mo_num + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r_inact += 0.5d0 * mo_bi_ortho_tc_two_e(t,j,m,x) * tc_two_rdm(a,j,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,j,a,x) * tc_two_rdm(m,j,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(j,t,m,x) * tc_two_rdm(j,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,j,m,a) * tc_two_rdm(x,j,m,t) + enddo + do rr = 1, n_act_orb + r = list_act(rr) + res_r_act += 0.5d0 * mo_bi_ortho_tc_two_e(t,r,m,x) * tc_two_rdm(a,r,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,r,a,x) * tc_two_rdm(m,r,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(r,t,m,x) * tc_two_rdm(r,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,r,m,a) * tc_two_rdm(x,r,m,t) + enddo + enddo + enddo + if(dabs(res_r_inact).gt.1.d-12)then + if(dabs(res_r_inact_test - res_r_inact).gt.1.d-10)then + print*,'inact' + print*,'t,a',t,a + print*,res_r_inact_test , res_r_inact, dabs(res_r_inact_test - res_r_inact) + endif + endif + if(dabs(res_r_act).gt.1.d-12)then + if(dabs(res_r_act_test - res_r_act).gt.1.d-10)then + print*,'act' + print*,'t,a',t,a + print*,res_r_act_test , res_r_act, dabs(res_r_act_test - res_r_act) + endif + endif + endif + + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f new file mode 100644 index 00000000..3f0ffb5e --- /dev/null +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -0,0 +1,134 @@ + + BEGIN_PROVIDER [real*8, gradvec_detail_right_old, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_detail_left_old, (0:3,nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate,ll + real*8 :: res_l(0:3), res_r(0:3) + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_c_v(ii,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do tt = 1, n_act_orb + ipart = list_act(tt) + indx = mat_idx_c_a(ii,tt) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + +! print*,'old grad' + do tt = 1, n_act_orb + ihole = list_act(tt) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_a_v(tt,aa) +! print*,indx,tt,aa + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + + real*8 :: norm_grad_left, norm_grad_right + norm_grad_left=0.d0 + norm_grad_right=0.d0 + do indx=1,nMonoEx + norm_grad_left+=gradvec_detail_left_old(0,indx)*gradvec_detail_left_old(0,indx) + norm_grad_right+=gradvec_detail_right_old(0,indx)*gradvec_detail_right_old(0,indx) + end do + norm_grad_left=sqrt(norm_grad_left) + norm_grad_right=sqrt(norm_grad_right) +! if (bavard) then + write(6,*) + write(6,*) ' Norm of the LEFT orbital gradient (via <0|EH|0>) : ', norm_grad_left + write(6,*) ' Norm of the RIGHT orbital gradient (via <0|HE|0>) : ', norm_grad_right + write(6,*) +! endif + + +END_PROVIDER + +subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + BEGIN_DOC + ! Computes the gradient with respect to orbital rotation BRUT FORCE + ! + ! res_l = + ! + ! res_r = + ! + ! q=hole, p=particle. NOTE that on res_l it is E_qp and on res_r it is E_pq + ! + ! res_l(0) = total matrix element, res_l(1) = one-electron part, + ! + ! res_l(2) = two-electron part, res_l(3) = three-electron part + ! + END_DOC + implicit none + integer, intent(in) :: ihole,ipart + double precision, intent(out) :: res_l(0:3), res_r(0:3) + integer :: mu,iii,ispin,ierr,nu,istate,ll + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: chi_H_mu_ex_array(0:3,N_states),mu_ex_H_phi_array(0:3,N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res_l=0.D0 + res_r=0.D0 + + do mu=1,n_det + ! get the string of the determinant |mu> + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it: |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + ! |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> + if (ierr.eq.1) then + call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & + ,N_det,psi_det_size,N_states,chi_H_mu_ex_array,mu_ex_H_phi_array) + ! chi_H_mu_ex_array = + ! mu_ex_H_phi_array = + do istate=1,N_states + do ll = 0,3 ! loop over the body components (1e,2e,3e) + !res_l = \sum_mu c_mu^l = + res_l(ll)+= mu_ex_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + !res_r = \sum_mu c_mu^r = + res_r(ll)+= chi_H_mu_ex_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + enddo + end do + end if + end do + end do + + ! state-averaged gradient + res_l*=1.d0/dble(N_states) + res_r*=1.d0/dble(N_states) + +end + diff --git a/src/casscf_tc_bi/gradient.irp.f b/src/casscf_tc_bi/gradient.irp.f new file mode 100644 index 00000000..630bd891 --- /dev/null +++ b/src/casscf_tc_bi/gradient.irp.f @@ -0,0 +1,94 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + +! if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do +! end if + +END_PROVIDER diff --git a/src/casscf_tc_bi/test_tc_casscf.irp.f b/src/casscf_tc_bi/test_tc_casscf.irp.f new file mode 100644 index 00000000..baa50c0f --- /dev/null +++ b/src/casscf_tc_bi/test_tc_casscf.irp.f @@ -0,0 +1,252 @@ +program tc_bi_ortho + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + +! call routine_i_h_psi +! call routine_grad + call routine_grad_num_dm_one_body +end + +subroutine routine_i_h_psi + implicit none + integer :: i,j + double precision :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states) + double precision :: hmono, htwoe, hthree, htot + double precision :: accu_l_hmono, accu_l_htwoe, accu_l_hthree, accu_l_htot + double precision :: accu_r_hmono, accu_r_htwoe, accu_r_hthree, accu_r_htot + double precision :: test_l_hmono, test_l_htwoe, test_l_hthree, test_l_htot + double precision :: test_r_hmono, test_r_htwoe, test_r_hthree, test_r_htot + + test_l_hmono = 0.d0 + test_l_htwoe = 0.d0 + test_l_hthree= 0.d0 + test_l_htot = 0.d0 + test_r_hmono = 0.d0 + test_r_htwoe = 0.d0 + test_r_hthree= 0.d0 + test_r_htot = 0.d0 + + do i = 1, N_det + call i_H_tc_psi_phi(psi_det(1,1,i),psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,& + N_int,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + accu_l_hmono = 0.d0 + accu_l_htwoe = 0.d0 + accu_l_hthree= 0.d0 + accu_l_htot = 0.d0 + accu_r_hmono = 0.d0 + accu_r_htwoe = 0.d0 + accu_r_hthree= 0.d0 + accu_r_htot = 0.d0 + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + accu_l_hmono += psi_l_coef_bi_ortho(j,1) * hmono + accu_l_htwoe += psi_l_coef_bi_ortho(j,1) * htwoe + accu_l_hthree += psi_l_coef_bi_ortho(j,1) * hthree + accu_l_htot += psi_l_coef_bi_ortho(j,1) * htot + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + accu_r_hmono += psi_r_coef_bi_ortho(j,1) * hmono + accu_r_htwoe += psi_r_coef_bi_ortho(j,1) * htwoe + accu_r_hthree += psi_r_coef_bi_ortho(j,1) * hthree + accu_r_htot += psi_r_coef_bi_ortho(j,1) * htot + enddo + test_l_htot += dabs(i_H_chi_array(0,1)-accu_l_htot) + test_l_hmono += dabs(i_H_chi_array(1,1)-accu_l_hmono) + test_l_htwoe += dabs(i_H_chi_array(2,1)-accu_l_htwoe) + test_l_hthree += dabs(i_H_chi_array(3,1)-accu_l_hthree) + + test_r_htot += dabs(i_H_phi_array(0,1)-accu_r_htot) + test_r_hmono += dabs(i_H_phi_array(1,1)-accu_r_hmono) + test_r_htwoe += dabs(i_H_phi_array(2,1)-accu_r_htwoe) + test_r_hthree += dabs(i_H_phi_array(3,1)-accu_r_hthree) + + enddo + + test_l_htot *= 1.D0/dble(N_det) + test_l_hmono *= 1.D0/dble(N_det) + test_l_htwoe *= 1.D0/dble(N_det) + test_l_hthree *= 1.D0/dble(N_det) + + test_r_htot *= 1.D0/dble(N_det) + test_r_hmono *= 1.D0/dble(N_det) + test_r_htwoe *= 1.D0/dble(N_det) + test_r_hthree *= 1.D0/dble(N_det) + + print*,'**************************' + print*,'test_l_htot = ',test_l_htot + print*,'test_l_hmono = ',test_l_hmono + print*,'test_l_htwoe = ',test_l_htwoe + print*,'test_l_hthree = ',test_l_hthree + print*,'**************************' + print*,'test_r_htot = ',test_r_htot + print*,'test_r_hmono = ',test_r_hmono + print*,'test_r_htwoe = ',test_r_htwoe + print*,'test_r_hthree = ',test_r_hthree + +end + +subroutine routine_grad_num + implicit none + integer :: indx,ihole,ipart + integer :: p,q + double precision :: accu_l, accu_r + double precision :: contrib_l, contrib_r + + accu_l = 0.d0 + accu_r = 0.d0 + do indx=1,nMonoEx + q = excit(1,indx) + p = excit(2,indx) + contrib_l = dabs(dabs(gradvec_detail_left_old(0,indx)) - 2.D0 * dabs( Fock_matrix_tc_mo_tot(q,p))) + contrib_r = dabs(dabs(gradvec_detail_right_old(0,indx)) -2.D0 * dabs( Fock_matrix_tc_mo_tot(p,q))) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,indx,q,p + print*,gradvec_detail_left_old(0,indx),gradvec_detail_right_old(0,indx) + print*,2.D0* Fock_matrix_tc_mo_tot(q,p), 2.d0* Fock_matrix_tc_mo_tot(p,q) + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + +! do i = 1, nMonoEx +! print*,i,gradvec_old(i) +! enddo + +end + +subroutine routine_grad_num_dm_one_body + implicit none + integer :: indx,ii,i,a,aa,tt,t,ibody + double precision :: accu_l, accu_r,ref_r, new_r, ref_l, new_l + double precision :: contrib_l, contrib_r + double precision :: res_l(0:3),res_r(0:3) + + ibody = 2 ! check only the two-body term + provide gradvec_detail_left_old gradvec_tc_l + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->virtual' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do aa = 1, n_virt_orb + indx = mat_idx_c_v(ii,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + i = list_core_inact(ii) + a = list_virt(aa) +! if(i==1.and.a==9)then +! print*,i,a,ref_r, new_r +! stop +! endif + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + print*,indx,i,a,ii,aa + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,gradvec_detail_left_old(0,indx),gradvec_tc_l(0,indx) + print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + print*,'**************************' + print*,'**************************' + endif + + ibody = 2 ! check only the two-body term + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->active' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do tt = 1, n_act_orb + indx = mat_idx_c_a(ii,tt) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + i = list_core_inact(ii) + t = list_act(tt) + print*,indx,i,t + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing active-->virtual ' + accu_l = 0.d0 + accu_r = 0.d0 + do tt = 1, n_act_orb + do aa = 1, n_virt_orb + indx = mat_idx_a_v(tt,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + a = list_virt(aa) + t = list_act(tt) + print*,indx,t,a + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r +! print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + +end diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 40c57188..b48ca7da 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -9,7 +9,7 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) @@ -18,7 +18,12 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa -! PROVIDE mo_two_e_integrals_in_map + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE mo_two_e_integrals_in_map + endif det = psi_det(:,:,cc_ref) print*,'Reference determinant:' @@ -46,13 +51,39 @@ subroutine run_ccsd_space_orb allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) + allocate(tau_x(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) - all_err = 0d0 - all_t = 0d0 + double precision :: rss, diis_mem, extra_mem + double precision, external :: memory_of_double + diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV) + call resident_memory(rss) + do while (cc_diis_depth > 1) + if (rss + diis_mem * cc_diis_depth > qp_max_mem) then + cc_diis_depth = cc_diis_depth - 1 + else + exit + endif + end do + if (cc_diis_depth <= 1) then + print *, 'Not enough memory for DIIS' + stop -1 + endif + print *, 'DIIS size ', cc_diis_depth + + allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth)) + !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED) + do j=1,cc_diis_depth + !$OMP DO + do i=1, size(all_err,1) + all_err(i,j) = 0d0 + all_t(i,j) = 0d0 + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL endif if (elec_alpha_num /= elec_beta_num) then @@ -67,10 +98,11 @@ subroutine run_ccsd_space_orb call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -85,13 +117,23 @@ subroutine run_ccsd_space_orb do while (not_converged) - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) - ! Residue - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + if (do_ao_cholesky) then +! if (.False.) then + call compute_H_oo_chol(nO,nV,tau_x,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,H_vv) + call compute_H_vo_chol(nO,nV,t1,H_vo) + + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + else + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + endif max_r = max(max_r1,max_r2) ! Update @@ -109,10 +151,11 @@ subroutine run_ccsd_space_orb endif call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space(nO,nV,tau,t1,energy) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then @@ -132,7 +175,7 @@ subroutine run_ccsd_space_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then @@ -239,6 +282,51 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) end +subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau_x(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau_x,t1,& + !$omp cc_space_f_vo,cc_space_v_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + ! Tau subroutine update_tau_space(nO,nV,t1,t2,tau) @@ -274,6 +362,39 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) end +subroutine update_tau_x_space(nO,nV,tau,tau_x) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau_x(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,tau_x) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + ! R1 subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) @@ -449,25 +570,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! enddo ! enddo !enddo + + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -481,10 +593,30 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) @@ -839,6 +971,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! allocate(B1(nV,nV,nV,nV)) ! call compute_B1(nO,nV,t1,t2,B1) +! call dgemm('N','N',nO*nO,nV*nV,nV*nV, & +! 1d0, tau, size(tau,1) * size(tau,2), & +! B1 , size(B1_gam,1) * size(B1_gam,2), & +! 1d0, r2, size(r2,1) * size(r2,2)) allocate(B1_gam(nV,nV,nV)) do gam=1,nV call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) @@ -1323,7 +1459,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -1343,7 +1479,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo @@ -1547,21 +1683,29 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) ! enddo double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & + cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir(gam), B1) + + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait + +! !$omp do +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! enddo +! enddo +! enddo +! !$omp end do nowait + do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1713,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f new file mode 100644 index 00000000..b59dc0bb --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -0,0 +1,1458 @@ +subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space_chol(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + integer :: iblock, block_size, nVmax + double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) + block_size = 16 + allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp private(u,i,b,a) & + !$omp default(shared) + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol , cholesky_mo_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + 0.d0, W_vvov_tmp, nV*nO) + + !$omp parallel & + !$omp private(b,i,a,beta) & + !$omp default(shared) + do beta = 1, nVmax + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) + enddo + enddo + !$omp end do nowait + enddo + enddo + !$omp barrier + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo + + deallocate(W_vvov,T_vvoo) + + + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + do u = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do i = 1, nO +! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + max_r1 = max(dabs(r1(i,a)), max_r1) + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: tau_x(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,b,i,j,u,k + + double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + + allocate(tau_kau(cholesky_mo_num,nV,nO)) + !$omp parallel & + !$omp default(shared) & + !$omp private(i,u,j,k,a,b,tmp_vov) + allocate(tmp_vov(nV,nO,nV) ) + !$omp do + do u = 1, nO + do b=1,nV + do j=1,nO + do a=1,nV + tmp_vov(a,j,b) = tau_x(u,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_mo_num) + enddo + !$omp end do nowait + deallocate(tmp_vov) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & + 1.d0, H_oo, nO) + +end + +! H_vv + +subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: tau_x(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,b,i,j,u,k, beta + + double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) + + allocate(tau_kia(cholesky_mo_num,nO,nV)) + !$omp parallel & + !$omp default(shared) & + !$omp private(i,beta,j,k,a,b,tmp_oov) + allocate(tmp_oov(nO,nO,nV) ) + !$omp do + do a = 1, nV + do b=1,nV + do j=1,nO + do i=1,nO + tmp_oov(i,j,b) = tau_x(i,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_mo_num) + enddo + !$omp end do nowait + deallocate(tmp_oov) + + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & + 1.d0, H_vv, nV) + +end + +! H_vo +subroutine compute_H_vo_chol(nO,nV,t1,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,b,i,j,u,k + + double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) + do i=1,nO + do a=1,nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + + call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & + H_vo, nV*nO) + deallocate(tmp_k) + + allocate(tmp(cholesky_mo_num,nO,nO)) + allocate(tmp2(cholesky_mo_num,nO,nO)) + + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) + + do i=1,nO + do j=1,nO + do k=1,cholesky_mo_num + tmp2(k,j,i) = tmp(k,i,j) + enddo + enddo + enddo + deallocate(tmp) + + call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & + 1.d0, H_vo, nV) + +end + + +! R2 + +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + integer :: u,v,i,j,beta,gam,a,b + double precision :: max_r2_local + + call set_multiple_levels_omp(.False.) + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: A1(:,:,:,:) + allocate(A1(nO,nO,nO,nO)) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + deallocate(A1) + integer :: block_size, iblock, k + block_size = 16 + double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 + double precision, dimension(:,:), allocatable :: tmp_cc2 + + allocate(tmp_cc(cholesky_mo_num,nV,nV)) + call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) + + call set_multiple_levels_omp(.False.) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) + !$OMP DO + do gam = 1, nV + + do a=1,nV + do k=1,cholesky_mo_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo + + do iblock = 1, nV, block_size + + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & + 0.d0, tmpB1, nV*block_size) + + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & + 1.d0, tmpB1, nV*block_size) + + do beta = iblock, min(nV, iblock+block_size-1) + do b = 1, nV + do a = 1, nV + B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) + enddo + enddo + enddo + + call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1 ,1) * size(B1 ,2), & + 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + enddo + + enddo + !$OMP ENDDO + + deallocate(B1, tmpB1, tmp_cc2) + !$OMP END PARALLEL + + deallocate(tmp_cc) + + + double precision, allocatable :: X_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV)) + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: g_vir(:,:) + allocate(g_vir(nV,nV)) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + double precision, allocatable :: Y_oovv(:,:,:,:) + allocate(Y_oovv(nO,nO,nV,nV)) + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + deallocate(g_vir) + deallocate(X_oovv) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Y_oovv) + + double precision, allocatable :: g_occ(:,:) + allocate(g_occ(nO,nO)) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + deallocate(g_occ) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + double precision, allocatable :: X_vovv(:,:,:,:) + + allocate(X_vovv(nV,nO,nV,block_size)) + allocate(Y_oovv(nO,nO,nV,nV)) + + do iblock = 1, nV, block_size + do gam = iblock, min(nV, iblock+block_size-1) + call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & + cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + + enddo + call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) + + enddo + deallocate(X_vovv) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Y_oovv) + + double precision, allocatable :: X_ovvo(:,:,:,:) + double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) + allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_mo_num,nO,nV)) + + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_mo_num*nV) + + call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & + 0.d0, tcc, cholesky_mo_num*nO) + + call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & + X_ovvo, nO*nV) + + deallocate(tcc, tcc2) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_ovvo) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp do + do beta = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo) + !----- + + allocate(X_oovv(nO,nO,nV,nV)) + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_oovv) + + double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + allocate(Y_oovo(nO,nO,nV,nO)) + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + deallocate(X_vovo) + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + deallocate(Y_oovo) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_oovv) + + + double precision, allocatable :: J1(:,:,:,:) + allocate(J1(nO,nV,nV,nO)) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvoo,J1) + + double precision, allocatable :: K1(:,:,:,:) + allocate(K1(nO,nV,nO,nV)) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,K1) + + allocate(X_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) + do i = 1, nO + !$omp do + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(J1) + + double precision, allocatable :: Y_voov(:,:,:,:) + allocate(Y_voov(nV,nO,nO,nV)) + + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) + !$omp do + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: Z_ovov(:,:,:,:) + allocate(Z_ovov(nO,nV,nO,nV)) + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + deallocate(X_ovvo,Y_voov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Z_ovov) + + double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV)) + allocate(Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov, Y_ovov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Z_ovov) + + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(K1) + + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + deallocate(X_ovov,Y_ovov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + + max_r2 = 0d0 + !$omp parallel & + !$omp shared(nO,nV,r2,max_r2) & + !$omp private(i,j,a,b,max_r2_local) & + !$omp default(none) + max_r2_local = 0.d0 + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + max_r2_local = max(r2(i,j,a,b), max_r2_local) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + max_r2 = max(max_r2, max_r2_local) + !$omp end critical + !$omp end parallel + +end + +! A1 + +subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + double precision, allocatable :: Y_oooo(:,:,:,:) + allocate(Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp private(u,v,i,j) & + !$omp default(shared) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! g_occ + +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + g_occ = H_oo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 1d0, g_occ, size(g_occ,1)) + + double precision, allocatable :: X(:) + allocate(X(cholesky_mo_num)) + call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, 1, 0.d0, X, 1) + + call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + X, 1, 1.d0, g_occ, 1) + deallocate(X) + + call dgemv('T',nO*nV,nO*nO,-1.d0, & + cc_space_v_ovoo, nO*nV, & + t1, 1, 1.d0, g_occ, 1) + +end + +! g_vir + +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + + call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & + g_vir, nV*nV) + deallocate(tmp_k) + + allocate(tmp_vo(cholesky_mo_num,nV,nO)) + call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) + + allocate(tmp_vo2(cholesky_mo_num,nO,nV)) + do beta=1,nV + do i=1,nO + do k=1,cholesky_mo_num + tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) + enddo + enddo + enddo + deallocate(tmp_vo) + + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + + call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, & + tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) + +end + +! J1 + +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(X_ovoo) + + double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) + allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, & + t1, nO, & + 0.d0, tmp_cc, cholesky_mo_num*nV) + + call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & + 0.d0, J1_tmp, nV*nO) + + deallocate(tmp_cc) + + do i=1,nO + do b=1,nV + do a=1,nV + do u=1,nO + J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) + enddo + enddo + enddo + enddo + + deallocate(J1_tmp) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + do j = 1, nO + !$omp do + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + do i = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + !$omp end do + enddo + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) + + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & + t1v, cholesky_mo_num*nO) + + call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & + K1tmp, nO*nO) + + deallocate(t1v) + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z,K1tmp) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(K1tmp,X,Y,Z) + +end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index a267cc45..09d6a0fe 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) call wall_time(tfi) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then print*,'Total:',tfi-tbi,'s' @@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 1aab6bd7..12a71045 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do a = 1, nV do b = a+1, nV do c = b+1, nV diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 31fe67ce..13fa4f1a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo !$OMP END DO nowait + !$OMP BARRIER !$OMP END PARALLEL double precision, external :: ccsd_t_task_aba @@ -209,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) print '(A)', '' - print '(A)', ' +----------------------+--------------+----------+' - print '(A)', ' | E(CCSD(T)) | Error | % |' - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ==========' + print '(A)', ' E(CCSD(T)) Error % ' + print '(A)', ' ======================= ============== ==========' call wall_time(t00) @@ -256,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (imin >= bounds(2,isample)) then cycle endif - ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1 if (sampled(ieta) == -1_8) then sampled(ieta) = 0_8 @@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call wall_time(t01) if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then - t00 = t01 !$OMP TASKWAIT + call wall_time(t01) + t00 = t01 double precision :: ET, ET2 double precision :: energy_stoch, energy_det @@ -322,17 +324,20 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ========== ' print '(A)', '' - deallocate(X_vovv,X_ooov,T_voov,T_oovv) + deallocate(X_vovv) + deallocate(X_ooov) + deallocate(T_voov) + deallocate(T_oovv) end diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7909007a..3b048c14 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ time-time0 ! Old print - !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, & + !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, & ! pt2_data % pt2(pt2_stoch_istate) +E, & ! pt2_data_err % pt2(pt2_stoch_istate), & ! pt2_data % variance(pt2_stoch_istate), & diff --git a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f index 4270e7b8..9bba162e 100644 --- a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f @@ -53,7 +53,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij ! HOTSPOT + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT enddo end do !!!!!!!!!! @@ -72,7 +72,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do end do @@ -109,7 +109,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end do @@ -128,7 +128,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f index bc19e7e4..b2a38e02 100644 --- a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f @@ -76,7 +76,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -88,7 +88,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -114,7 +114,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -126,7 +126,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -169,7 +169,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -180,7 +180,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -211,7 +211,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -222,7 +222,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -265,7 +265,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,1) - hij_cache(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do do putj=hfix+1,mo_num @@ -274,7 +274,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,2) - hij_cache(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do @@ -293,7 +293,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,1) - hji_cache(putj,2) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do do putj=hfix+1,mo_num @@ -302,7 +302,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,2) - hji_cache(putj,1) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do @@ -342,7 +342,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -353,7 +353,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -385,7 +385,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -396,7 +396,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -445,8 +445,8 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, do k=1,N_states ! take conjugate to get contribution to instead of ! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f index 0a08c808..d01ed433 100644 --- a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -79,12 +79,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if end do @@ -103,12 +103,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end do @@ -135,7 +135,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo endif end do @@ -154,7 +154,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo endif end do @@ -189,7 +189,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij enddo end do end do @@ -210,7 +210,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji enddo end do end do @@ -239,12 +239,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo endif end do @@ -262,12 +262,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo endif end do @@ -290,7 +290,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if !! @@ -299,7 +299,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end if diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 77377554..06cf848b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -893,20 +893,45 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function enddo else if(debug_tc_pt2 == 2)then !! debugging the new version +! psi_h_alpha_tmp = 0.d0 +! alpha_h_psi_tmp = 0.d0 +! do iii = 1, N_det_selectors ! old version +! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) +! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) +! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function +! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function +! enddo psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function + do iii = 1, N_det ! old version + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) + psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function + alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function enddo if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) if(error.gt.1.d-2)then + call debug_det(det, N_int) print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E + print*,psi_h_alpha , alpha_h_psi + print*,psi_h_alpha_tmp , alpha_h_psi_tmp + print*,'selectors ' + do iii = 1, N_det_selectors ! old version + print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + print*,i_h_alpha,alpha_h_i + call debug_det(psi_selectors(1,1,iii),N_int) + enddo +! print*,'psi_det ' +! do iii = 1, N_det! old version +! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) +! call debug_det(psi_det(1,1,iii),N_int) +! enddo + stop endif endif else diff --git a/src/cisd/EZFIO.cfg b/src/cisd/EZFIO.cfg index 4565d2df..688f802a 100644 --- a/src/cisd/EZFIO.cfg +++ b/src/cisd/EZFIO.cfg @@ -5,3 +5,11 @@ interface: ezfio size: (determinants.n_states) + +[lcc_energy] +type: double precision +doc: lccsd energy +interface: ezfio +size: (determinants.n_states) + + diff --git a/src/cisd/NEED b/src/cisd/NEED index d9ad3efc..616d021e 100644 --- a/src/cisd/NEED +++ b/src/cisd/NEED @@ -1,3 +1,4 @@ selectors_full single_ref_method davidson_undressed +dav_general_mat diff --git a/src/cisd/lccsd.irp.f b/src/cisd/lccsd.irp.f new file mode 100644 index 00000000..919c5aaa --- /dev/null +++ b/src/cisd/lccsd.irp.f @@ -0,0 +1,95 @@ +program lccsd + implicit none + BEGIN_DOC +! Linerarized CCSD +! + ! This program takes a reference Slater determinant of ROHF-like occupancy, + ! + ! and performs all single and double excitations on top of it, disregarding + ! spatial symmetry and compute the "n_states" lowest eigenstates of that CI + ! matrix (see :option:`determinants n_states`). + ! + ! This program can be useful in many cases: + ! + ! * **Ground state calculation**: if even after a :c:func:`cis` calculation, natural + ! orbitals (see :c:func:`save_natorb`) and then :c:func:`scf` optimization, you are not sure to have the lowest scf + ! solution, + ! do the same strategy with the :c:func:`cisd` executable instead of the :c:func:`cis` exectuable to generate the natural + ! orbitals as a guess for the :c:func:`scf`. + ! + ! + ! + ! * **Excited states calculations**: the lowest excited states are much likely to + ! be dominanted by single- or double-excitations. + ! Therefore, running a :c:func:`cisd` will save the "n_states" lowest states within + ! the CISD space + ! in the |EZFIO| directory, which can afterward be used as guess wave functions + ! for a further multi-state fci calculation if you specify "read_wf" = True + ! before running the fci executable (see :option:`determinants read_wf`). + ! Also, if you specify "s2_eig" = True, the cisd will only retain states + ! having the good value :math:`S^2` value + ! (see :option:`determinants expected_s2` and :option:`determinants s2_eig`). + ! If "s2_eig" = False, it will take the lowest n_states, whatever + ! multiplicity they are. + ! + ! + ! + ! Note: if you would like to discard some orbitals, use + ! :ref:`qp_set_mo_class` to specify: + ! + ! * "core" orbitals which will be always doubly occupied + ! + ! * "act" orbitals where an electron can be either excited from or to + ! + ! * "del" orbitals which will be never occupied + ! + END_DOC + PROVIDE N_states + read_wf = .False. + TOUCH read_wf + call run +end + +subroutine run + implicit none + + if(pseudo_sym)then + call H_apply_cisd_sym + else + call H_apply_cisd + endif + call get_lccsd_2 +end + +subroutine get_lccsd_2 + implicit none + integer :: i,k + double precision :: cisdq(N_states), delta_e + double precision,external :: diag_h_mat_elem + psi_coef = lccsd_coef + SOFT_TOUCH psi_coef + call save_wavefunction_truncated(save_threshold) + call ezfio_set_cisd_lcc_energy(lccsd_energies) + + print *, 'N_det = ', N_det + print*,'' + print*,'******************************' + print *, 'LCCSD Energies' + do i = 1,N_states + print *, i, lccsd_energies(i) + enddo + if (N_states > 1) then + print*,'******************************' + print*,'Excitation energies (au) (LCCSD)' + do i = 2, N_states + print*, i ,lccsd_energies(i) - lccsd_energies(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (LCCSD)' + do i = 2, N_states + print*, i ,(lccsd_energies(i) - lccsd_energies(1)) * ha_to_ev + enddo + endif + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f new file mode 100644 index 00000000..8338cf81 --- /dev/null +++ b/src/cisd/lccsd_prov.irp.f @@ -0,0 +1,50 @@ + BEGIN_PROVIDER [ double precision, lccsd_coef, (N_det, N_states)] +&BEGIN_PROVIDER [ double precision, lccsd_energies, (N_states)] + implicit none + double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) + double precision :: ebefore, eafter, ecorr, thresh + integer :: i,it,degree + logical :: converged + external H_u_0_nstates_openmp + allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) + thresh = 1.d-6 + converged = .False. + Dress_jj = 0.d0 + u_in = 0.d0 + it = 0 + ! initial guess + do i = 1, N_states_diag + u_in(i,i) = 1.d0 + enddo + do i = 1,N_det + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,H_jj(i)) + enddo + ebefore = H_jj(1) + do while (.not.converged) + it += 1 + print*,'N_det = ',N_det + call davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,lccsd_energies,& + N_det,N_states,N_states_diag,converged,H_u_0_nstates_openmp) + ecorr = lccsd_energies(1) - H_jj(1) + print*,'---------------------' + print*,'it = ',it + print*,'ecorr = ',ecorr + Dress_jj(1) = 0.d0 + do i = 2, N_det + if(ecorr + H_jj(i) .lt. H_jj(1))then + print*,'Warning, some dets are not dressed: ' + call get_excitation_degree(ref_bitmask,psi_det(1,1,i),degree,N_int) + print*,'degree, Delta E, coef', degree, H_jj(i)-H_jj(1), u_in(i,1)/u_in(1,1) + else + Dress_jj(i) = ecorr + endif + enddo + eafter = lccsd_energies(1) + converged = (dabs(eafter - ebefore).lt.thresh) + ebefore = eafter + enddo + do i = 1, N_states + lccsd_coef(1:N_det,i) = u_in(1:N_det,i) + enddo + +END_PROVIDER diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 73608720..0dc939cb 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index e59d21d1..24f4fa10 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c045aa1a..cedaaf0a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 2621e3a9..deb7e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index cd9124e6..9940bf1e 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index 26853df9..b7179c18 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..fa8aff80 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 45258c1c..7b559925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 3ff060a6..96ca84ab 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, !don't print continue else - write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f index fed2ba9b..7ca2d203 100644 --- a/src/davidson_keywords/usef.irp.f +++ b/src/davidson_keywords/usef.irp.f @@ -13,7 +13,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io read(env,*) nthreads_davidson + call unlock_io call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f index 1a1d92b5..ce4d96c2 100644 --- a/src/determinants/density_matrix.irp.f +++ b/src/determinants/density_matrix.irp.f @@ -117,7 +117,7 @@ END_PROVIDER !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) ) tmp_a = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_a=1,N_det krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) @@ -173,7 +173,7 @@ END_PROVIDER deallocate(tmp_a) tmp_b = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_b=1,N_det krow = psi_bilinear_matrix_transp_rows(k_b) ASSERT (krow <= N_det_alpha_unique) diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index 06fca0cd..e445c56b 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -66,9 +66,9 @@ END_PROVIDER write(*,'(i16)',advance='no') i end do write(*,*) '' - write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment - write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment - write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment + write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment + write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment + write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment !print*, 'x_dipole_moment = ',x_dipole_moment !print*, 'y_dipole_moment = ',y_dipole_moment !print*, 'z_dipole_moment = ',z_dipole_moment diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index 078c2104..65f1a832 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates) enddo !$OMP END DO - !$OMP DO schedule(dynamic,1024) + !$OMP DO schedule(guided,64) do i=1,N_det-1 if (duplicate(i)) then cycle diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index 2c1a8757..6dc49526 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)& !$OMP REDUCTION(+:accu) allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do i = n,1,-1 ! Better OMP scheduling call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj) diff --git a/src/ezfio_files/NEED b/src/ezfio_files/NEED index d06d604c..1766924f 100644 --- a/src/ezfio_files/NEED +++ b/src/ezfio_files/NEED @@ -1,2 +1,3 @@ mpi zmq +utils diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 4f53b173..7e414a04 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -5,7 +5,9 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] ! variable if it is set, or as the 1st argument of the command line. END_DOC - PROVIDE mpi_initialized + PROVIDE mpi_initialized output_wall_time_0 + + integer :: i ! Get the QPACKAGE_INPUT environment variable call getenv('QPACKAGE_INPUT',ezfio_filename) @@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] END_PROVIDER BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] + use c_functions implicit none BEGIN_DOC ! EZFIO/work/ END_DOC - call ezfio_set_work_empty(.False.) + logical :: b + b = mkl_serv_intel_cpu_true() /= 1 + call ezfio_set_work_empty(b) ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 4d3de7d0..7f93ae55 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -27,6 +27,8 @@ END_PROVIDER implicit none BEGIN_DOC ! Determinants on which we apply for perturbation. + ! psi_selectors_coef_tc(iii,1,istate) = left coefficient of the iii determinant + ! psi_selectors_coef_tc(iii,2,istate) = right coefficient of the iii determinant END_DOC integer :: i,k diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 8c6658c5..a5ab6a60 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -190,47 +190,75 @@ END_PROVIDER deallocate(X) - ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + if (elec_alpha_num > elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif - allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + double precision :: rss + double precision :: memory_of_double + integer :: iblock + integer, parameter :: block_size = 32 + + rss = memory_of_double(ao_num*ao_num) + call check_mem(2.d0*block_size*rss, irp_here) + allocate(X2(ao_num,ao_num,block_size,2)) + allocate(X3(ao_num,block_size,ao_num,2)) + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_alpha, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,1), ao_num) + do iblock=1,cholesky_ao_num,block_size - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_beta, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,2), ao_num) + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,1), ao_num) - allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + else + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + enddo + enddo + enddo + endif + + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,1), ao_num*block_size, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,2), ao_num*block_size, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + endif - do s=1,ao_num - do j=1,cholesky_ao_num - do m=1,ao_num - X3(m,j,s,1) = X2(m,s,j,1) - X3(m,j,s,2) = X2(m,s,j,2) - enddo - enddo enddo - deallocate(X2) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_alpha_chol, ao_num) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_beta_chol, ao_num) - - deallocate(X3) + if (elec_alpha_num == elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif + deallocate(X2,X3) END_PROVIDER diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index e6aa2d67..078da3a2 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -2,7 +2,7 @@ type: character*(32) doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian interface: ezfio,provider,ocaml -default: full +default: diag [n_det_max_opt] type: integer @@ -14,7 +14,7 @@ default: 200000 type: integer doc: Maximal number of iterations for the orbital optimization interface: ezfio,provider,ocaml -default: 20 +default: 10 [thresh_opt_max_elem_grad] type: double precision diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f similarity index 96% rename from src/mo_optimization/optimization.irp.f rename to src/mo_optimization/cipsi_orb_opt.irp.f index 9892b3e3..ae3aa1bf 100644 --- a/src/mo_optimization/optimization.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -15,7 +15,7 @@ subroutine run_optimization logical :: not_converged character (len=100) :: filename - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals not_converged = .True. nb_iter = 0 diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization/first_gradient_opt.irp.f index d6918a00..f08b9d1f 100644 --- a/src/mo_optimization/first_gradient_opt.irp.f +++ b/src/mo_optimization/first_gradient_opt.irp.f @@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad) if (debug) then print*,'Matrix containing the gradient :' do i = 1, mo_num - write(*,'(100(E12.5))') A(i,1:mo_num) + write(*,'(100(ES12.5))') A(i,1:mo_num) enddo endif diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f index 2cd063da..05aec18a 100644 --- a/src/mo_optimization/state_average_energy.irp.f +++ b/src/mo_optimization/state_average_energy.irp.f @@ -39,17 +39,24 @@ subroutine state_average_energy(energy) double precision :: get_two_e_integral double precision :: mono_e, bi_e integer :: i,j,k,l - + + energy = nuclear_repulsion ! mono electronic part + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i,j,k,l,mono_e, bi_e) & + !$OMP SHARED(mo_num, mo_integrals_map, two_e_dm_mo, one_e_dm_mo, energy, & + !$OMP mo_one_e_integrals) mono_e = 0d0 + !$OMP DO do j = 1, mo_num do i = 1, mo_num mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) enddo enddo + !$OMP END DO NOWAIT ! bi electronic part bi_e = 0d0 + !$OMP DO do l = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -59,13 +66,17 @@ subroutine state_average_energy(energy) enddo enddo enddo + !$OMP END DO ! State average energy - energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + !$OMP CRITICAL + energy = energy + mono_e + 0.5d0 * bi_e + !$OMP END CRITICAL + !$OMP END PARALLEL ! Check !call print_energy_components - + print*,'State average energy:', energy !print*,ci_energy diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 32c0dccd..349f13b9 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,49 +1,51 @@ -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] +BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC - ! Cholesky vectors in MO basis + ! Number of Cholesky vectors in MO basis END_DOC - - integer :: k - - call set_multiple_levels_omp(.False.) - print *, 'AO->MO Transformation of Cholesky vectors' - !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) - enddo - !$OMP END PARALLEL DO - print *, '' - + cholesky_mo_num = cholesky_ao_num END_PROVIDER -BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis END_DOC - integer :: i,j,k - double precision, allocatable :: buffer(:,:) - - print *, 'AO->MO Transformation of Cholesky vectors .' + integer :: k, i, j call set_multiple_levels_omp(.False.) - !$OMP PARALLEL PRIVATE(i,j,k,buffer) - allocate(buffer(mo_num,mo_num)) - !$OMP DO SCHEDULE(static) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num do j=1,mo_num do i=1,mo_num - cholesky_mo_transp(k,i,j) = buffer(i,j) + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL - print *, '' + !$OMP END PARALLEL DO + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + double precision, allocatable :: X(:,:,:) + integer :: ierr + print *, 'AO->MO Transformation of Cholesky vectors' + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index d807f619..eb05da84 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -13,14 +13,14 @@ if (do_ao_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) - allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) do j=1,mo_num buffer_jj(:,j) = cholesky_mo_transp(:,j,j) enddo - call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - buffer_jj, cholesky_ao_num, 0.d0, & + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + buffer_jj, cholesky_mo_num, 0.d0, & buffer, mo_num*mo_num) do k = 1, mo_num @@ -36,9 +36,9 @@ do j = 1, mo_num - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, 0.d0, & buffer_jj, mo_num) do k=1,mo_num diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index a461504e..0e77b6a2 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) print*, 'MO integrals provided' return - else + endif + + if (.not. do_direct_integrals) then PROVIDE ao_two_e_integrals_in_map endif @@ -90,6 +92,10 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif + allocate (a1(ao_num,ao_num,ao_num,ao_num)) print *, 'Getting AOs' @@ -103,6 +109,7 @@ subroutine four_idx_dgemm enddo !$OMP END PARALLEL DO + print *, '1st transformation' ! 1st transformation allocate (a2(ao_num,ao_num,ao_num,mo_num)) @@ -166,11 +173,9 @@ subroutine four_idx_dgemm deallocate (a1) + call map_sort(mo_integrals_map) call map_unique(mo_integrals_map) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - end subroutine subroutine add_integrals_to_map(mask_ijkl) @@ -250,7 +255,7 @@ subroutine add_integrals_to_map(mask_ijkl) call wall_time(wall_1) - size_buffer = min(ao_num*ao_num*ao_num,8000000) + size_buffer = min(ao_num*ao_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' @@ -443,11 +448,6 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END PARALLEL call map_merge(mo_integrals_map) - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - deallocate(list_ijkl) @@ -465,51 +465,53 @@ subroutine add_integrals_to_map_cholesky integer :: size_buffer, n_integrals size_buffer = min(mo_num*mo_num*mo_num,16000000) - double precision, allocatable :: Vtmp(:,:,:,:) + double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) - if (.True.) then - ! In-memory transformation + call set_multiple_levels_omp(.False.) - allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + allocate (Vtmp(mo_num,mo_num,mo_num)) + n_integrals = 0 - call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & - cholesky_mo, mo_num*mo_num, & - cholesky_mo, mo_num*mo_num, 0.d0, & + !$OMP DO SCHEDULE(dynamic) + do l=1,mo_num + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_mo_num,1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & Vtmp, mo_num*mo_num) - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) - allocate (buffer_i(size_buffer), buffer_value(size_buffer)) - n_integrals = 0 - !$OMP DO - do l=1,mo_num - do k=1,l - do j=1,mo_num - do i=1,j - if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then - n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k,l) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - n_integrals = 0 - endif + do k=1,l + do j=1,mo_num + do i=1,j + if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then + n_integrals = n_integrals + 1 + buffer_value(n_integrals) = Vtmp(i,j,k) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 endif - enddo + endif enddo enddo enddo - !$OMP END DO + enddo + !$OMP END DO NOWAIT + + if (n_integrals > 0) then call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - - deallocate(Vtmp) - call map_unique(mo_integrals_map) - endif + deallocate(buffer_i, buffer_value, Vtmp) + !$OMP BARRIER + !$OMP END PARALLEL + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) end @@ -580,6 +582,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -855,6 +860,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -1350,16 +1358,29 @@ END_PROVIDER ! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij END_DOC - integer :: i,j + integer :: i,j,k double precision :: get_two_e_integral if (do_ao_cholesky) then + double precision, allocatable :: buffer(:,:) + allocate (buffer(cholesky_mo_num,mo_num)) + do k=1,cholesky_mo_num + do i=1,mo_num + buffer(k,i) = cholesky_mo_transp(k,i,i) + enddo + enddo + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num,1.d0, & + buffer, cholesky_mo_num, buffer, cholesky_mo_num, 0.d0, mo_two_e_integrals_jj, mo_num) + deallocate(buffer) + do j=1,mo_num do i=1,mo_num - !TODO: use dgemm - mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) - mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + mo_two_e_integrals_jj_exchange(i,j) = 0.d0 + do k=1,cholesky_mo_num + mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & + cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) + enddo enddo enddo diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f new file mode 100644 index 00000000..4c8c107a --- /dev/null +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -0,0 +1,391 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(3,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(3,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(3,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(3,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) + else + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) + else + print*,'PB !!' + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + endif + hthree *= phase + htot = hthree + end + diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 1d1b26cc..e96e738e 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,3 +90,96 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,chi_H_i_array,i_H_phi_array) + use bitmasks + implicit none + BEGIN_DOC +! Computes $\langle i|H|Phi \rangle = \sum_J c^R_J \langle i | H | J \rangle$. +! +! AND $\langle Chi|H| i \rangle = \sum_J c^L_J \langle J | H | i \rangle$. +! +! CONVENTION: i_H_phi_array(0) = total matrix element, +! +! i_H_phi_array(1) = one-electron matrix element, +! +! i_H_phi_array(2) = two-electron matrix element, +! +! i_H_phi_array(3) = three-electron matrix element, +! +! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$ +! is connected. +! +! The i_H_psi_minilist is much faster but requires to build the +! minilists. + END_DOC + integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate + integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) + integer(bit_kind), intent(in) :: key(Nint,2) + double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) + double precision, intent(out) :: chi_H_i_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + + integer :: i, ii,j + double precision :: phase + integer :: exc(0:2,2,2) + double precision :: hmono, htwoe, hthree, htot + integer, allocatable :: idx(:) + + ASSERT (Nint > 0) + ASSERT (N_int == Nint) + ASSERT (Nstate > 0) + ASSERT (Ndet > 0) + ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) + + chi_H_i_array = 0.d0 + i_H_phi_array = 0.d0 + + call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) + if (Nstate == 1) then + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + chi_H_i_array(0,1) = chi_H_i_array(0,1) + coef_l(i,1)*htot + chi_H_i_array(1,1) = chi_H_i_array(1,1) + coef_l(i,1)*hmono + chi_H_i_array(2,1) = chi_H_i_array(2,1) + coef_l(i,1)*htwoe + chi_H_i_array(3,1) = chi_H_i_array(3,1) + coef_l(i,1)*hthree + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + i_H_phi_array(0,1) = i_H_phi_array(0,1) + coef_r(i,1)*htot + i_H_phi_array(1,1) = i_H_phi_array(1,1) + coef_r(i,1)*hmono + i_H_phi_array(2,1) = i_H_phi_array(2,1) + coef_r(i,1)*htwoe + i_H_phi_array(3,1) = i_H_phi_array(3,1) + coef_r(i,1)*hthree + enddo + + else + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + chi_H_i_array(0,j) = chi_H_i_array(0,j) + coef_l(i,j)*htot + chi_H_i_array(1,j) = chi_H_i_array(1,j) + coef_l(i,j)*hmono + chi_H_i_array(2,j) = chi_H_i_array(2,j) + coef_l(i,j)*htwoe + chi_H_i_array(3,j) = chi_H_i_array(3,j) + coef_l(i,j)*hthree + enddo + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_phi_array(0,j) = i_H_phi_array(0,j) + coef_r(i,j)*htot + i_H_phi_array(1,j) = i_H_phi_array(1,j) + coef_r(i,j)*hmono + i_H_phi_array(2,j) = i_H_phi_array(2,j) + coef_r(i,j)*htwoe + i_H_phi_array(3,j) = i_H_phi_array(3,j) + coef_r(i,j)*hthree + enddo + enddo + + endif + +end + diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index e5e63355..f40805a9 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -120,6 +120,13 @@ END_PROVIDER subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC +! give the contribution for a double excitation of opposite spin BUT averaged over spin +! +! it is the average of and +! +! because the orbitals h1,h2,p1,p2 are spatial orbitals and therefore can be of different spins + END_DOC implicit none integer, intent(in) :: Nint, h1, h2, p1, p2 @@ -158,7 +165,8 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) ! spin average +! hthree += 1.d0 * int_direct - 1.0d0 * (int_exc_13 + int_exc_12) enddo return diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 868de444..37dfe051 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -62,7 +62,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo @@ -71,7 +71,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 enddo enddo @@ -128,7 +128,7 @@ subroutine ERI_dump() do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l) enddo enddo enddo @@ -167,8 +167,8 @@ subroutine LMat_tilde_dump() !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral ! TCHint convention if(dabs(integral).gt.1d-10) then - write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n - !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k endif enddo enddo diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 35abbbc4..cb33d343 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -184,7 +184,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) +! !ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index f12b83e3..c3a5ba6b 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho() if(three_e_4_idx_term) then PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif + if(pure_three_body_h_tc)then + provide three_body_ints_bi_ort + endif if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort @@ -87,14 +90,26 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) - if(degree.gt.2) return - - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + if(.not.pure_three_body_h_tc)then + if(degree.gt.2) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + else + if(degree.gt.3) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + else + call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif endif if(degree==0) then diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f027c38f..a9e22e03 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,6 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + external H_tc_s2_dagger_u_0_with_pure_three_omp + external H_tc_s2_u_0_with_pure_three_omp allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -250,7 +252,11 @@ end converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -275,7 +281,11 @@ end converged = .False. i_it = 0 do while (.not. converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -328,6 +338,11 @@ end TOUCH psi_r_coef_bi_ortho call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) deallocate(buffer) +! print*,'After diag' +! do i = 1, N_det! old version +! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) +! call debug_det(psi_det(1,1,i),N_int) +! enddo END_PROVIDER diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index b7e5ae81..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -23,7 +23,7 @@ dm_tmp(1:mo_num,1:mo_num) = -tc_transition_matrix_mo(1:mo_num,1:mo_num,1,1) - print *, ' dm_tmp' + print *, ' Transition density matrix ' do i = 1, mo_num fock_diag(i) = fock_matrix_tc_mo_tot(i,i) write(*, '(100(F16.10,X))') -dm_tmp(:,i) @@ -32,12 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f index 5bb0e2c0..a13dc9a2 100644 --- a/src/tc_bi_ortho/tc_prop.irp.f +++ b/src/tc_bi_ortho/tc_prop.irp.f @@ -29,7 +29,7 @@ tc_transition_matrix_mo_alpha(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo do p = 1, n_occ_ab(2) ! browsing the beta electrons - m = occ(p,1) + m = occ(p,2) tc_transition_matrix_mo_beta(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo else @@ -38,12 +38,14 @@ ! Single alpha h = exc(1,1,1) ! hole in psi_det(1,1,j) p = exc(1,2,1) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) else ! Single beta h = exc(1,1,2) ! hole in psi_det(1,1,j) p = exc(1,2,2) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_beta(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_beta(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) endif endif enddo diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index e805eecb..0cf27396 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -33,7 +33,7 @@ subroutine test integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) integer :: exc(0:2,2,2) integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal + double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp integer, allocatable :: occ(:,:) allocate( occ(N_int*bit_kind_size,2) ) call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) @@ -45,15 +45,44 @@ subroutine test do p1 = elec_alpha_num+1, mo_num do h2 = 1, elec_beta_num do p2 = elec_beta_num+1, mo_num + hthree = 0.d0 + det_i = ref_bitmask + s1 = 1 + s2 = 2 call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree *= phase -! !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + det_i = ref_bitmask + s1 = 2 + s2 = 1 + call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle + call do_single_excitation(det_i,h2,p2,s2,i_ok) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call get_excitation_degree(ref_bitmask,det_i,degree,N_int) + call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + + +! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) + if(dabs(hthree).lt.1.d-10)cycle + if(dabs(hthree-normal).gt.1.d-10)then +! print*,pp2,pp1,hh2,hh1 + print*,p2,p1,h2,h1 + print*,hthree,normal,dabs(hthree-normal) + stop + endif +! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) ! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) accu += dabs(hthree-normal) enddo @@ -86,8 +115,8 @@ do h1 = 1, elec_alpha_num integer :: hh1, pp1, hh2, pp2, ss1, ss2 call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) +! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) if(dabs(hthree).lt.1.d-10)cycle if(dabs(hthree-normal).gt.1.d-10)then print*,pp2,pp1,hh2,hh1 diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index b398507a..7c70b119 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -14,12 +14,14 @@ program test_tc read_wf = .True. touch read_wf - call routine_test_s2 - call routine_test_s2_davidson + call provide_all_three_ints_bi_ortho() + call routine_h_triple_left + call routine_h_triple_right +! call routine_test_s2_davidson end -subroutine routine_test_s2 +subroutine routine_h_triple_right implicit none logical :: do_right integer :: sze ,i, N_st, j @@ -29,67 +31,65 @@ subroutine routine_test_s2 sze = N_det N_st = 1 allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Left ' - do_right = .False. - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) - accu_e = 0.d0 - accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - - print*,'Checking then the right ' - do_right = .True. + print*,'Checking first the Right ' do i = 1, sze u_0(i,1) = psi_r_coef_bi_ortho(i,1) enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 accu_e = 0.d0 accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) enddo print*,'accu_e = ',accu_e print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - end +subroutine routine_h_triple_left + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking the Left ' + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 + accu_e = 0.d0 + accu_s = 0.d0 + do i = 1, sze + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + +end + + subroutine routine_test_s2_davidson implicit none double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index 182c03d7..f1a7cc0a 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -152,9 +152,7 @@ subroutine routine_tot() ! do i = 1, elec_num_tab(s1) ! do a = elec_num_tab(s1)+1, mo_num ! virtual do i = 1, elec_beta_num - do a = elec_beta_num+1, elec_alpha_num! virtual -! do i = elec_beta_num+1, elec_alpha_num -! do a = elec_alpha_num+1, mo_num! virtual + do a = elec_beta_num+1, mo_num! virtual print*,i,a det_i = ref_bitmask @@ -167,7 +165,7 @@ subroutine routine_tot() call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij - if(dabs(htilde_ij).lt.1.d-10)cycle +! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' call debug_det(det_i, N_int) @@ -184,9 +182,12 @@ subroutine routine_tot() ! endif err_ai = dabs(dabs(ref) - dabs(new)) if(err_ai .gt. 1d-7) then + print*,'---------' print*,'s1 = ',s1 print*, ' warning on', i, a print*, ref,new,err_ai + print*,hmono, htwoe, hthree + print*,'---------' endif print*, ref,new,err_ai err_tot += err_ai diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f new file mode 100644 index 00000000..68b96f37 --- /dev/null +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -0,0 +1,67 @@ +program test_tc_rdm + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call test() + +end + +subroutine test + implicit none + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new + double precision :: hmono, htwoe, hthree, htot + accu = 0.d0 + accu_new = 0.d0 + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + rdm = tc_two_rdm(p2,p1,h2,h1) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_s1s2(p2,p1,h2,h1,s1,s2) + enddo + enddo + accu_new += integral * rdm_new + enddo + enddo + enddo + enddo + accu *= 0.5d0 + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new + ref = 0.d0 + do i = 1, N_det + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + do istate = 1,N_states + ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe + enddo + enddo + enddo + print*,' ref = ',ref + print*,'delta= ',ref-accu + +end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f new file mode 100644 index 00000000..90163de5 --- /dev/null +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -0,0 +1,166 @@ + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] + implicit none + BEGIN_DOC + ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION + END_DOC + integer :: i,j,istate,m,mm,nn + integer :: exc(0:2,2,2) + double precision :: phase + double precision :: contrib + integer :: h1,p1,s1,h2,p2,s2,degree + integer, allocatable :: occ(:,:) + integer :: n_occ_ab(2),other_spin(2) + other_spin(1) = 2 + other_spin(2) = 1 + allocate(occ(N_int*bit_kind_size,2)) + tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 + + do i = 1, N_det ! psi_left + do j = 1, N_det ! psi_right + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree.gt.2)cycle + if(degree.gt.0)then + ! get excitation operators: from psi_det(j) --> psi_det(i) + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo + if(degree == 2)then + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + else if(degree==1)then + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + + ! run over the electrons of opposite spin than the excitation + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + ! run over the electrons of same spin than the excitation + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + endif + else if(degree == 0)then +! cycle + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) + enddo + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + s1 = 1 ! alpha electrons + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of alpha-beta electrons + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + ! run over the couple of alpha-alpha electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + enddo + s1 = 2 + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of beta-beta electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + enddo + endif + enddo + enddo + +END_PROVIDER + +subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) + implicit none + integer, intent(in) :: h1,p1,h2,p2,s1,s2,sze + double precision, intent(in) :: contrib + double precision, intent(inout) :: array(sze, sze, sze, sze) + integer :: istate + if(s1.ne.s2)then + array(p1,h1,p2,h2) += contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + else ! same spin double excitation + array(p1,h1,p2,h2) += contrib + ! exchange + ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + ! exchanging the + array(p1,h2,p2,h1) -= contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + endif + +end + + + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION + END_DOC + integer :: p,q,r,s,s1,s2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm(p,q,s,r) = tc_two_rdm_chemist(p,s,q,r) + enddo + enddo + enddo + enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo + +END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index fee492b4..fe396381 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -220,6 +220,12 @@ doc: Threshold to determine if diagonal elements of the bi-orthogonal condition interface: ezfio,provider,ocaml default: 1.e-6 +[thresh_lr_angle] +type: double precision +doc: Maximum value of the angle between the couple of left and right orbital for the rotations +interface: ezfio,provider,ocaml +default: 20.0 + [thresh_biorthog_nondiag] type: Threshold doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0 diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index b86009ee..98c7b230 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -72,7 +72,7 @@ subroutine molden_lr write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -170,7 +170,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo write (i_unit_output,*) 'Sym= 1' @@ -178,7 +178,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -235,7 +235,7 @@ subroutine molden_l() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -333,7 +333,7 @@ subroutine molden_l() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -390,7 +390,7 @@ subroutine molden_r() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -488,7 +488,7 @@ subroutine molden_r() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 755c35b9..588382b5 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -140,7 +140,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! compute the overlap between the left and rescaled right call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + if(n_core_orb.ne.0)then + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + else + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + endif print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -152,6 +156,8 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) + if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .eq. 1) cycle allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) @@ -279,7 +285,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) - good_angles = max_angle.lt.45.d0 + good_angles = max_angle.lt.thresh_lr_angle print *, ' max_angle = ', max_angle deallocate(new_angles) @@ -397,11 +403,11 @@ subroutine print_energy_and_mos(good_angles) print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right - if(max_angle_left_right .lt. 45.d0) then + if(max_angle_left_right .lt. thresh_lr_angle) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' good_angles = .true. - else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then - print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. thresh_lr_angle .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between thresh_lr_angle and 75 degrees, this is not the best for TC-CI calculations ...' good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index 830a141e..e5902a6f 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -44,7 +44,7 @@ program molden write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -142,7 +142,7 @@ program molden write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/print_ci_vectors.irp.f b/src/tools/print_ci_vectors.irp.f index 97dfdc0b..d5f86213 100644 --- a/src/tools/print_ci_vectors.irp.f +++ b/src/tools/print_ci_vectors.irp.f @@ -28,7 +28,7 @@ subroutine routine do i = 1, N_det print *, 'Determinant ', i call debug_det(psi_det(1,1,i),N_int) - print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states) + print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states) print *, '' print *, '' enddo diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f index cd417a9d..ea636212 100644 --- a/src/two_body_rdm/state_av_act_2rdm.irp.f +++ b/src/two_body_rdm/state_av_act_2rdm.irp.f @@ -17,12 +17,12 @@ state_weights = state_average_weight integer :: ispin ! condition for alpha/beta spin - print*,'' - print*,'' - print*,'' - print*,'providint state_av_act_2_rdm_ab_mo ' +! print*,'' +! print*,'' +! print*,'' +! print*,'Providing state_av_act_2_rdm_ab_mo ' ispin = 3 - print*,'ispin = ',ispin +! print*,'ispin = ',ispin state_av_act_2_rdm_ab_mo = 0.d0 call wall_time(wall_1) double precision :: wall_1, wall_2 diff --git a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f index 2e44665d..5fb9e475 100644 --- a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f +++ b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f @@ -4,7 +4,7 @@ state_av_full_occ_2_rdm_ab_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons +! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -12,11 +12,19 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2 ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" -! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core state_av_full_occ_2_rdm_ab_mo = 0.d0 + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_ab_mo, state_av_full_occ_2_rdm_ab_mo) + + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -25,15 +33,17 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - ! alph beta alph beta - state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & + ! alph beta alph beta + state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_ab_mo(l,k,j,i) enddo enddo enddo enddo - !! BETA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! BETA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -45,9 +55,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA ACTIVE - BETA inactive - !! + !! ALPHA ACTIVE - BETA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -59,9 +71,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - BETA INACTIVE - !! + !! ALPHA INACTIVE - BETA INACTIVE + !! + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -70,13 +84,15 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! BETA ACTIVE - ALPHA CORE - !! + !! BETA ACTIVE - ALPHA CORE + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -88,9 +104,11 @@ enddo enddo enddo - + !$OMP END DO + !! ALPHA ACTIVE - BETA CORE - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -102,9 +120,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA CORE - BETA CORE - !! + !! ALPHA CORE - BETA CORE + !! + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -113,9 +133,11 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_aa_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] @@ -123,7 +145,7 @@ state_av_full_occ_2_rdm_aa_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons +! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -131,13 +153,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_aa_mo, state_av_full_occ_2_rdm_aa_mo) !! PURE ACTIVE PART ALPHA-ALPHA - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -152,74 +181,84 @@ enddo enddo enddo - !! ALPHA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! ALPHA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - ALPHA INACTIVE + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!! -!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!! CAN BE USED +!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE - + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_bb_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_bb_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * ! @@ -227,13 +266,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_bb_mo, state_av_full_occ_2_rdm_bb_mo) !! PURE ACTIVE PART beta-beta - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -242,80 +288,90 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & + state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_bb_mo(l,k,j,i) enddo enddo enddo enddo - !! beta ACTIVE - beta inactive - !! + !$OMP END DO + !! beta ACTIVE - beta inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! beta INACTIVE - beta INACTIVE + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE - + !$OMP END DO + !! beta CORE - beta CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_spin_trace_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_spin_trace_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * \sum_{sigma,sigma'} ! @@ -324,14 +380,22 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + PROVIDE n_core_orb list_core + + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_spin_trace_mo, state_av_full_occ_2_rdm_spin_trace_mo) + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !! PURE ACTIVE PART SPIN-TRACE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -340,128 +404,146 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & + state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & state_av_act_2_rdm_spin_trace_mo(l,k,j,i) enddo enddo enddo enddo + !$OMP END DO - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! BETA-BETA !!!!! - !! beta ACTIVE - beta inactive + !! beta ACTIVE - beta inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta INACTIVE - beta INACTIVE + !$OMP END DO + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE + !$OMP END DO + !! beta CORE - beta CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-ALPHA !!!!! - !! ALPHA ACTIVE - ALPHA inactive + !! ALPHA ACTIVE - ALPHA inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-BETA + BETA-ALPHA !!!!! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -474,14 +556,16 @@ ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) ! BETA INACTIVE - ALPHA ACTIVE - ! beta alph beta alpha + ! beta alph beta alpha state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! alph beta alph beta + ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - BETA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - BETA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -491,31 +575,35 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - !! BETA ACTIVE - ALPHA CORE + !! BETA ACTIVE - ALPHA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - !! ALPHA ACTIVE - BETA CORE + !! ALPHA ACTIVE - BETA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - BETA CORE + !$OMP END DO + !! ALPHA CORE - BETA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -525,7 +613,9 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0D0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 99be1f54..04c44f61 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,6 +16,9 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate + !$OMP PARALLEL DO PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_core_inact_act_orb, list_core_inact_act, & + !$OMP two_e_dm_mo, state_av_full_occ_2_rdm_spin_trace_mo) do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) do k=1,n_core_inact_act_orb @@ -29,7 +32,7 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo enddo - two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) + !$OMP END PARALLEL DO END_PROVIDER diff --git a/src/utils/block_diag_degen_core.irp.f b/src/utils/block_diag_degen_core.irp.f new file mode 100644 index 00000000..5d46bd87 --- /dev/null +++ b/src/utils/block_diag_degen_core.irp.f @@ -0,0 +1,244 @@ + +subroutine diag_mat_per_fock_degen_core(fock_diag, mat_ref, listcore,ncore, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) + + + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! the elements of listcore are untouched + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC + + implicit none + integer, intent(in) :: n,ncore, listcore(ncore) + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_listcore(fock_diag, n, listcore, ncore, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i + enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + + if(n_degen.ge.1000)then + print*,'core orbital ' + else + print *, ' diagonalizing for n_degen = ', n_degen + endif + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + if(n_degen.ge.1000)then + n_degen = 1 + endif + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) + enddo + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif + + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i + enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + +end + +! --- + +subroutine give_degen_full_listcore(A, n, listcore, ncore, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + ! + ! if list_degen(i,0) >= 1000 it means that it is core orbitals + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n,ncore, listcore(ncore) + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck,k + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + ! you first exclude the "core" orbitals + do i = 1, ncore + j=listcore(i) + is_ok(j) = .False. + enddo + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount + enddo + ! you set all the core orbitals as separate entities + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + if(icheck.ne.(n-ncore))then + print *, ' pb ! :: icheck.ne.n-ncore' + print *, icheck, n-ncore + stop + endif + k=1000 + do i = 1, ncore + n_degen_list+= 1 + j=listcore(i) + list_degen(n_degen_list,1) = i + list_degen(n_degen_list,0) = k + k+=1 + enddo + + + +end + +! --- + diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 65d4ad62..a9c8900b 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -57,6 +57,12 @@ module c_functions end subroutine sscanf_sd_c end interface + interface + integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C) + use iso_c_binding + end function + end interface + contains integer function atoi(a) @@ -131,4 +137,3 @@ subroutine usleep(us) call usleep_c(u) end subroutine usleep - diff --git a/src/utils/fast_mkl.c b/src/utils/fast_mkl.c new file mode 100644 index 00000000..aa1f82f1 --- /dev/null +++ b/src/utils/fast_mkl.c @@ -0,0 +1,5 @@ +int mkl_serv_intel_cpu_true() { + return 1; +} + + diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 7f7458b6..c253456e 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err write(str_size,'(I3)') size_nb ! Error - write(str_exp,'(1pE20.0)') error + write(str_exp,'(ES20.0)') error str_error = trim(adjustl(str_exp)) ! Number of digit: Y (FX.Y) from the exponent diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 52df2476..e8d85a2f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -9,7 +9,6 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) { - int i; int fd; int result; void* map; @@ -22,11 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); } else { @@ -53,16 +48,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } if (map == MAP_FAILED) { close(fd); - printf("%s:\n", filename); + printf("%s: %lu\n", filename, bytes); perror("Error mmapping the file"); exit(EXIT_FAILURE); } diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 76a539a6..314ad4f6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1565,7 +1565,7 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) ! Remove tiny elements do j=1,n do i=1,m - if ( dabs(A(i,j) * amax) < thresh ) then + if ( (dabs(A(i,j) * amax) < thresh).or.(dabs(A(i,j)) < 1.d-99) ) then A(i,j) = 0.d0 endif enddo @@ -1661,7 +1661,15 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! Update i i = i + 1 enddo - copy(i:) = 0.d0 + + ! To nullify the remaining elements that are below the threshold + if (i == sze) then + if (-copy(i) <= thresh) then + copy(i) = 0d0 + endif + else + copy(i:) = 0.d0 + endif !$OMP PARALLEL if (sze>10000) & !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index cd3b28a8..97d0e8bf 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map) integer*8 :: n_elements n_elements = int(map % n_elements,8) + if (n_elements <= 0) then + print *, 'Unable to write map to disk: n_elements = ', n_elements + stop -1 + endif if (map % consolidated) then diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 115b2cbe..41ec0428 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] ! Maximum memory in Gb END_DOC character*(128) :: env + integer, external :: get_total_available_memory - qp_max_mem = 2000 + qp_max_mem = get_total_available_memory() + call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() @@ -97,16 +99,15 @@ subroutine check_mem(rss_in,routine) END_DOC double precision, intent(in) :: rss_in character*(*) :: routine - double precision :: rss - !$OMP CRITICAL - call resident_memory(rss) - rss += rss_in - if (int(rss)+1 > qp_max_mem) then + double precision :: mem + call total_memory(mem) + mem += rss_in + if (mem > qp_max_mem) then + call print_memory_usage() print *, 'Not enough memory: aborting in ', routine - print *, int(rss)+1, ' GB required' + print *, mem, ' GB required' stop -1 endif - !$OMP END CRITICAL end subroutine print_memory_usage() @@ -122,3 +123,35 @@ subroutine print_memory_usage() '.. >>>>> [ RES MEM : ', rss , & ' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..' end + +integer function get_total_available_memory() result(res) + implicit none + BEGIN_DOC +! Returns the total available memory on the current machine + END_DOC + + character(len=128) :: line + integer :: status + integer :: iunit + integer*8, parameter :: KB = 1024 + integer*8, parameter :: GiB = 1024**3 + integer, external :: getUnitAndOpen + + iunit = getUnitAndOpen('/proc/meminfo','r') + + res = 512 + do + read(iunit, '(A)', END=10) line + if (line(1:10) == "MemTotal: ") then + read(line(11:), *, ERR=20) res + res = int((res*KB) / GiB,4) + exit + 20 continue + end if + end do + 10 continue + close(iunit) + +end function get_total_available_memory + + diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 49147283..41e60224 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,7 +46,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo + if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) else @@ -66,7 +72,12 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -82,7 +93,12 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..b2b68d05 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,31 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) - !$OMP DO + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_mo_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_mo_num) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) do i4 = 1, n4 - idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 - idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO else double precision :: get_two_e_integral @@ -105,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) end +subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) + + implicit none + + integer, intent(in) :: n1,n3,ldv + integer, intent(in) :: list1(n1),list3(n3) + double precision, intent(out) :: v(ldv,n1,n3) + + integer :: i1,i3,idx1,idx3,k + + !$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k) + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_mo_num + v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] @@ -112,16 +135,17 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_mo_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) allocate(buffer(mo_num,mo_num,mo_num)) !$OMP DO do i4 = 1, mo_num - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,i4), cholesky_mo_num, 0.d0, buffer, mo_num*mo_num) do i2 = 1, mo_num do i3 = 1, mo_num do i1 = 1, mo_num @@ -166,7 +190,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oooo,1) + n2 = size(cc_space_v_oooo,2) + n3 = size(cc_space_v_oooo,3) + n4 = size(cc_space_v_oooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + endif END_PROVIDER @@ -176,7 +233,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vooo,1) + n2 = size(cc_space_v_vooo,2) + n3 = size(cc_space_v_vooo,3) + n4 = size(cc_space_v_vooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + endif END_PROVIDER @@ -186,7 +276,32 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovoo,1) + n2 = size(cc_space_v_ovoo,2) + n3 = size(cc_space_v_ovoo,3) + n4 = size(cc_space_v_ovoo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovoo(i1,i2,i3,i4) = cc_space_v_vooo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + endif END_PROVIDER @@ -196,7 +311,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovo(i1,i2,i3,i4) = cc_space_v_vooo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + endif END_PROVIDER @@ -206,7 +345,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ooov(i1,i2,i3,i4) = cc_space_v_ovoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + endif END_PROVIDER @@ -216,7 +379,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vvoo,1) + n2 = size(cc_space_v_vvoo,2) + n3 = size(cc_space_v_vvoo,3) + n4 = size(cc_space_v_vvoo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_vo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vvoo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + endif END_PROVIDER @@ -226,7 +422,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vovo,1) + n2 = size(cc_space_v_vovo,2) + n3 = size(cc_space_v_vovo,3) + n4 = size(cc_space_v_vovo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vovo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + endif END_PROVIDER @@ -236,7 +465,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_voov,1) + n2 = size(cc_space_v_voov,2) + n3 = size(cc_space_v_voov,3) + n4 = size(cc_space_v_voov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_voov(i1,i2,i3,i4) = cc_space_v_vvoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + endif END_PROVIDER @@ -246,7 +499,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovvo,1) + n2 = size(cc_space_v_ovvo,2) + n3 = size(cc_space_v_ovvo,3) + n4 = size(cc_space_v_ovvo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovvo(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + endif END_PROVIDER @@ -256,7 +533,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovov,1) + n2 = size(cc_space_v_ovov,2) + n3 = size(cc_space_v_ovov,3) + n4 = size(cc_space_v_ovov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovov(i1,i2,i3,i4) = cc_space_v_vovo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + endif END_PROVIDER @@ -266,7 +567,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovv,1) + n2 = size(cc_space_v_oovv,2) + n3 = size(cc_space_v_oovv,3) + n4 = size(cc_space_v_oovv,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovv(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i4,i1,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + endif END_PROVIDER @@ -320,6 +645,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_mo_num, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_mo_num, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_mo_num, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_mo_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f index f268df04..db3d5c99 100644 --- a/src/utils_trust_region/rotation_matrix_iterative.irp.f +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'R' !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) + ! write(*,'(10(ES12.5))') R(i,:) !enddo do i = 1, m @@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'RRT' !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) + ! write(*,'(10(ES12.5))') RRT(i,:) !enddo max_elem = 0d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index b7dcf875..e98bbfb7 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - !write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,ES12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then