From 63650890e2e936934ba794b30961cef09d9792e3 Mon Sep 17 00:00:00 2001 From: Emmanuel Giner LCT Date: Fri, 22 Sep 2023 16:40:12 +0200 Subject: [PATCH 001/131] trying to fix bug --- src/tc_bi_ortho/psi_r_l_prov.irp.f | 6 ++++-- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index b28c417f..1d233b0b 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -1,6 +1,7 @@ use bitmasks -BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ] +!BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ] +BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (N_det,N_states) ] implicit none BEGIN_DOC ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file @@ -68,7 +69,8 @@ BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ] +!BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ] +BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (N_det,N_states) ] implicit none BEGIN_DOC ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a9e22e03..a636e8d6 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -320,24 +320,38 @@ end enddo double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) + allocate(buffer(psi_det_size,N_states)) + print*,'passed the allocate' +! print*,N_det,N_states +! print*,size(psi_l_coef_bi_ortho,1),size(psi_l_coef_bi_ortho,2) +! print*,size(leigvec_tc_bi_orth,1),size(leigvec_tc_bi_orth,2) +! print*,size(reigvec_tc_bi_orth,1),size(reigvec_tc_bi_orth,2) +! print*,size(psi_r_coef_bi_ortho,1),size(psi_r_coef_bi_ortho,2) + buffer = 0.d0 do k = 1, N_states do i = 1, N_det psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) buffer(i,k) = leigvec_tc_bi_orth(i,k) enddo enddo + print*,'passed the first loop' TOUCH psi_l_coef_bi_ortho + print*,'passed the TOUCH psi_l_coef_bi_ortho' call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + print*,'passed the ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho' do k = 1, N_states do i = 1, N_det psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) buffer(i,k) = reigvec_tc_bi_orth(i,k) enddo enddo + print*,'passed the second loop' TOUCH psi_r_coef_bi_ortho + print*,'passed the TOUCH psi_r_coef_bi_ortho' call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + print*,'passed the ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho' deallocate(buffer) + print*,'passed saving the wf' ! print*,'After diag' ! do i = 1, N_det! old version ! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) From eaab1b80648bd2b4ec38aa17722a751fccf42ce0 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 7 Mar 2024 07:34:59 +0100 Subject: [PATCH 002/131] few modif for HHG --- .../multi_s_dipole_moment.irp.f | 23 ++++++++++++++++ src/utils/constants.include.F | 27 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index 913ae2f3..f21e08cd 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -91,3 +91,26 @@ BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] enddo END_PROVIDER + +! --- + + BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval, (N_states)] + + implicit none + + PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment + + call lapack_diag(multi_s_x_dipole_moment_eigenval(1), multi_s_x_dipole_moment_eigenvec(1,1), multi_s_x_dipole_moment(1,1), N_states, N_states) + call lapack_diag(multi_s_y_dipole_moment_eigenval(1), multi_s_y_dipole_moment_eigenvec(1,1), multi_s_y_dipole_moment(1,1), N_states, N_states) + call lapack_diag(multi_s_z_dipole_moment_eigenval(1), multi_s_z_dipole_moment_eigenvec(1,1), multi_s_z_dipole_moment(1,1), N_states, N_states) + +END_PROVIDER + +! --- + + diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F index 422eff95..7b01f888 100644 --- a/src/utils/constants.include.F +++ b/src/utils/constants.include.F @@ -18,3 +18,30 @@ double precision, parameter :: c_4_3 = 4.d0/3.d0 double precision, parameter :: c_1_3 = 1.d0/3.d0 double precision, parameter :: sq_op5 = dsqrt(0.5d0) double precision, parameter :: dlog_2pi = dlog(2.d0*dacos(-1.d0)) + +! physical constants and units conversion factors +double precision, parameter :: k_boltzman_si = 1.38066d-23 ! K k^-1 +double precision, parameter :: k_boltzman_au = 3.1667d-6 ! Hartree k^-1 +double precision, parameter :: k_boltzman_m1_au = 315795.26d0 ! Hartree^-1 k +double precision, parameter :: bohr_radius_si = 0.529177d-10 ! m +double precision, parameter :: bohr_radius_cm = 0.529177d-8 ! cm +double precision, parameter :: bohr_radius_angs = 0.529177d0 ! Angstrom +double precision, parameter :: electronmass_si = 9.10953d-31 ! Kg +double precision, parameter :: electronmass_uma = 5.4858d-4 ! uma +double precision, parameter :: electronvolt_si = 1.6021892d-19 ! J +double precision, parameter :: uma_si = 1.66057d-27 ! Kg +double precision, parameter :: debye_si = 3.33564d-30 ! coulomb meter +double precision, parameter :: debye_au = 0.393427228d0 ! e * Bohr +double precision, parameter :: angstrom_to_au = 1.889727d0 ! au +double precision, parameter :: au_to_ohmcmm1 = 46000.0d0 ! (ohm cm)^-1 +double precision, parameter :: au_to_kb = 294210.0d0 ! kbar +double precision, parameter :: au_to_eV = 27.211652d0 +double precision, parameter :: uma_to_au = 1822.89d0 +double precision, parameter :: au_to_terahertz = 2.4189d-5 +double precision, parameter :: au_to_sec = 2.4189d-17 +double precision, parameter :: au_to_fsec = 2.4189d-2 +double precision, parameter :: Wcm2 = 3.5d16 +double precision, parameter :: amconv = 1.66042d-24/9.1095d-28*0.5d0 ! mass conversion: a.m.u to a.u. (ry) +double precision, parameter :: uakbar = 147105.d0 ! pressure conversion from ry/(a.u)^3 to k + + From 3f861a41b5438d1722fa003da233642c79d96a47 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 8 Mar 2024 17:27:18 +0100 Subject: [PATCH 003/131] added the thresh_de_tc_angles keyword in minimize tc angles --- bin/zcat | 23 --------------------- plugins/local/tc_keywords/EZFIO.cfg | 5 +++++ plugins/local/tc_scf/routines_rotates.irp.f | 1 + scripts/PYSCF_EOMCC.py | 1 + 4 files changed, 7 insertions(+), 23 deletions(-) delete mode 100755 bin/zcat create mode 120000 scripts/PYSCF_EOMCC.py diff --git a/bin/zcat b/bin/zcat deleted file mode 100755 index 7ccecf07..00000000 --- a/bin/zcat +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# On Darwin: try gzcat if available, otherwise use Python - -if [[ $(uname -s) = Darwin ]] ; then - which gzcat &> /dev/null - if [[ $? -eq 0 ]] ; then - exec gzcat $@ - else - - exec python3 << EOF -import sys -import gzip -with gzip.open("$1", "rt") as f: - print(f.read()) -EOF - fi -else - SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - command=$(which -a zcat | grep -v "$SCRIPTPATH/" | head -1) - exec $command $@ -fi - diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 93ff790f..68fe9c94 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -280,3 +280,8 @@ doc: approach used to evaluate TC integrals [ analytic | numeric | semi-analytic interface: ezfio,ocaml,provider default: semi-analytic +[thresh_de_tc_angles] +type: Threshold +doc: Thresholds on delta E for changing angles between orbitals +interface: ezfio,provider,ocaml +default: 1.e-03 diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f index c42e846e..92abfa44 100644 --- a/plugins/local/tc_scf/routines_rotates.irp.f +++ b/plugins/local/tc_scf/routines_rotates.irp.f @@ -301,6 +301,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! check if TC energy has changed E_new = TC_HF_energy + E_thr = thresh_de_tc_angles if(dabs(E_new - E_old) .gt. E_thr) then mo_r_coef = mo_r_coef_old mo_l_coef = mo_l_coef_old diff --git a/scripts/PYSCF_EOMCC.py b/scripts/PYSCF_EOMCC.py new file mode 120000 index 00000000..8ad341da --- /dev/null +++ b/scripts/PYSCF_EOMCC.py @@ -0,0 +1 @@ +/home_lct/eginer/qp2/plugins/qp_plugins_lct/dev/fcidump_for_vbarb/PYSCF_EOMCC.py \ No newline at end of file From d405aea95785060f7550be7901c90d133b287a65 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 11 Mar 2024 10:21:59 +0100 Subject: [PATCH 004/131] few mom opt --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 99 +++++++++++++++---- .../local/non_h_ints_mu/total_tc_int.irp.f | 2 + src/tools/print_detweights.irp.f | 66 +++++++++++++ 3 files changed, 148 insertions(+), 19 deletions(-) create mode 100644 src/tools/print_detweights.irp.f diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 5e6a24e9..bf5cc36f 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -40,38 +40,95 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, END_DOC implicit none - integer :: i, j, k, l, m, n, p, q + integer :: i, j, k, l, m, n, p, q, s, r + double precision :: t1, t2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) + double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) + + print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...' + call wall_time(t1) + call print_memory_usage() PROVIDE mo_r_coef mo_l_coef + PROVIDe ao_two_e_tc_tot - allocate(a2(ao_num,ao_num,ao_num,mo_num)) + if(ao_to_mo_tc_n3) then - call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & - , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num) + print*, ' memory scale of TC ao -> mo: O(N3) ' - allocate(a1(ao_num,ao_num,mo_num,mo_num)) + allocate(a_jkp(ao_num,ao_num,mo_num)) + allocate(a_kpq(ao_num,mo_num,mo_num)) + allocate(a_pqr(mo_num,mo_num,mo_num)) - call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + do s = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 - deallocate(a2) - allocate(a2(ao_num,mo_num,mo_num,mo_num)) + do l = 1, ao_num - call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,l), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a_jkp(1,1,1), ao_num*ao_num) + + call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a_kpq(1,1,1), ao_num*mo_num) + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a_pqr(1,1,1), mo_num*mo_num) - deallocate(a1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, q, r) & + !$OMP SHARED(s, l, mo_num, mo_bi_ortho_tc_two_e_chemist, mo_r_coef, a_pqr) + !$OMP DO COLLAPSE(2) + do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(p,q,r,s) = mo_bi_ortho_tc_two_e_chemist(p,q,r,s) + mo_r_coef(l,s) * a_pqr(p,q,r) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + enddo ! l + enddo ! s - deallocate(a2) + deallocate(a_jkp, a_kpq, a_pqr) + else + + print*, ' memory scale of TC ao -> mo: O(N4) ' + + allocate(a2(ao_num,ao_num,ao_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num) + + allocate(a1(ao_num,ao_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + + deallocate(a2) + allocate(a2(ao_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + + deallocate(a1) + + call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + + deallocate(a2) + + endif !allocate(a1(mo_num,ao_num,ao_num,ao_num)) !a1 = 0.d0 @@ -135,6 +192,10 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, !enddo !deallocate(a1) + call wall_time(t2) + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_chemist (min)', (t2-t1)/60.d0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 9d3cf565..ba078d9b 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -201,6 +201,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL + call clear_ao_map() + if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num endif diff --git a/src/tools/print_detweights.irp.f b/src/tools/print_detweights.irp.f new file mode 100644 index 00000000..d5b0f2c9 --- /dev/null +++ b/src/tools/print_detweights.irp.f @@ -0,0 +1,66 @@ +program print_detweights + + implicit none + + read_wf = .True. + touch read_wf + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i + integer :: degree + integer :: ios + integer, allocatable :: deg(:), ii(:), deg_sorted(:) + double precision, allocatable :: c(:) + + PROVIDE N_int + PROVIDE N_det + PROVIDE psi_det + PROVIDe psi_coef + + allocate(deg(N_det), ii(N_det), deg_sorted(N_det), c(N_det)) + + do i = 1, N_det + + call debug_det(psi_det(1,1,i), N_int) + call get_excitation_degree(psi_det(1,1,i), psi_det(1,1,1), degree, N_int) + + ii (i) = i + deg(i) = degree + c (i) = dabs(psi_coef(i,1)) + enddo + + call dsort(c, ii, N_det) + + do i = 1, N_det + deg_sorted(i) = deg(ii(i)) + enddo + + print *, ' saving psi' + + ! Writing output in binary format + open(unit=10, file="coef.bin", status="replace", action="write", iostat=ios, form="unformatted") + + if(ios /= 0) then + print *, ' Error opening file!' + stop + endif + + write(10) N_det + write(10) deg_sorted + write(10) c + + close(10) + + deallocate(deg, ii, deg_sorted, c) + +end + + From 1dbde5643920054cc16f148c50d84fed01a88b13 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 13 Mar 2024 07:04:54 +0100 Subject: [PATCH 005/131] O(N4) -> O(N3) transformations --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 19 ++++++++++++++++--- plugins/local/tc_keywords/EZFIO.cfg | 7 +++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index bf5cc36f..79bfd336 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -41,7 +41,7 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, implicit none integer :: i, j, k, l, m, n, p, q, s, r - double precision :: t1, t2 + double precision :: t1, t2, tt1, tt2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) @@ -60,9 +60,11 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, allocate(a_kpq(ao_num,mo_num,mo_num)) allocate(a_pqr(mo_num,mo_num,mo_num)) - do s = 1, mo_num - mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 + call wall_time(tt1) + do s = 1, mo_num + + mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 do l = 1, ao_num call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & @@ -93,6 +95,17 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, !$OMP END PARALLEL enddo ! l + + if(s == 2) then + call wall_time(tt2) + print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0 + elseif(s == 11) then + call wall_time(tt2) + print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/600.d0 + endif + enddo ! s deallocate(a_jkp, a_kpq, a_pqr) diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 93ff790f..a8491660 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -280,3 +280,10 @@ doc: approach used to evaluate TC integrals [ analytic | numeric | semi-analytic interface: ezfio,ocaml,provider default: semi-analytic +[ao_to_mo_tc_n3] +type: logical +doc: If |true|, memory scale of TC ao -> mo: O(N3) +interface: ezfio,provider,ocaml +default: False + + From 83ed57312d9bc86dc2ec4cbc486491ded16d7053 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 19 Mar 2024 17:23:41 +0100 Subject: [PATCH 006/131] few modif in ao tc integ --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 25 +++++++++++++++++-- .../local/non_h_ints_mu/tc_integ_num.irp.f | 2 +- .../local/non_h_ints_mu/total_tc_int.irp.f | 3 ++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 79bfd336..1e558038 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -50,7 +50,7 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, call print_memory_usage() PROVIDE mo_r_coef mo_l_coef - PROVIDe ao_two_e_tc_tot + PROVIDE ao_two_e_tc_tot if(ao_to_mo_tc_n3) then @@ -103,9 +103,30 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, elseif(s == 11) then call wall_time(tt2) print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/600.d0 + print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0) + elseif(s == 26) then + call wall_time(tt2) + print*, ' 25 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-25)*(tt2-tt1)/(60.d0*25.d0) + elseif(s == 51) then + call wall_time(tt2) + print*, ' 50 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-50)*(tt2-tt1)/(60.d0*50.d0) + elseif(s == 101) then + call wall_time(tt2) + print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0) + elseif(s == 201) then + call wall_time(tt2) + print*, ' 200 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-200)*(tt2-tt1)/(60.d0*200.d0) + elseif(s == 501) then + call wall_time(tt2) + print*, ' 500 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-500)*(tt2-tt1)/(60.d0*500.d0) endif + enddo ! s deallocate(a_jkp, a_kpq, a_pqr) diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f index e5d75c3d..6d446037 100644 --- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f @@ -131,7 +131,7 @@ deallocate(tmp) call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 + print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num = (min)', (time1-time0) / 60.d0 call print_memory_usage() END_PROVIDER diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index ba078d9b..c7230dc3 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -201,7 +201,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL - call clear_ao_map() + !call clear_ao_map() + FREE ao_integrals_map if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num From d872d60e70f8eedb3913f5566d4f35d198d4aad5 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:29:40 +0200 Subject: [PATCH 007/131] saving olympe2 modif --- plugins/local/bi_ortho_mos/overlap.irp.f | 8 +- .../lapack_diag_non_hermit.irp.f | 1 + plugins/local/tc_scf/minimize_tc_angles.irp.f | 2 +- plugins/local/tc_scf/routines_rotates.irp.f | 79 ++++++++++--------- src/tools/print_detweights.irp.f | 35 +++++++- src/utils/block_diag_degen.irp.f | 2 +- 6 files changed, 81 insertions(+), 46 deletions(-) diff --git a/plugins/local/bi_ortho_mos/overlap.irp.f b/plugins/local/bi_ortho_mos/overlap.irp.f index ff5d5c84..7f07929f 100644 --- a/plugins/local/bi_ortho_mos/overlap.irp.f +++ b/plugins/local/bi_ortho_mos/overlap.irp.f @@ -56,10 +56,10 @@ print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0) print*,'And bi orthogonality is off by an average of ',accu_nd print*,'****************' - print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' - do i = 1, mo_num - write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) - enddo + !print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' + !do i = 1, mo_num + ! write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) + !enddo endif print*,'Average trace of overlap_bi_ortho (should be 1.)' print*,'accu_d = ',accu_d diff --git a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f index cb38347e..4d4bc047 100644 --- a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -2144,6 +2144,7 @@ subroutine impose_biorthog_degen_eigvec(n, deg_num, e0, L0, R0) enddo !print*,' accu_nd after = ', accu_nd if(accu_nd .gt. 1d-12) then + print*, ' accu_nd =', accu_nd print*, ' your strategy for degenerates orbitals failed !' print*, m, 'deg on', i stop diff --git a/plugins/local/tc_scf/minimize_tc_angles.irp.f b/plugins/local/tc_scf/minimize_tc_angles.irp.f index c7752930..e5f6cf87 100644 --- a/plugins/local/tc_scf/minimize_tc_angles.irp.f +++ b/plugins/local/tc_scf/minimize_tc_angles.irp.f @@ -20,7 +20,7 @@ program minimize_tc_angles ! TODO ! check if rotations of orbitals affect the TC energy ! and refuse the step - call minimize_tc_orb_angles + call minimize_tc_orb_angles() end diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f index c42e846e..2c5510f2 100644 --- a/plugins/local/tc_scf/routines_rotates.irp.f +++ b/plugins/local/tc_scf/routines_rotates.irp.f @@ -40,9 +40,6 @@ subroutine LTxSxR(n, m, L, S, R, C) end subroutine LTxR -! --- - - ! --- subroutine minimize_tc_orb_angles() @@ -103,7 +100,10 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) - E_thr = 1d-04 + PROVIDE TC_HF_energy + PROVIDE mo_r_coef mo_l_coef + + E_thr = 1d-07 E_old = TC_HF_energy allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) mo_r_coef_old = mo_r_coef @@ -111,7 +111,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) good_angles = .False. - allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) + allocate(mo_l_coef_good(ao_num,mo_num), mo_r_coef_good(ao_num,mo_num)) print *, ' ***************************************' print *, ' ***************************************' @@ -123,7 +123,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) mo_r_coef_good = mo_r_coef mo_l_coef_good = mo_l_coef - allocate(mo_r_coef_new(ao_num, mo_num)) + allocate(mo_r_coef_new(ao_num,mo_num)) mo_r_coef_new = mo_r_coef do i = 1, mo_num norm = 1.d0/dsqrt(overlap_mo_r(i,i)) @@ -141,10 +141,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) if(n_core_orb.ne.0)then - call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) else - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) endif + print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -156,50 +157,52 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) - if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .ge. 1000) n_degen = 1 ! convention for core orbitals if(n_degen .eq. 1) cycle + print*, ' working on orbital', i + print*, ' multiplicity =', n_degen allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen)) allocate(T(n_degen,n_degen), Snew(n_degen,n_degen)) print*,'Right orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) + enddo print*,'Left orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_l_coef(1:ao_num,list_degen(i,j)) + enddo if(angle_left_right(list_degen(i,1)).gt.80.d0.and.n_degen==2)then - integer :: i_list, j_list - i_list = list_degen(i,1) - j_list = list_degen(i,2) - print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) - print*,'i_list = ',i_list - print*,'i_list = ',j_list - print*,'Swapping left/right orbitals' - call print_strong_overlap(i_list, j_list) - mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) - mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) - mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) - mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) + integer :: i_list, j_list + i_list = list_degen(i,1) + j_list = list_degen(i,2) + print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) + print*,'i_list = ',i_list + print*,'i_list = ',j_list + print*,'Swapping left/right orbitals' + call print_strong_overlap(i_list, j_list) + mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) + mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) + mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) + mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) else - do j = 1, n_degen - print*,'i_list = ',list_degen(i,j) - mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) - mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + print*,'i_list = ',list_degen(i,j) + mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) + mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) + enddo endif print*,'Right orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_r_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_tmp(1:ao_num,j) + enddo print*,'Left orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(100(F16.10,X))') mo_l_coef_tmp(1:ao_num,j) + enddo ! Orthogonalization of right functions print *, ' Orthogonalization of RIGHT functions' print *, ' ------------------------------------' diff --git a/src/tools/print_detweights.irp.f b/src/tools/print_detweights.irp.f index d5b0f2c9..5e5f2bb1 100644 --- a/src/tools/print_detweights.irp.f +++ b/src/tools/print_detweights.irp.f @@ -5,7 +5,8 @@ program print_detweights read_wf = .True. touch read_wf - call main() + call print_exc() + !call main() end @@ -41,6 +42,7 @@ subroutine main() do i = 1, N_det deg_sorted(i) = deg(ii(i)) + print *, deg_sorted(i), c(i) enddo print *, ' saving psi' @@ -52,7 +54,7 @@ subroutine main() print *, ' Error opening file!' stop endif - + write(10) N_det write(10) deg_sorted write(10) c @@ -63,4 +65,33 @@ subroutine main() end +! --- + +subroutine print_exc() + + implicit none + + integer :: i + integer, allocatable :: deg(:) + + PROVIDE N_int + PROVIDE N_det + PROVIDE psi_det + + allocate(deg(N_det)) + + do i = 1, N_det + call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i), deg(i), N_int) + enddo + + open(unit=10, file="exc.dat", action="write") + write(10,*) N_det + write(10,*) deg + close(10) + + deallocate(deg) + +end + + diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f index 188bfa58..1a9ca8d6 100644 --- a/src/utils/block_diag_degen.irp.f +++ b/src/utils/block_diag_degen.irp.f @@ -191,7 +191,7 @@ subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list) list_degen(n_degen_list,1) = i icount = 1 do j = i+1, n - if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + if(dabs(A(i)-A(j)).lt.thr .and. is_ok(j)) then is_ok(j) = .False. icount += 1 list_degen(n_degen_list,icount) = j From f8bff471222ac9cd2e6f23342f7d7a7aff5d62cd Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 28 Mar 2024 15:27:11 +0100 Subject: [PATCH 008/131] added loops --- .../local/non_h_ints_mu/total_tc_int.irp.f | 165 +++++++++++++----- 1 file changed, 121 insertions(+), 44 deletions(-) diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index c7230dc3..72fd0f53 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -65,27 +65,59 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_square_ao - allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, int2_grad1_u12_square_ao) + !$OMP DO COLLAPSE(4) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + ao_two_e_tc_tot(j,l,k,i) = 0.d0 + do ipoint = 1, n_points_final_grid + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) + int2_grad1_u12_square_ao(j,l,ipoint) * weight1 * ao_i_r * ao_k_r + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & - , 0.d0, ao_two_e_tc_tot, ao_num*ao_num) + else + print*, ' DGEMM are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, ao_two_e_tc_tot, ao_num*ao_num) + deallocate(c_mat) + endif + FREE int2_grad1_u12_square_ao if( (tc_integ_type .eq. "semi-analytic") .and. & @@ -96,6 +128,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) PROVIDE int2_u2_env2 !$OMP PARALLEL & @@ -127,10 +160,13 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n , int2_u2_env2(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + deallocate(c_mat) FREE int2_u2_env2 endif ! use_ipp - deallocate(c_mat) + call wall_time(time1) + print*, ' done with Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() ! --- @@ -138,38 +174,73 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_ao - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid + print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) - - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, & + !$OMP int2_grad1_u12_ao, aos_grad_in_r_array_transp_bis) + !$OMP DO COLLAPSE(4) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + do ipoint = 1, n_points_final_grid + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) - enddo - deallocate(b_mat) + else + print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + enddo + deallocate(b_mat) + + end if FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then @@ -178,16 +249,22 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n endif ! var_tc + call wall_time(time1) + print*, ' done with non-Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() + ! --- call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + ! --- + PROVIDE ao_integrals_map !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & !$OMP PRIVATE(i, j, k, l) - !$OMP DO + !$OMP DO COLLAPSE(4) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num From 002aff90f5e6ecd7a4929eb48e75608d94f9e3a8 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 28 Mar 2024 17:05:00 +0100 Subject: [PATCH 009/131] working on mem reduction --- .../local/non_h_ints_mu/total_tc_int.irp.f | 98 ++++++++++++------- plugins/local/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 72fd0f53..b8379006 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -33,8 +33,10 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n double precision :: weight1, ao_k_r, ao_i_r double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1 - double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:) + double precision, allocatable :: c_mat(:,:,:) + logical, external :: ao_two_e_integral_zero double precision, external :: get_ao_two_e_integral + double precision, external :: ao_two_e_integral PROVIDe tc_integ_type PROVIDE env_type @@ -194,9 +196,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + - weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) enddo enddo enddo @@ -209,39 +211,37 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) enddo - deallocate(b_mat) + deallocate(c_mat) end if - FREE int2_grad1_u12_ao + !FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then FREE int2_grad1_u2e_ao @@ -258,19 +258,26 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) ! --- + + logical :: integ_zero + double precision :: integ_val - PROVIDE ao_integrals_map + print*, ' adding ERI to ao_two_e_tc_tot ...' - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !$OMP PRIVATE(i, j, k, l) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) !$OMP DO COLLAPSE(4) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - ! < 1:i, 2:j | 1:k, 2:l > - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integ_zero = ao_two_e_integral_zero(i,j,k,l) + if(.not. integ_zero) then + ! i,k : r1 j,l : r2 + integ_val = ao_two_e_integral(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val + endif enddo enddo enddo @@ -278,8 +285,25 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL - !call clear_ao_map() - FREE ao_integrals_map + !PROVIDE ao_integrals_map + !!$OMP PARALLEL DEFAULT(NONE) & + !!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !!$OMP PRIVATE(i, j, k, l) + !!$OMP DO COLLAPSE(4) + !do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, ao_num + ! do k = 1, ao_num + ! ! < 1:i, 2:j | 1:k, 2:l > + ! ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + !!call clear_ao_map() + !FREE ao_integrals_map if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index a8491660..24362796 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -286,4 +286,10 @@ doc: If |true|, memory scale of TC ao -> mo: O(N3) interface: ezfio,provider,ocaml default: False +[tc_save_mem] +type: logical +doc: If |true|, use loops to save memory TC +interface: ezfio,provider,ocaml +default: False + From 1a36d974b0bd5cd0c06453a15c96a8492c4baecc Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:33:09 +0200 Subject: [PATCH 010/131] saving lcpq-ampere --- .../local/bi_ort_ints/semi_num_ints_mo.irp.f | 68 ++++--- .../bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- .../local/bi_ort_ints/total_twoe_pot.irp.f | 87 ++++----- plugins/local/non_h_ints_mu/tc_integ.irp.f | 175 ++++++++++++++++-- .../local/non_h_ints_mu/total_tc_int.irp.f | 93 +++++----- plugins/local/tc_keywords/EZFIO.cfg | 8 +- .../local/tc_scf/fock_3e_bi_ortho_cs.irp.f | 121 +++++++----- .../local/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 2 +- plugins/local/tc_scf/fock_tc.irp.f | 2 +- .../local/tc_scf/write_ao_2e_tc_integ.irp.f | 58 ++++++ .../multi_s_dipole_moment.irp.f | 22 ++- 11 files changed, 442 insertions(+), 196 deletions(-) create mode 100644 plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f diff --git a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f index 51f0cba4..77e4cb9b 100644 --- a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f @@ -107,8 +107,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, integer :: i, j, ipoint double precision :: wall0, wall1 - print *, ' providing int2_grad1_u12_ao_transp ...' - call wall_time(wall0) + !print *, ' providing int2_grad1_u12_ao_transp ...' + !call wall_time(wall0) if(test_cycle_tc) then @@ -142,15 +142,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, endif - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_transp (min) = ', (wall1 - wall0) / 60.d0 + !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none integer :: ipoint @@ -159,7 +159,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' + !print *, ' providing int2_grad1_u12_bimo_transp ...' !call wall_time(wall0) !$OMP PARALLEL & @@ -167,33 +167,35 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP PRIVATE (ipoint) & !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - enddo + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + enddo !$OMP END DO !$OMP END PARALLEL + !FREE int2_grad1_u12_ao_transp + !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !print *, ' wall time for int2_grad1_u12_bimo_transp (min) =', (wall1 - wall0) / 60.d0 !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none integer :: i, j, ipoint double precision :: wall0, wall1 !call wall_time(wall0) - !print *, ' Providing int2_grad1_u12_bimo_t ...' + !print *, ' providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -211,17 +213,21 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp !call wall_time(wall1) - !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !print *, ' wall time for int2_grad1_u12_bimo_t (min) =', (wall1 - wall0) / 60.d0 !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' providing int2_grad1_u12_ao_t ...' PROVIDE int2_grad1_u12_ao @@ -235,6 +241,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, enddo enddo + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_t (min) =', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + END_PROVIDER ! --- @@ -275,8 +285,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, double precision :: xyz double precision :: wall0, wall1 - print*, ' providing x_W_ki_bi_ortho_erf_rk ...' - call wall_time(wall0) + !print*, ' providing x_W_ki_bi_ortho_erf_rk ...' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -300,8 +310,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp - call wall_time(wall1) - print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 + !call wall_time(wall1) + !print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 END_PROVIDER @@ -323,8 +333,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_ double precision :: xyz double precision :: wall0, wall1 - print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' - call wall_time(wall0) + !print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -343,8 +353,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_ !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) - print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 + !call wall_time(wall1) + !print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 END_PROVIDER diff --git a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f index 726e48ba..fd4a162f 100644 --- a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -168,7 +168,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo -end subroutine give_integrals_3_body_bi_ort +end ! --- diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 1e558038..e34a7b7b 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -16,10 +16,10 @@ double precision function bi_ortho_mo_ints(l, k, j, i) integer :: m, n, p, q bi_ortho_mo_ints = 0.d0 - do m = 1, ao_num - do p = 1, ao_num - do n = 1, ao_num - do q = 1, ao_num + do p = 1, ao_num + do m = 1, ao_num + do q = 1, ao_num + do n = 1, ao_num ! p1h1p2h2 l1 l2 r1 r2 bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i) enddo @@ -27,7 +27,7 @@ double precision function bi_ortho_mo_ints(l, k, j, i) enddo enddo -end function bi_ortho_mo_ints +end ! --- @@ -43,93 +43,68 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, integer :: i, j, k, l, m, n, p, q, s, r double precision :: t1, t2, tt1, tt2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) - double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) + double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), ao_two_e_tc_tot_tmp(:,:,:) print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...' call wall_time(t1) call print_memory_usage() PROVIDE mo_r_coef mo_l_coef - PROVIDE ao_two_e_tc_tot if(ao_to_mo_tc_n3) then print*, ' memory scale of TC ao -> mo: O(N3) ' + if(.not.read_tc_integ) then + stop 'read_tc_integ needs to be set to true' + endif + allocate(a_jkp(ao_num,ao_num,mo_num)) allocate(a_kpq(ao_num,mo_num,mo_num)) - allocate(a_pqr(mo_num,mo_num,mo_num)) + allocate(ao_two_e_tc_tot_tmp(ao_num,ao_num,ao_num)) + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") call wall_time(tt1) - do s = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(:,:,:,:) = 0.d0 + do l = 1, ao_num + read(11) ao_two_e_tc_tot_tmp(:,:,:) - mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 - do l = 1, ao_num + do s = 1, mo_num - call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & - , ao_two_e_tc_tot(1,1,1,l), ao_num, mo_l_coef(1,1), ao_num & + call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot_tmp(1,1,1), ao_num, mo_l_coef(1,1), ao_num & , 0.d0, a_jkp(1,1,1), ao_num*ao_num) - + call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 & , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num & , 0.d0, a_kpq(1,1,1), ao_num*mo_num) - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0, a_pqr(1,1,1), mo_num*mo_num) - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(p, q, r) & - !$OMP SHARED(s, l, mo_num, mo_bi_ortho_tc_two_e_chemist, mo_r_coef, a_pqr) - !$OMP DO COLLAPSE(2) - do p = 1, mo_num - do q = 1, mo_num - do r = 1, mo_num - mo_bi_ortho_tc_two_e_chemist(p,q,r,s) = mo_bi_ortho_tc_two_e_chemist(p,q,r,s) + mo_r_coef(l,s) * a_pqr(p,q,r) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, mo_r_coef(l,s) & + , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 1.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,s), mo_num*mo_num) - enddo ! l + enddo ! s - if(s == 2) then + if(l == 2) then call wall_time(tt2) print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0 - elseif(s == 11) then + elseif(l == 11) then call wall_time(tt2) print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0) - elseif(s == 26) then - call wall_time(tt2) - print*, ' 25 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-25)*(tt2-tt1)/(60.d0*25.d0) - elseif(s == 51) then - call wall_time(tt2) - print*, ' 50 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-50)*(tt2-tt1)/(60.d0*50.d0) - elseif(s == 101) then + elseif(l == 101) then call wall_time(tt2) print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0) - elseif(s == 201) then - call wall_time(tt2) - print*, ' 200 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-200)*(tt2-tt1)/(60.d0*200.d0) - elseif(s == 501) then - call wall_time(tt2) - print*, ' 500 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-500)*(tt2-tt1)/(60.d0*500.d0) endif + enddo ! l + close(11) - enddo ! s - - deallocate(a_jkp, a_kpq, a_pqr) + deallocate(a_jkp, a_kpq, ao_two_e_tc_tot_tmp) else @@ -141,6 +116,8 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num) + FREE ao_two_e_tc_tot + allocate(a1(ao_num,ao_num,mo_num,mo_num)) call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & diff --git a/plugins/local/non_h_ints_mu/tc_integ.irp.f b/plugins/local/non_h_ints_mu/tc_integ.irp.f index 775a9e4c..58e3db48 100644 --- a/plugins/local/non_h_ints_mu/tc_integ.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ.irp.f @@ -44,14 +44,92 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(tc_integ_type .eq. "numeric") then print *, ' Numerical integration over r1 and r2 will be performed' - - ! TODO combine 1shot & int2_grad1_u12_ao_num - PROVIDE int2_grad1_u12_ao_num - int2_grad1_u12_ao = int2_grad1_u12_ao_num + if(tc_save_mem) then - !PROVIDE int2_grad1_u12_ao_num_1shot - !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:) + double precision, allocatable :: tmp_grad1_u12(:,:,:) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num), xx(n_points_extra_final_grid)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + deallocate(tmp_grad1_u12) + if(n_rest .gt. 0) then + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + ii = n_pass*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12) + endif + deallocate(tmp,xx) + + else + ! TODO combine 1shot & int2_grad1_u12_ao_num + PROVIDE int2_grad1_u12_ao_num + int2_grad1_u12_ao = int2_grad1_u12_ao_num + !PROVIDE int2_grad1_u12_ao_num_1shot + !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then @@ -177,13 +255,88 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p print *, ' Numerical integration over r1 and r2 will be performed' - ! TODO combine 1shot & int2_grad1_u12_square_ao_num + if(tc_save_mem) then - PROVIDE int2_grad1_u12_square_ao_num - int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:,:,:) + double precision, allocatable :: tmp_grad1_u12_squared(:,:) - !PROVIDE int2_grad1_u12_square_ao_num_1shot - !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks), xx(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_blocks,1), xx(1,i_blocks,2), xx(1,i_blocks,3), tmp_grad1_u12_squared(1,i_blocks)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12_squared, xx) + if(n_rest .gt. 0) then + ii = n_pass*n_blocks + 1 + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest), xx(n_points_extra_final_grid,n_rest,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_rest,1), xx(1,i_rest,2), xx(1,i_rest,3), tmp_grad1_u12_squared(1,i_rest)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + deallocate(tmp_grad1_u12_squared, xx) + endif + deallocate(tmp) + + else + + ! TODO combine 1shot & int2_grad1_u12_square_ao_num + PROVIDE int2_grad1_u12_square_ao_num + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + !PROVIDE int2_grad1_u12_square_ao_num_1shot + !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index b8379006..a1bbd6e0 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -55,7 +55,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' Reading ao_two_e_tc_tot from ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") - read(11) ao_two_e_tc_tot + do i = 1, ao_num + read(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) else @@ -67,7 +69,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_square_ao - if(tc_save_mem) then + if(tc_save_mem_loops) then print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...' @@ -176,7 +178,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_ao - if(tc_save_mem) then + if(tc_save_mem_loops) then print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' @@ -241,7 +243,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n deallocate(c_mat) end if - !FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then FREE int2_grad1_u2e_ao @@ -264,48 +265,52 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' adding ERI to ao_two_e_tc_tot ...' - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot) - !$OMP DO COLLAPSE(4) - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - integ_zero = ao_two_e_integral_zero(i,j,k,l) - if(.not. integ_zero) then - ! i,k : r1 j,l : r2 - integ_val = ao_two_e_integral(i,k,j,l) - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val - endif + if(tc_save_mem) then + print*, ' ao_integrals_map will not be used' + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) + !$OMP DO COLLAPSE(4) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + integ_zero = ao_two_e_integral_zero(i,j,k,l) + if(.not. integ_zero) then + ! i,k : r1 j,l : r2 + integ_val = ao_two_e_integral(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val + endif + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + else + print*, ' ao_integrals_map will be used' + PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(4) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + !call clear_ao_map() + FREE ao_integrals_map + endif - !PROVIDE ao_integrals_map - !!$OMP PARALLEL DEFAULT(NONE) & - !!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !!$OMP PRIVATE(i, j, k, l) - !!$OMP DO COLLAPSE(4) - !do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! ! < 1:i, 2:j | 1:k, 2:l > - ! ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL - !!call clear_ao_map() - !FREE ao_integrals_map - - if(tc_integ_type .eq. "numeric") then + if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num endif @@ -315,7 +320,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' Saving ao_two_e_tc_tot in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") call ezfio_set_work_empty(.False.) - write(11) ao_two_e_tc_tot + do i = 1, ao_num + write(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) call ezfio_set_tc_keywords_io_tc_integ('Read') endif diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 24362796..70169189 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -286,10 +286,16 @@ doc: If |true|, memory scale of TC ao -> mo: O(N3) interface: ezfio,provider,ocaml default: False -[tc_save_mem] +[tc_save_mem_loops] type: logical doc: If |true|, use loops to save memory TC interface: ezfio,provider,ocaml default: False +[tc_save_mem] +type: logical +doc: If |true|, more calc but less mem +interface: ezfio,provider,ocaml +default: False + diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f index 0b883865..8fd5e5b6 100644 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f +++ b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f @@ -9,7 +9,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] double precision :: loc_1, loc_2, loc_3 double precision, allocatable :: Okappa(:), Jkappa(:,:) double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:) double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) PROVIDE mo_l_coef mo_r_coef @@ -63,17 +63,13 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] allocate(tmp_1(n_points_final_grid,4)) do ipoint = 1, n_points_final_grid - loc_1 = 2.d0 * Okappa(ipoint) - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - tmp_1(ipoint,4) = Okappa(ipoint) enddo - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & @@ -112,58 +108,81 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num + allocate(tmp_22(n_points_final_grid,4,mo_num)) + do a = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_22) + !$OMP DO + do b = 1, mo_num do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_22(:,4,b) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0 & + , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,a), 1) + enddo + deallocate(tmp_22) + + else + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + deallocate(tmp_2) - ! --- + endif - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - - deallocate(tmp_1, tmp_2) + deallocate(tmp_1) ! --- @@ -272,7 +291,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + !print *, ' total Wall time for fock_3e_uhf_mo_cs =', (tf - ti) / 60.d0 END_PROVIDER diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 63a1e162..47ee5b48 100644 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -32,7 +32,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] endif !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + !print *, ' Wall time for fock_3e_uhf_mo_a (min) =', (tf - ti)/60.d0 END_PROVIDER diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index 282f9873..d3ddb8ad 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -175,7 +175,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] +BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] BEGIN_DOC ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f new file mode 100644 index 00000000..7ce57578 --- /dev/null +++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f @@ -0,0 +1,58 @@ +! --- + +program write_ao_2e_tc_integ + + implicit none + + PROVIDE j1e_type + PROVIDE j2e_type + + print *, ' j1e_type = ', j1e_type + print *, ' j2e_type = ', j2e_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + PROVIDE ao_two_e_tc_tot + +end + +! --- + diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index f21e08cd..c7216a61 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -102,12 +102,28 @@ END_PROVIDER &BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval, (N_states)] implicit none + double precision, allocatable :: eigval(:), eigvec(:,:), A(:,:) PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment - call lapack_diag(multi_s_x_dipole_moment_eigenval(1), multi_s_x_dipole_moment_eigenvec(1,1), multi_s_x_dipole_moment(1,1), N_states, N_states) - call lapack_diag(multi_s_y_dipole_moment_eigenval(1), multi_s_y_dipole_moment_eigenvec(1,1), multi_s_y_dipole_moment(1,1), N_states, N_states) - call lapack_diag(multi_s_z_dipole_moment_eigenval(1), multi_s_z_dipole_moment_eigenvec(1,1), multi_s_z_dipole_moment(1,1), N_states, N_states) + allocate(A(N_states,N_states), eigvec(N_states,N_states), eigval(N_states)) + + A = multi_s_x_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_x_dipole_moment_eigenval = eigval + multi_s_x_dipole_moment_eigenvec = eigvec + + A = multi_s_y_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_y_dipole_moment_eigenval = eigval + multi_s_y_dipole_moment_eigenvec = eigvec + + A = multi_s_z_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_z_dipole_moment_eigenval = eigval + multi_s_z_dipole_moment_eigenvec = eigvec + + deallocate(A, eigvec, eigval) END_PROVIDER From e65d7913bfdf83159ffd50eb39c76e63dea221d5 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:43:32 +0200 Subject: [PATCH 011/131] saving lcpq --- bin/qp_convert_output_to_ezfio | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio index 1b33f156..6f2d02d0 100755 --- a/bin/qp_convert_output_to_ezfio +++ b/bin/qp_convert_output_to_ezfio @@ -227,8 +227,8 @@ def write_ezfio(res, filename): shell_index += [nshell_tot] * len(b.prim) shell_num = len(ang_mom) - assert(shell_index[0] = 1) - assert(shell_index[-1] = shell_num) + assert(shell_index[0] == 1) + assert(shell_index[-1] == shell_num) # ~#~#~#~#~ # # W r i t e # From 2c899e6dd71247ae26cd337ede2bb13ce9419489 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 20 Apr 2024 12:39:39 +0200 Subject: [PATCH 012/131] few modif in grids --- .../extra_grid_vector.irp.f | 23 +++++++++++++++++-- .../grid_becke_vector.irp.f | 6 ++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index ae167282..16a52dc6 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -47,8 +47,12 @@ END_PROVIDER END_DOC implicit none - integer :: i,j,k,l,i_count - double precision :: r(3) + integer :: i, j, k, l, i_count + double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing extra_final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -66,10 +70,25 @@ END_PROVIDER index_final_points_extra(2,i_count) = i index_final_points_extra(3,i_count) = j index_final_points_extra_reverse(k,i,j) = i_count + + if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then + print *, ' !!! WARNING !!!' + print *, ' negative weight !!!!' + print *, i_count, final_weight_at_r_vector_extra(i_count) + if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then + final_weight_at_r_vector_extra(i_count) = 0.d0 + else + stop + endif + endif enddo enddo enddo + call wall_time(wall1) + print *, ' wall time for extra_final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 473096d0..c35918c3 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -72,7 +72,11 @@ END_PROVIDER print *, ' !!! WARNING !!!' print *, ' negative weight !!!!' print *, i_count, final_weight_at_r_vector(i_count) - stop + if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then + final_weight_at_r_vector(i_count) = 0.d0 + else + stop + endif endif enddo enddo From e9dccd2364f282397df9f3b5bc4e3373fe3bd7e6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 Apr 2024 19:46:26 +0200 Subject: [PATCH 013/131] added spherical harmonics --- plugins/local/spher_harm/.gitignore | 59 +++++ plugins/local/spher_harm/NEED | 1 + plugins/local/spher_harm/README.rst | 4 + plugins/local/spher_harm/assoc_gaus_pol.irp.f | 50 ++++ plugins/local/spher_harm/spher_harm.irp.f | 217 ++++++++++++++++++ .../local/spher_harm/spher_harm_func.irp.f | 151 ++++++++++++ 6 files changed, 482 insertions(+) create mode 100644 plugins/local/spher_harm/.gitignore create mode 100644 plugins/local/spher_harm/NEED create mode 100644 plugins/local/spher_harm/README.rst create mode 100644 plugins/local/spher_harm/assoc_gaus_pol.irp.f create mode 100644 plugins/local/spher_harm/spher_harm.irp.f create mode 100644 plugins/local/spher_harm/spher_harm_func.irp.f diff --git a/plugins/local/spher_harm/.gitignore b/plugins/local/spher_harm/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/spher_harm/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/spher_harm/NEED b/plugins/local/spher_harm/NEED new file mode 100644 index 00000000..92df7f12 --- /dev/null +++ b/plugins/local/spher_harm/NEED @@ -0,0 +1 @@ +dft_utils_in_r diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst new file mode 100644 index 00000000..bf897f73 --- /dev/null +++ b/plugins/local/spher_harm/README.rst @@ -0,0 +1,4 @@ +========== +spher_harm +========== + diff --git a/plugins/local/spher_harm/assoc_gaus_pol.irp.f b/plugins/local/spher_harm/assoc_gaus_pol.irp.f new file mode 100644 index 00000000..fa790307 --- /dev/null +++ b/plugins/local/spher_harm/assoc_gaus_pol.irp.f @@ -0,0 +1,50 @@ +double precision function plgndr(l,m,x) + integer, intent(in) :: l,m + double precision, intent(in) :: x + BEGIN_DOC + ! associated Legenre polynom P_l,m(x). Used for the Y_lm(theta,phi) + ! Taken from https://iate.oac.uncor.edu/~mario/materia/nr/numrec/f6-8.pdf + END_DOC + integer :: i,ll + double precision :: fact,pll,pmm,pmmp1,somx2 + if(m.lt.0.or.m.gt.l.or.dabs(x).gt.1.d0)then + print*,'bad arguments in plgndr' + pause + endif + pmm=1.d0 + if(m.gt.0) then + somx2=dsqrt((1.d0-x)*(1.d0+x)) + fact=1.d0 + do i=1,m + pmm=-pmm*fact*somx2 + fact=fact+2.d0 + enddo + endif ! m > 0 + if(l.eq.m) then + plgndr=pmm + else + pmmp1=x*(2*m+1)*pmm ! Compute P_m+1^m + if(l.eq.m+1) then + plgndr=pmmp1 + else ! Compute P_l^m, l> m+1 + do ll=m+2,l + pll=(x*dble(2*ll-1)*pmmp1-dble(ll+m-1)*pmm)/(ll-m) + pmm=pmmp1 + pmmp1=pll + enddo + plgndr=pll + endif ! l.eq.m+1 + endif ! l.eq.m + return +end + +double precision function ortho_assoc_gaus_pol(l1,m1,l2) + implicit none + integer, intent(in) :: l1,m1,l2 + double precision :: fact + if(l1.ne.l2)then + ortho_assoc_gaus_pol= 0.d0 + else + ortho_assoc_gaus_pol = 2.d0*fact(l1+m1) / (dble(2*l1+1)*fact(l1-m1)) + endif +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f new file mode 100644 index 00000000..40661db1 --- /dev/null +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -0,0 +1,217 @@ +program spher_harm + implicit none + call test_spher_harm +! call test_cart +! call test_brutal_spheric +end + +subroutine test_cart + implicit none + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) +! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm_func.irp.f b/plugins/local/spher_harm/spher_harm_func.irp.f new file mode 100644 index 00000000..825bd8ac --- /dev/null +++ b/plugins/local/spher_harm/spher_harm_func.irp.f @@ -0,0 +1,151 @@ +subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm) + implicit none + integer, intent(in) :: l,m + double precision, intent(in) :: r(3) + double precision, intent(out) :: re_ylm, im_ylm + + double precision :: theta, phi,r_abs + call cartesian_to_spherical(r,theta,phi,r_abs) + call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) +end + + +subroutine spher_harm_func_m_pos(l,m,theta,phi,re_ylm, im_ylm) + include 'constants.include.F' + implicit none + BEGIN_DOC +! Y_lm(theta,phi) with m >0 +! + END_DOC + double precision, intent(in) :: theta, phi + integer, intent(in) :: l,m + double precision, intent(out):: re_ylm,im_ylm + double precision :: prefact,fact,cos_theta,plgndr,p_lm + double precision :: tmp + prefact = dble(2*l+1)*fact(l-m)/(dfour_pi * fact(l+m)) + prefact = dsqrt(prefact) + cos_theta = dcos(theta) + p_lm = plgndr(l,m,cos_theta) + tmp = prefact * p_lm + re_ylm = dcos(dble(m)*phi) * tmp + im_ylm = dsin(dble(m)*phi) * tmp +end + +subroutine spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l l in spher_harm_func !! stopping ...' + stop + endif + if(m.ge.0)then + call spher_harm_func_m_pos(l,m,theta,phi,re_ylm_pos, im_ylm_pos) + re_ylm = re_ylm_pos + im_ylm = im_ylm_pos + else + minus_m = -m !> 0 + call spher_harm_func_m_pos(l,minus_m,theta,phi,re_ylm_pos, im_ylm_pos) + tmp = (-1)**minus_m + re_ylm = tmp * re_ylm_pos + im_ylm = -tmp * im_ylm_pos ! complex conjugate + endif +end + +subroutine cartesian_to_spherical(r,theta,phi,r_abs) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: theta, phi,r_abs + double precision :: r_2,x_2_y_2,tmp + include 'constants.include.F' + x_2_y_2 = r(1)*r(1) + r(2)*r(2) + r_2 = x_2_y_2 + r(3)*r(3) + r_abs = dsqrt(r_2) + + if(r_abs.gt.1.d-20)then + theta = dacos(r(3)/r_abs) + else + theta = 0.d0 + endif + + if(.true.)then + if(dabs(r(1)).gt.0.d0)then + tmp = datan(r(2)/r(1)) +! phi = datan2(r(2),r(1)) + endif + ! From Wikipedia on Spherical Harmonics + if(r(1).gt.0.d0)then + phi = tmp + else if(r(1).lt.0.d0.and.r(2).ge.0.d0)then + phi = tmp + pi + else if(r(1).lt.0.d0.and.r(2).lt.0.d0)then + phi = tmp - pi + else if(r(1)==0.d0.and.r(2).gt.0.d0)then + phi = 0.5d0*pi + else if(r(1)==0.d0.and.r(2).lt.0.d0)then + phi =-0.5d0*pi + else if(r(1)==0.d0.and.r(2)==0.d0)then + phi = 0.d0 + endif + if(r(2).lt.0.d0.and.r(1).le.0.d0)then + tmp = pi - dabs(phi) + phi = pi + tmp + else if(r(2).lt.0.d0.and.r(1).gt.0.d0)then + phi = dtwo_pi + phi + endif + endif + + if(.false.)then + x_2_y_2 = dsqrt(x_2_y_2) + if(dabs(x_2_y_2).gt.1.d-20.and.dabs(r(2)).gt.1.d-20)then + phi = dabs(r(2))/r(2) * dacos(r(1)/x_2_y_2) + else + phi = 0.d0 + endif + endif +end + + +subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l Date: Wed, 24 Apr 2024 14:48:23 +0200 Subject: [PATCH 014/131] Begining to make some cleaning in TC --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 22 +++++++++++++++++++ plugins/local/mo_localization/README.md | 2 +- .../normal_ordered.irp.f | 0 .../normal_ordered_contractions.irp.f | 0 .../normal_ordered_old.irp.f | 0 .../normal_ordered_v0.irp.f | 0 .../h_biortho.irp.f | 0 .../h_mat_triple.irp.f | 0 .../h_tc_bi_ortho_psi.irp.f | 0 .../h_tc_s2_u0.irp.f | 0 .../slater_tc_3e_slow.irp.f | 0 .../slater_tc_opt.irp.f | 0 .../slater_tc_opt_diag.irp.f | 0 .../slater_tc_opt_double.irp.f | 0 .../slater_tc_opt_single.irp.f | 0 .../slater_tc_slow.irp.f | 0 .../{tc_bi_ortho => slater_tc}/tc_hmat.irp.f | 0 plugins/local/tc_bi_ortho/NEED | 6 +---- 18 files changed, 24 insertions(+), 6 deletions(-) rename plugins/local/{tc_bi_ortho => normal_order_old}/normal_ordered.irp.f (100%) rename plugins/local/{tc_bi_ortho => normal_order_old}/normal_ordered_contractions.irp.f (100%) rename plugins/local/{tc_bi_ortho => normal_order_old}/normal_ordered_old.irp.f (100%) rename plugins/local/{tc_bi_ortho => normal_order_old}/normal_ordered_v0.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/h_biortho.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/h_mat_triple.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/h_tc_bi_ortho_psi.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/h_tc_s2_u0.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_3e_slow.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_opt.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_opt_diag.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_opt_double.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_opt_single.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/slater_tc_slow.irp.f (100%) rename plugins/local/{tc_bi_ortho => slater_tc}/tc_hmat.irp.f (100%) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 5e6a24e9..42a7ba62 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -176,6 +176,28 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, END_PROVIDER +BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! + ! mo_bi_ortho_tc_two_e_transp(i,j,k,l) = = transpose of mo_bi_ortho_tc_two_e + ! + ! the potential V(r_12) contains ALL TWO-E CONTRIBUTION OF THE TC-HAMILTONIAN + ! + END_DOC + + integer :: i,j,k,l + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e_transp(k,l,i,j) + enddo + enddo + enddo + enddo + +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num)] diff --git a/plugins/local/mo_localization/README.md b/plugins/local/mo_localization/README.md index c28a5ee1..512e36af 100644 --- a/plugins/local/mo_localization/README.md +++ b/plugins/local/mo_localization/README.md @@ -3,7 +3,7 @@ To localize the MOs: ``` qp run localization ``` -By default, the different otbital classes are automatically set by splitting +By default, the different orbital classes are automatically set by splitting the orbitales in the following classes: - Core -> Core - Active, doubly occupied -> Inactive diff --git a/plugins/local/tc_bi_ortho/normal_ordered.irp.f b/plugins/local/normal_order_old/normal_ordered.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered.irp.f rename to plugins/local/normal_order_old/normal_ordered.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f b/plugins/local/normal_order_old/normal_ordered_contractions.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f rename to plugins/local/normal_order_old/normal_ordered_contractions.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_old.irp.f b/plugins/local/normal_order_old/normal_ordered_old.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_old.irp.f rename to plugins/local/normal_order_old/normal_ordered_old.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f b/plugins/local/normal_order_old/normal_ordered_v0.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f rename to plugins/local/normal_order_old/normal_ordered_v0.irp.f diff --git a/plugins/local/tc_bi_ortho/h_biortho.irp.f b/plugins/local/slater_tc/h_biortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_biortho.irp.f rename to plugins/local/slater_tc/h_biortho.irp.f diff --git a/plugins/local/tc_bi_ortho/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_mat_triple.irp.f rename to plugins/local/slater_tc/h_mat_triple.irp.f diff --git a/plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f rename to plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f diff --git a/plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f b/plugins/local/slater_tc/h_tc_s2_u0.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f rename to plugins/local/slater_tc/h_tc_s2_u0.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc/slater_tc_3e_slow.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f rename to plugins/local/slater_tc/slater_tc_3e_slow.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_opt.irp.f rename to plugins/local/slater_tc/slater_tc_opt.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f rename to plugins/local/slater_tc/slater_tc_opt_diag.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f rename to plugins/local/slater_tc/slater_tc_opt_double.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f rename to plugins/local/slater_tc/slater_tc_opt_single.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f b/plugins/local/slater_tc/slater_tc_slow.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/slater_tc_slow.irp.f rename to plugins/local/slater_tc/slater_tc_slow.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_hmat.irp.f b/plugins/local/slater_tc/tc_hmat.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_hmat.irp.f rename to plugins/local/slater_tc/tc_hmat.irp.f diff --git a/plugins/local/tc_bi_ortho/NEED b/plugins/local/tc_bi_ortho/NEED index 9a0c20ef..01841e02 100644 --- a/plugins/local/tc_bi_ortho/NEED +++ b/plugins/local/tc_bi_ortho/NEED @@ -1,6 +1,2 @@ -bi_ort_ints -bi_ortho_mos -tc_keywords -non_hermit_dav -dav_general_mat tc_scf +slater_tc From 05f35ab601a1f8ee17a3b55136bec92aefc96176 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 Apr 2024 20:00:42 +0200 Subject: [PATCH 015/131] Added properly the routines for the test of the Spherical Harmonics --- plugins/local/spher_harm/routines_test.irp.f | 227 +++++++++++++++++++ plugins/local/spher_harm/spher_harm.irp.f | 210 ----------------- 2 files changed, 227 insertions(+), 210 deletions(-) create mode 100644 plugins/local/spher_harm/routines_test.irp.f diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 00000000..6f7cbc1c --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,227 @@ + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! simple test such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the spherical harmonics integration on a sphere with the grid. + ! + ! We test = delta_m1,m2 delta_l1,l2 + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + ! Test for the delta l1,l2 and delta m1,m2 + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index 40661db1..e8deafb9 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -5,213 +5,3 @@ program spher_harm ! call test_brutal_spheric end -subroutine test_cart - implicit none - include 'constants.include.F' - double precision :: r(3),theta,phi,r_abs - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi -end - -subroutine test_spher_harm - implicit none - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) -! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end - -subroutine test_brutal_spheric - implicit none - include 'constants.include.F' - integer :: itheta, iphi,ntheta,nphi - double precision :: theta_min, theta_max, dtheta,theta - double precision :: phi_min, phi_max, dphi,phi - double precision :: accu_re, accu_im,weight - double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu - integer :: l1,m1,i,l2,m2,lmax - phi_min = 0.d0 - phi_max = 2.D0 * pi - theta_min = 0.d0 - theta_max = 1.D0 * pi - ntheta = 1000 - nphi = 1000 - dphi = (phi_max - phi_min)/dble(nphi) - dtheta = (theta_max - theta_min)/dble(ntheta) - - lmax = 3 - do l1 = 0,lmax - do m1 = 0 ,l1 - do l2 = 0,lmax - do m2 = 0 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - accu = 0.d0 - theta = theta_min - do itheta = 1, ntheta - phi = phi_min - do iphi = 1, nphi -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) - call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - weight = dtheta * dphi * dsin(theta) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - phi += dphi - enddo - theta += dtheta - enddo - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - print*,'accu = ',accu - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - endif - endif - enddo - enddo - enddo - enddo - - -end - -subroutine test_assoc_leg_pol - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - integer :: l1,m1,ngrid,i,l2,m2 - l1 = 0 - m1 = 0 - l2 = 2 - m2 = 0 - double precision :: x, dx,xmax,accu,xmin - double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol - ngrid = 100000 - xmax = 1.d0 - xmin = -1.d0 - dx = (xmax-xmin)/dble(ngrid) - do l2 = 0,10 - x = xmin - accu = 0.d0 - do i = 1, ngrid - func_1 = plgndr(l1,m1,x) - func_2 = plgndr(l2,m2,x) - write(33,*)x, func_1,func_2 - accu += func_1 * func_2 * dx - x += dx - enddo - print*,'l2 = ',l2 - print*,'accu = ',accu - print*,ortho_assoc_gaus_pol(l1,m1,l2) - enddo -end From c3483df9a16003065a41bfa92d37274a3eb466ee Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 Apr 2024 20:00:42 +0200 Subject: [PATCH 016/131] Added properly the routines for the test of the Spherical Harmonics --- plugins/local/spher_harm/routines_test.irp.f | 227 +++++++++++++++++++ plugins/local/spher_harm/spher_harm.irp.f | 210 ----------------- 2 files changed, 227 insertions(+), 210 deletions(-) create mode 100644 plugins/local/spher_harm/routines_test.irp.f diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 00000000..6f7cbc1c --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,227 @@ + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! simple test such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the spherical harmonics integration on a sphere with the grid. + ! + ! We test = delta_m1,m2 delta_l1,l2 + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + ! Test for the delta l1,l2 and delta m1,m2 + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index 40661db1..e8deafb9 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -5,213 +5,3 @@ program spher_harm ! call test_brutal_spheric end -subroutine test_cart - implicit none - include 'constants.include.F' - double precision :: r(3),theta,phi,r_abs - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi -end - -subroutine test_spher_harm - implicit none - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) -! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end - -subroutine test_brutal_spheric - implicit none - include 'constants.include.F' - integer :: itheta, iphi,ntheta,nphi - double precision :: theta_min, theta_max, dtheta,theta - double precision :: phi_min, phi_max, dphi,phi - double precision :: accu_re, accu_im,weight - double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu - integer :: l1,m1,i,l2,m2,lmax - phi_min = 0.d0 - phi_max = 2.D0 * pi - theta_min = 0.d0 - theta_max = 1.D0 * pi - ntheta = 1000 - nphi = 1000 - dphi = (phi_max - phi_min)/dble(nphi) - dtheta = (theta_max - theta_min)/dble(ntheta) - - lmax = 3 - do l1 = 0,lmax - do m1 = 0 ,l1 - do l2 = 0,lmax - do m2 = 0 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - accu = 0.d0 - theta = theta_min - do itheta = 1, ntheta - phi = phi_min - do iphi = 1, nphi -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) - call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - weight = dtheta * dphi * dsin(theta) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - phi += dphi - enddo - theta += dtheta - enddo - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - print*,'accu = ',accu - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - endif - endif - enddo - enddo - enddo - enddo - - -end - -subroutine test_assoc_leg_pol - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - integer :: l1,m1,ngrid,i,l2,m2 - l1 = 0 - m1 = 0 - l2 = 2 - m2 = 0 - double precision :: x, dx,xmax,accu,xmin - double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol - ngrid = 100000 - xmax = 1.d0 - xmin = -1.d0 - dx = (xmax-xmin)/dble(ngrid) - do l2 = 0,10 - x = xmin - accu = 0.d0 - do i = 1, ngrid - func_1 = plgndr(l1,m1,x) - func_2 = plgndr(l2,m2,x) - write(33,*)x, func_1,func_2 - accu += func_1 * func_2 * dx - x += dx - enddo - print*,'l2 = ',l2 - print*,'accu = ',accu - print*,ortho_assoc_gaus_pol(l1,m1,l2) - enddo -end From 5c69a7c005ecabe8428c386bf17bad3327891578 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 26 Apr 2024 10:57:57 +0200 Subject: [PATCH 017/131] removed stupid stuffs in spher_harm --- plugins/local/spher_harm/README.rst | 3 + plugins/local/spher_harm/routines_test.irp.f | 172 ++++++++++--------- plugins/local/spher_harm/spher_harm.irp.f | 4 +- 3 files changed, 93 insertions(+), 86 deletions(-) diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst index bf897f73..9c9b12a6 100644 --- a/plugins/local/spher_harm/README.rst +++ b/plugins/local/spher_harm/README.rst @@ -2,3 +2,6 @@ spher_harm ========== +Routines for spherical Harmonics evaluation in real space. +The main routine is "spher_harm_func_r3(r,l,m,re_ylm, im_ylm)". +The test routine is "test_spher_harm" where everything is explained in details. diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f index 6f7cbc1c..fe8fc422 100644 --- a/plugins/local/spher_harm/routines_test.irp.f +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -1,10 +1,93 @@ +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the generic spherical harmonics routine "spher_harm_func_r3" from R^3 --> C + ! + ! We test = delta_m1,m2 delta_l1,l2 + ! + ! The test is done through the integration on a sphere with the Lebedev grid. + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + double precision :: theta,phi,r_abs + lmax = 5 ! Maximum angular momentum until which we are going to test orthogonality conditions + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 ! accumulator for the REAL part of + accu_im = 0.d0 ! accumulator for the IMAGINARY part of + accu = 0.d0 ! accumulator for the weights ==> should be \int dOmega == 4 pi + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! \approx \sum_i W_i Y_l1,m1^*(r_i) Y_l2,m2(r_i) WITH r_i being on the spher of radius 1 + do i = 1, n_points_integration_angular + r(1:3) = angular_quadrature_points(i,1:3) ! ith Lebedev point (x,y,z) on the sphere of radius 1 + weight = weights_angular_points(i) ! associated Lebdev weight not necessarily positive + +!!!!!!!!!!! Test of the Cartesian --> Spherical coordinates + ! theta MUST belong to [0,pi] and phi to [0,2pi] + ! gets the cartesian to spherical change of coordinates + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta, it should be in [0,pi]',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi, it should be in [0,2 pi]',phi/pi + print*,r + endif + +!!!!!!!!!!! Routines returning the Spherical harmonics on the grid point + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + +!!!!!!!!!!! Integration of Y_l1,m1^*(r) Y_l2,m2(r) + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + enddo + ! Test that the sum of the weights is 4 pi + if(dabs(accu - dfour_pi).gt.1.d-6)then + print*,'Problem !! The sum of the Lebedev weight is not 4 pi ..' + print*,accu + stop + endif + ! Test for the delta l1,l2 and delta m1,m2 + ! + ! Test for the off-diagonal part of the Kronecker delta + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + ! Test for the diagonal part of the Kronecker delta + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo +end subroutine test_cart implicit none BEGIN_DOC ! test for the cartesian --> spherical change of coordinates ! - ! simple test such that the polar angle theta ranges in [0,pi] + ! test the routine "cartesian_to_spherical" such that the polar angle theta ranges in [0,pi] ! ! and the asymuthal angle phi ranges in [0,2pi] END_DOC @@ -40,97 +123,18 @@ subroutine test_cart print*,phi/pi end -subroutine test_spher_harm - implicit none - BEGIN_DOC - ! routine to test the spherical harmonics integration on a sphere with the grid. - ! - ! We test = delta_m1,m2 delta_l1,l2 - END_DOC - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - ! Test for the delta l1,l2 and delta m1,m2 - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end subroutine test_brutal_spheric implicit none include 'constants.include.F' BEGIN_DOC - ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! Test for the = delta_m1,m2 delta_l1,l2 using the following two dimentional integration ! ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) ! != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) ! - ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + ! Allows to test for the general functions "spher_harm_func_m_pos" with "spher_harm_func_expl" END_DOC integer :: itheta, iphi,ntheta,nphi double precision :: theta_min, theta_max, dtheta,theta @@ -147,7 +151,7 @@ subroutine test_brutal_spheric dphi = (phi_max - phi_min)/dble(nphi) dtheta = (theta_max - theta_min)/dble(ntheta) - lmax = 3 + lmax = 2 do l1 = 0,lmax do m1 = 0 ,l1 do l2 = 0,lmax @@ -196,7 +200,7 @@ end subroutine test_assoc_leg_pol implicit none BEGIN_DOC -! TODO : Put the documentation of the program here +! Test for the associated Legendre Polynoms. The test is done through the orthogonality condition. END_DOC print *, 'Hello world' integer :: l1,m1,ngrid,i,l2,m2 diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index e8deafb9..7a2eea06 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -1,7 +1,7 @@ program spher_harm implicit none - call test_spher_harm +! call test_spher_harm ! call test_cart -! call test_brutal_spheric + call test_brutal_spheric end From 40ea886cf1f6fe18d2501f1964e4f69deb66d947 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 1 May 2024 19:00:02 +0200 Subject: [PATCH 018/131] added NEED in local/slater_tc --- external/irpf90 | 2 +- plugins/local/slater_tc/NEED | 7 +++++++ plugins/local/slater_tc/slater_tc.irp.f | 7 +++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 plugins/local/slater_tc/NEED create mode 100644 plugins/local/slater_tc/slater_tc.irp.f diff --git a/external/irpf90 b/external/irpf90 index beac6153..4ab1b175 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit beac615343f421bd6c0571a408ba389a6d5a32ac +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/plugins/local/slater_tc/NEED b/plugins/local/slater_tc/NEED new file mode 100644 index 00000000..ef0aa3f7 --- /dev/null +++ b/plugins/local/slater_tc/NEED @@ -0,0 +1,7 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat diff --git a/plugins/local/slater_tc/slater_tc.irp.f b/plugins/local/slater_tc/slater_tc.irp.f new file mode 100644 index 00000000..27ab47c5 --- /dev/null +++ b/plugins/local/slater_tc/slater_tc.irp.f @@ -0,0 +1,7 @@ +program slater_tc + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end From 0465a0f4397a53daa5a3a1c8374a5e34f5b61c67 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 1 May 2024 19:03:21 +0200 Subject: [PATCH 019/131] added local/normal_order_old/NEED --- plugins/local/normal_order_old/NEED | 1 + plugins/local/normal_order_old/README.rst | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 plugins/local/normal_order_old/NEED create mode 100644 plugins/local/normal_order_old/README.rst diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/local/normal_order_old/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/normal_order_old/README.rst b/plugins/local/normal_order_old/README.rst new file mode 100644 index 00000000..a284fcfd --- /dev/null +++ b/plugins/local/normal_order_old/README.rst @@ -0,0 +1,4 @@ +================ +normal_order_old +================ + From c50018e8bdbd0e11da5af2ddfe4032c7d6e86df2 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 1 May 2024 20:25:01 +0200 Subject: [PATCH 020/131] TC SPRING CLEANING: BEGINNING --- .../bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- .../local/non_h_ints_mu/jast_1e_utils.irp.f | 18 +- .../local/non_h_ints_mu/numerical_integ.irp.f | 6 +- .../local/non_h_ints_mu/tc_integ_num.irp.f | 20 +- .../local/non_h_ints_mu/test_non_h_ints.irp.f | 26 +- plugins/local/non_hermit_dav/biorthog.irp.f | 1069 +---------------- .../lapack_diag_non_hermit.irp.f | 118 -- .../local/non_hermit_dav/new_routines.irp.f | 670 ----------- .../mu_j_ints_usual_mos.irp.f | 8 - plugins/local/tc_bi_ortho/EZFIO.cfg | 11 + .../local/tc_bi_ortho/print_tc_energy.irp.f | 30 +- plugins/local/tc_bi_ortho/print_tc_var.irp.f | 5 +- .../save_bitcpsileft_for_qmcchem.irp.f | 8 +- plugins/local/tc_bi_ortho/tc_utils.irp.f | 89 +- plugins/local/tc_scf/EZFIO.cfg | 4 +- plugins/local/tc_scf/combine_lr_tcscf.irp.f | 75 -- plugins/local/tc_scf/diago_vartcfock.irp.f | 96 -- plugins/local/tc_scf/diis_tcscf.irp.f | 75 +- .../local/tc_scf/fock_3e_bi_ortho_cs.irp.f | 299 ----- .../local/tc_scf/fock_3e_bi_ortho_os.irp.f | 536 --------- .../local/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 77 -- .../tc_scf/fock_3e_bi_ortho_uhf_old.irp.f | 490 -------- plugins/local/tc_scf/fock_tc.irp.f | 1000 +++++++++++++-- plugins/local/tc_scf/fock_tc_mo_tot.irp.f | 11 +- plugins/local/tc_scf/fock_vartc.irp.f | 287 ----- plugins/local/tc_scf/rh_tcscf_diis.irp.f | 4 +- plugins/local/tc_scf/rh_tcscf_simple.irp.f | 2 +- plugins/local/tc_scf/rh_vartcscf_simple.irp.f | 89 -- plugins/local/tc_scf/tc_scf.irp.f | 58 +- plugins/local/tc_scf/tc_scf_energy.irp.f | 41 +- plugins/local/tc_scf/test_int.irp.f | 970 --------------- .../extra_grid_vector.irp.f | 11 - .../grid_becke_vector.irp.f | 11 - src/utils/util.irp.f | 80 +- 34 files changed, 1188 insertions(+), 5108 deletions(-) delete mode 100644 plugins/local/non_hermit_dav/new_routines.irp.f delete mode 100644 plugins/local/tc_scf/combine_lr_tcscf.irp.f delete mode 100644 plugins/local/tc_scf/diago_vartcfock.irp.f delete mode 100644 plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f delete mode 100644 plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f delete mode 100644 plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f delete mode 100644 plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f delete mode 100644 plugins/local/tc_scf/fock_vartc.irp.f delete mode 100644 plugins/local/tc_scf/rh_vartcscf_simple.irp.f delete mode 100644 plugins/local/tc_scf/test_int.irp.f diff --git a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f index fd4a162f..73e5a611 100644 --- a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -123,7 +123,7 @@ subroutine give_integrals_3_body_bi_ort_spin( n, sigma_n, l, sigma_l, k, sigma_k endif return -end subroutine give_integrals_3_body_bi_ort_spin +end ! --- diff --git a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f index 9cfabf58..c6b2b0a0 100644 --- a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f +++ b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f @@ -132,6 +132,7 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) double precision, allocatable :: A(:,:,:,:), b(:), A_tmp(:,:,:,:) double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:) double precision, allocatable :: u1e_tmp(:), tmp(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:) @@ -176,26 +177,27 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) ! --- --- --- ! get A - allocate(tmp(n_points_final_grid,ao_num,ao_num)) + allocate(tmp1(n_points_final_grid,ao_num,ao_num), tmp2(n_points_final_grid,ao_num,ao_num)) allocate(A(ao_num,ao_num,ao_num,ao_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp) + !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp1, tmp2) !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp1(ipoint,i,j) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp2(ipoint,i,j) = aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid & + call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp1(1,1,1), n_points_final_grid, tmp2(1,1,1), n_points_final_grid & , 0.d0, A(1,1,1,1), ao_num*ao_num) allocate(A_tmp(ao_num,ao_num,ao_num,ao_num)) @@ -207,13 +209,13 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) allocate(b(ao_num*ao_num)) do ipoint = 1, n_points_final_grid - u1e_tmp(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * u1e_tmp(ipoint) + u1e_tmp(ipoint) = u1e_tmp(ipoint) enddo - call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1) + call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp1(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1) deallocate(u1e_tmp) - deallocate(tmp) + deallocate(tmp1, tmp2) ! --- --- --- ! solve Ax = b diff --git a/plugins/local/non_h_ints_mu/numerical_integ.irp.f b/plugins/local/non_h_ints_mu/numerical_integ.irp.f index 5436b857..2737774a 100644 --- a/plugins/local/non_h_ints_mu/numerical_integ.irp.f +++ b/plugins/local/non_h_ints_mu/numerical_integ.irp.f @@ -179,7 +179,7 @@ double precision function num_v_ij_erf_rk_cst_mu_env(i, j, ipoint) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp1 = (derf(mu_erf * r12) - 1.d0) / r12 @@ -228,7 +228,7 @@ subroutine num_x_v_ij_erf_rk_cst_mu_env(i, j, ipoint, integ) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp1 = (derf(mu_erf * r12) - 1.d0) / r12 @@ -530,7 +530,7 @@ subroutine num_int2_u_grad1u_total_env2(i, j, ipoint, integ) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp0 = env_nucl(r2) diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f index 6d446037..9d9601c0 100644 --- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f @@ -63,12 +63,10 @@ do i_pass = 1, n_pass ii = (i_pass-1)*n_blocks + 1 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_blocks, ipoint) & - !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, & - !$OMP final_grid_points, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12, tmp_grad1_u12_squared) !$OMP DO do i_blocks = 1, n_blocks ipoint = ii - 1 + i_blocks ! r1 @@ -99,12 +97,10 @@ ii = n_pass*n_blocks + 1 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_rest, ipoint) & - !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, & - !$OMP final_grid_points, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12, tmp_grad1_u12_squared) !$OMP DO do i_rest = 1, n_rest ipoint = ii - 1 + i_rest ! r1 diff --git a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f index 464a1c1f..4c63dec4 100644 --- a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f +++ b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f @@ -1125,6 +1125,7 @@ subroutine test_fit_coef_A1() double precision :: accu, norm, diff double precision, allocatable :: A1(:,:) double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) ! --- @@ -1165,16 +1166,17 @@ subroutine test_fit_coef_A1() call wall_time(t1) - allocate(tmp(ao_num,ao_num,n_points_final_grid)) + allocate(tmp1(ao_num,ao_num,n_points_final_grid), tmp2(ao_num,ao_num,n_points_final_grid)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp) + !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp1, tmp2) !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(i,j,ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp1(i,j,ipoint) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp2(i,j,ipoint) = aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo @@ -1184,9 +1186,9 @@ subroutine test_fit_coef_A1() allocate(A2(ao_num,ao_num,ao_num,ao_num)) call dgemm( "N", "T", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, tmp(1,1,1), ao_num*ao_num & + , tmp1(1,1,1), ao_num*ao_num, tmp2(1,1,1), ao_num*ao_num & , 0.d0, A2(1,1,1,1), ao_num*ao_num) - deallocate(tmp) + deallocate(tmp1, tmp2) call wall_time(t2) print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0 @@ -1238,6 +1240,7 @@ subroutine test_fit_coef_inv() double precision, allocatable :: A1(:,:), A1_inv(:,:), A1_tmp(:,:) double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:), A2_inv(:,:,:,:) double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A2_tmp(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) cutoff_svd = 5d-8 @@ -1286,16 +1289,17 @@ subroutine test_fit_coef_inv() call wall_time(t1) - allocate(tmp(n_points_final_grid,ao_num,ao_num)) + allocate(tmp1(n_points_final_grid,ao_num,ao_num), tmp2(n_points_final_grid,ao_num,ao_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp) + !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp1, tmp2) !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp1(ipoint,i,j) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp2(ipoint,i,j) = aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo @@ -1304,11 +1308,11 @@ subroutine test_fit_coef_inv() allocate(A2(ao_num,ao_num,ao_num,ao_num)) - call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid & + call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp1(1,1,1), n_points_final_grid, tmp2(1,1,1), n_points_final_grid & , 0.d0, A2(1,1,1,1), ao_num*ao_num) - deallocate(tmp) + deallocate(tmp1, tmp2) call wall_time(t2) print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0 diff --git a/plugins/local/non_hermit_dav/biorthog.irp.f b/plugins/local/non_hermit_dav/biorthog.irp.f index 2229e17d..b36b0130 100644 --- a/plugins/local/non_hermit_dav/biorthog.irp.f +++ b/plugins/local/non_hermit_dav/biorthog.irp.f @@ -1,254 +1,3 @@ -subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - integer :: n_good - double precision :: shift,shift_current - double precision :: r,thr - integer, allocatable :: list_good(:), iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - - - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - shift = 1.d-15 - shift_current = shift - iteration = 1 - logical :: good_ortho - good_ortho = .False. - do while(n_real_eigv.ne.n.or. .not.good_ortho) - if(shift.gt.1.d-3)then - print*,'shift > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - print*,'***** iteration = ',iteration - print*,'shift = ',shift - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - Aw = A_save - do i = 1, n - do j = 1, n - if(dabs(Aw(j,i)).lt.shift)then - Aw(j,i) = 0.d0 - endif - enddo - enddo - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - allocate(im_part(n),iorder(n)) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - - shift_current = max(10.d0 * dabs(im_part(1)),shift) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - Aw = A_save - call split_matrix_degen(Aw,n,shift_current) - deallocate( im_part, iorder ) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - ! You track the real eigenvalues - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_good += 1 - else - print*,'Found an imaginary component to eigenvalue' - print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - allocate( list_good(n_good), iorder(n_good) ) - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - - ! You sort the real eigenvalues - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec_tmp(j,i) = VR(j,list_good(iorder(i))) - leigvec_tmp(j,i) = Vl(j,list_good(iorder(i))) - enddo - enddo - - if(n_real_eigv == n)then - allocate(S(n,n)) - call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd) - print*,'accu_nd = ',accu_nd - double precision :: accu_nd - good_ortho = accu_nd .lt. 1.d-10 - deallocate(S) - endif - - deallocate( list_good, iorder ) - deallocate( VL, VR, Aw) - shift *= 10.d0 - iteration += 1 - enddo - do i = 1, n - do j = 1, n - reigvec(iorder_origin(j),i) = reigvec_tmp(j,i) - leigvec(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - -end - -! --- - -subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_good - double precision :: shift,shift_current - double precision :: r,thr - integer, allocatable :: list_good(:), iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: im_part(:) - - - print*,'Computing the left/right eigenvectors ...' - - ! Eigvalue(n) = WR(n) + i * WI(n) - shift = 1.d-10 - do while(n_real_eigv.ne.n.or.shift.gt.1.d-3) - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - Aw = A - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - allocate(im_part(n), iorder(n)) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - shift_current = max(10.d0 * dabs(im_part(1)),shift) - print*,'adding random number of magnitude ',shift_current - Aw = A - do i = 1, n - call RANDOM_NUMBER(r) - Aw(i,i) += shift_current * r - enddo - deallocate( im_part, iorder ) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - - ! You track the real eigenvalues - thr = 1.d-10 - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.thr)then - n_good += 1 - else - print*,'Found an imaginary component to eigenvalue' - print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - - allocate( list_good(n_good), iorder(n_good) ) - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.thr)then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - - ! You sort the real eigenvalues - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec(j,i) = VR(j,list_good(iorder(i))) - leigvec(j,i) = Vl(j,list_good(iorder(i))) - enddo - enddo - - deallocate( list_good, iorder ) - deallocate( VL, VR, Aw) - shift *= 10.d0 - enddo - if(shift.gt.1.d-3)then - print*,'shift > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - endif - -end ! --- @@ -282,126 +31,20 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei allocate(phi_1_tilde(n),phi_2_tilde(n),chi_1_tilde(n),chi_2_tilde(n)) - - ! ------------------------------------------------------------------------------------- - ! - - !print *, ' ' - !print *, ' Computing the left/right eigenvectors ...' - !print *, ' ' - allocate(WR(n), WI(n), VL(n,n), VR(n,n)) - - !print *, ' fock matrix' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') A(i,:) - !enddo - !thr_cut = 1.d-15 - !call cancel_small_elmts(A, n, thr_cut) - - !call lapack_diag_non_sym_right(n, A, WR, WI, VR) call lapack_diag_non_sym(n, A, WR, WI, VL, VR) - !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) - - - - !print *, ' ' - !print *, ' eigenvalues' - i = 1 - do while(i .le. n) - !write(*, '(I3,X,1000(F16.10,X))')i, WR(i), WI(i) - if(.false.)then - if(WI(i).ne.0.d0)then - print*,'*****************' - print*,'WARNING ! IMAGINARY EIGENVALUES !!!' - write(*, '(1000(F16.10,X))') WR(i), WI(i+1) - ! phi = VR(:,i), psi = VR(:,i+1), |Phi_i> = phi + j psi , |Phi_i+1> = phi - j psi - ! chi = VL(:,i), xhi = VL(:,i+1), |Chi_i> = chi + j xhi , |Chi_i+1> = chi - j xhi - ! - accu_chi_phi = 0.d0 - accu_xhi_psi = 0.d0 - accu_chi_psi = 0.d0 - accu_xhi_phi = 0.d0 - double precision :: accu_chi_phi, accu_xhi_psi, accu_chi_psi, accu_xhi_phi - double precision :: mat_ovlp(2,2),eigval_tmp(2),eigvec(2,2),mat_ovlp_orig(2,2) - do j = 1, n - accu_chi_phi += VL(j,i) * VR(j,i) - accu_xhi_psi += VL(j,i+1) * VR(j,i+1) - accu_chi_psi += VL(j,i) * VR(j,i+1) - accu_xhi_phi += VL(j,i+1) * VR(j,i) - enddo - mat_ovlp_orig(1,1) = accu_chi_phi - mat_ovlp_orig(2,1) = accu_xhi_phi - mat_ovlp_orig(1,2) = accu_chi_psi - mat_ovlp_orig(2,2) = accu_xhi_psi - print*,'old overlap matrix ' - write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,1) - write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,2) - - - mat_ovlp(1,1) = accu_xhi_phi - mat_ovlp(2,1) = accu_chi_phi - mat_ovlp(1,2) = accu_xhi_psi - mat_ovlp(2,2) = accu_chi_psi - !print*,'accu_chi_phi = ',accu_chi_phi - !print*,'accu_xhi_psi = ',accu_xhi_psi - !print*,'accu_chi_psi = ',accu_chi_psi - !print*,'accu_xhi_phi = ',accu_xhi_phi - print*,'new overlap matrix ' - write(*,'(100(F16.10,X))')mat_ovlp(1:2,1) - write(*,'(100(F16.10,X))')mat_ovlp(1:2,2) - call lapack_diag(eigval_tmp,eigvec,mat_ovlp,2,2) - print*,'eigval_tmp(1) = ',eigval_tmp(1) - print*,'eigvec(1) = ',eigvec(1:2,1) - print*,'eigval_tmp(2) = ',eigval_tmp(2) - print*,'eigvec(2) = ',eigvec(1:2,2) - print*,'*****************' - phi_1_tilde = 0.d0 - phi_2_tilde = 0.d0 - chi_1_tilde = 0.d0 - chi_2_tilde = 0.d0 - do j = 1, n - phi_1_tilde(j) += VR(j,i) * eigvec(1,1) + VR(j,i+1) * eigvec(2,1) - phi_2_tilde(j) += VR(j,i) * eigvec(1,2) + VR(j,i+1) * eigvec(2,2) - chi_1_tilde(j) += VL(j,i+1) * eigvec(1,1) + VL(j,i) * eigvec(2,1) - chi_2_tilde(j) += VL(j,i+1) * eigvec(1,2) + VL(j,i) * eigvec(2,2) - enddo - VR(1:n,i) = phi_1_tilde(1:n) - VR(1:n,i+1) = phi_2_tilde(1:n) -! Vl(1:n,i) = -chi_1_tilde(1:n) -! Vl(1:n,i+1) = chi_2_tilde(1:n) - i+=1 - endif - endif - i+=1 - enddo - !print *, ' right eigenvect bef' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') VR(:,i) - !enddo - !print *, ' left eigenvect bef' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') VL(:,i) - !enddo thr_diag = 1d-06 thr_norm = 1d+10 - !call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.) - - ! - ! ------------------------------------------------------------------------------------- ! --- - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues + ! track & sort the real eigenvalues n_good = 0 - !thr = 100d0 thr = Im_thresh_tcscf do i = 1, n - !print*, 'Re(i) + Im(i)', WR(i), WI(i) if(dabs(WI(i)) .lt. thr) then n_good += 1 else @@ -410,11 +53,12 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei endif enddo - if(n_good.ne.n)then - print*,'there are some imaginary eigenvalues ' - thr_diag = 1d-03 - n_good = n + if(n_good.ne.n) then + print*,'there are some imaginary eigenvalues ' + thr_diag = 1d-03 + n_good = n endif + allocate(list_good(n_good), iorder(n_good)) n_good = 0 @@ -446,26 +90,9 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei ASSERT(n==n_real_eigv) - !print *, ' eigenvalues' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') eigval(i) - !enddo - !print *, ' right eigenvect aft ord' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') reigvec(:,i) - !enddo - !print *, ' left eigenvect aft ord' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') leigvec(:,i) - !enddo - - ! - ! ------------------------------------------------------------------------------------- - ! --- - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality + ! check bi-orthogonality thr_diag = 10.d0 thr_norm = 1d+10 @@ -495,8 +122,6 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei print *, ' lapack vectors are not normalized neither bi-orthogonalized' - ! --- - allocate(deg_num(n)) call reorder_degen_eigvec(n, deg_num, eigval, leigvec, reigvec) call impose_biorthog_degen_eigvec(n, deg_num, eigval, leigvec, reigvec) @@ -508,700 +133,36 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei endif call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.) - !call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) - deallocate(S) endif - ! - ! ------------------------------------------------------------------------------------- - return end ! --- -subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) +subroutine check_bi_ortho(reigvec, leigvec, n, S, accu_nd) BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! retunrs the overlap matrix S = Leigvec^T Reigvec ! + ! and the square root of the sum of the squared off-diagonal elements of S END_DOC implicit none integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + double precision, intent(in) :: reigvec(n,n), leigvec(n,n) + double precision, intent(out) :: S(n,n), accu_nd - integer :: i, j - integer :: n_good - double precision :: thr - double precision :: accu_nd + integer :: i,j - integer, allocatable :: list_good(:), iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision :: r - - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) - - Aw(:,:) = A(:,:) - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - - thr = 1.d-12 - double precision, allocatable :: im_part(:) - n_good = 0 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - else - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) + Im(i)', WR(i), WI(i) - endif - enddo - print*,'n_good = ',n_good - if(n_good .lt. n)then - print*,'Removing degeneracies to remove imaginary parts' - allocate(im_part(n),iorder(n)) - r = 0.d0 - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part,iorder,n) - thr = 10.d0 * dabs(im_part(1)) - print*,'adding random numbers on the diagonal of magnitude ',thr - Aw(:,:) = A(:,:) - do i = 1, n - call RANDOM_NUMBER(r) - print*,'r = ',r*thr - Aw(i,i) += thr * r - enddo - print*,'Rediagonalizing the matrix with random numbers' - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - deallocate(im_part,iorder) - endif - deallocate( Aw ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - n_good = 0 - thr = 1.d-5 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - else - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) + Im(i)', WR(i), WI(i) - endif - enddo - print*,'n_good = ',n_good - allocate( list_good(n_good), iorder(n_good) ) - - n_good = 0 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec(j,i) = VR(j,list_good(iorder(i))) - leigvec(j,i) = VL(j,list_good(iorder(i))) - enddo - enddo - - deallocate( list_good, iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n_real_eigv,n_real_eigv) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n_real_eigv - do j = 1, n_real_eigv - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - ! L x R is already bi-orthogonal - - print *, ' L & T bi-orthogonality: ok' - deallocate( S ) - return - - else - ! impose bi-orthogonality - - print *, ' L & T bi-orthogonality: not imposed yet' - print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) - deallocate( S ) - - endif - - ! - ! ------------------------------------------------------------------------------------- - - return - -end - -! --- - -subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_bad - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision :: r - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), iorder(n)) - - Aw(:,:) = A(:,:) - do i = 1, n - call RANDOM_NUMBER(r) - Aw(i,i) += 10.d-10* r - enddo - call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - i = 1 - thr = 1.d-15 - n_real_eigv = 0 - do while (i.le.n) -! print*,i,dabs(WI(i)) - if( dabs(WI(i)).gt.thr ) then - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) , Im(i) ', WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i) - i+=1 - print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i) - i+=1 - else - n_real_eigv += 1 - iorder(i) = i - eigval(i) = WR(i) - i+=1 - endif - enddo - call dsort(eigval, iorder, n) - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - deallocate( S ) - -end - -! --- - -subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors - ! for A R = lambda B R and A^\dagger L = lambda B^\dagger L - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n),B(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_bad - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:),Bw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:) - double precision, allocatable :: S(:,:) - double precision :: r - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n)) - - Aw(:,:) = A(:,:) - Bw(:,:) = B(:,:) - call lapack_diag_general_non_sym(n,Aw,Bw,WR,beta,WI,VL,VR) - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - i = 1 - thr = 1.d-10 - n_real_eigv = 0 - do while (i.le.n) - if( dabs(WI(i)).gt.thr ) then - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) , Im(i) ', WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - else - n_real_eigv += 1 - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - endif - enddo - call dsort(eigval, iorder, n) - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - deallocate( S ) - -end - -! --- - -subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_good - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision, allocatable :: eigval_sorted(:) - - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) - Aw(:,:) = A(:,:) - - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - - deallocate( Aw ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - allocate( eigval_sorted(n), iorder(n) ) - - n_good = 0 - thr = 1.d-10 - - do i = 1, n - - iorder(i) = i - eigval_sorted(i) = WR(i) - - if(dabs(WI(i)) .gt. thr) then - print*, ' Found an imaginary component to eigenvalue on i = ', i - print*, ' Re(i) + Im(i)', WR(i), WI(i) - else - n_good += 1 - endif - - enddo - - n_real_eigv = n_good - - call dsort(eigval_sorted, iorder, n) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - eigval(i) = WR(i) - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( eigval_sorted, iorder ) - deallocate( WR, WI ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - ! L x R is already bi-orthogonal - - !print *, ' L & T bi-orthogonality: ok' - deallocate( S ) - return - - else - ! impose bi-orthogonality - - !print *, ' L & T bi-orthogonality: not imposed yet' - !print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) - deallocate( S ) - - endif - - ! - ! ------------------------------------------------------------------------------------- - - return - -end - -! --- - - -subroutine split_matrix_degen(aw,n,shift) - implicit none - BEGIN_DOC - ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 - ! - ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS - END_DOC - double precision,intent(inout) :: Aw(n,n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer :: i,j,n_degen - logical :: keep_on - i=1 - do while(i.lt.n) - if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then - j=1 - keep_on = .True. - do while(keep_on) - if(i+j.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then - j+=1 - else - keep_on=.False. - exit - endif - enddo - n_degen = j - j=0 - keep_on = .True. - do while(keep_on) - if(i+j+1.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then - Aw(i+j,i+j) += (j-n_degen/2) * shift - j+=1 - else - keep_on = .False. - exit - endif - enddo - Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift - i+=n_degen - else - i+=1 - endif - enddo - -end - -subroutine give_degen(a,n,shift,list_degen,n_degen_list) - implicit none - BEGIN_DOC - ! returns n_degen_list :: the number of degenerated SET of elements (i.e. with |A(i)-A(i+1)| below shift) - ! - ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, - ! - ! list_degen(2,i) = last degenerate element of the set i. - END_DOC - double precision,intent(in) :: A(n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer, intent(out) :: list_degen(2,n),n_degen_list - integer :: i,j,n_degen,k - logical :: keep_on - double precision,allocatable :: Aw(:) - list_degen = -1 - allocate(Aw(n)) - Aw = A - i=1 - k = 0 - do while(i.lt.n) - if(dabs(Aw(i)-Aw(i+1)).lt.shift)then - k+=1 - j=1 - list_degen(1,k) = i - keep_on = .True. - do while(keep_on) - if(i+j.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i)-Aw(i+j)).lt.shift)then - j+=1 - else - keep_on=.False. - exit - endif - enddo - n_degen = j - list_degen(2,k) = list_degen(1,k)-1 + n_degen - j=0 - keep_on = .True. - do while(keep_on) - if(i+j+1.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then - Aw(i+j) += (j-n_degen/2) * shift - j+=1 - else - keep_on = .False. - exit - endif - enddo - Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift - i+=n_degen - else - i+=1 - endif - enddo - n_degen_list = k - -end - -subroutine cancel_small_elmts(aw,n,shift) - implicit none - BEGIN_DOC - ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 - ! - ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS - END_DOC - double precision,intent(inout) :: Aw(n,n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer :: i,j - do i = 1, n - do j = 1, n - if(dabs(Aw(j,i)).lt.shift)then - Aw(j,i) = 0.d0 - endif - enddo - enddo -end - -subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd) - implicit none - integer, intent(in) :: n - double precision,intent(in) :: reigvec(n,n),leigvec(n,n) - double precision, intent(out) :: S(n,n),accu_nd - BEGIN_DOC -! retunrs the overlap matrix S = Leigvec^T Reigvec -! -! and the square root of the sum of the squared off-diagonal elements of S - END_DOC - integer :: i,j ! S = VL x VR call dgemm( 'T', 'N', n, n, n, 1.d0 & , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & , 0.d0, S, size(S, 1) ) + accu_nd = 0.d0 do i = 1, n do j = 1, n @@ -1213,3 +174,5 @@ subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd) accu_nd = dsqrt(accu_nd) end + + diff --git a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f index 4d4bc047..2c053ac8 100644 --- a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -273,60 +273,6 @@ end ! --- -subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR) - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - double precision, intent(out) :: WR(n), WI(n), VR(n,n) - - integer :: i, lda, ldvl, ldvr, LWORK, INFO - double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:) - - lda = n - ldvl = 1 - ldvr = n - - allocate( Atmp(n,n), VL(1,1) ) - Atmp(1:n,1:n) = A(1:n,1:n) - - allocate(WORK(1)) - LWORK = -1 - call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) - if(INFO.gt.0)then - print*,'dgeev failed !!',INFO - stop - endif - - LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK - deallocate(WORK) - - allocate(WORK(LWORK)) - - ! Actual diagonalization - call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) - if(INFO.ne.0) then - print*,'dgeev failed !!', INFO - stop - endif - - deallocate(Atmp, WORK, VL) - -! print *, ' JOBL = F' -! print *, ' eigenvalues' -! do i = 1, n -! write(*, '(1000(F16.10,X))') WR(i), WI(i) -! enddo -! print *, ' right eigenvect' -! do i = 1, n -! write(*, '(1000(F16.10,X))') VR(:,i) -! enddo - -end - -! --- - subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) BEGIN_DOC @@ -1780,70 +1726,6 @@ end ! --- -subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot) - - implicit none - - integer, intent(in) :: n, m - double precision, intent(in) :: Vl(n,m), Vr(n,m), W(n,n) - double precision, intent(in) :: thr_d, thr_nd - logical, intent(in) :: stop_ifnot - double precision, intent(out) :: accu_d, accu_nd, S(m,m) - - integer :: i, j - double precision, allocatable :: SS(:,:), tmp(:,:) - - print *, ' check weighted bi-orthogonality' - - ! --- - - allocate(tmp(m,n)) - call dgemm( 'T', 'N', m, n, n, 1.d0 & - , Vl, size(Vl, 1), W, size(W, 1) & - , 0.d0, tmp, size(tmp, 1) ) - call dgemm( 'N', 'N', m, m, n, 1.d0 & - , tmp, size(tmp, 1), Vr, size(Vr, 1) & - , 0.d0, S, size(S, 1) ) - deallocate(tmp) - - !print *, ' overlap matrix:' - !do i = 1, m - ! write(*,'(1000(F16.10,X))') S(i,:) - !enddo - - accu_d = 0.d0 - accu_nd = 0.d0 - do i = 1, m - do j = 1, m - if(i==j) then - accu_d = accu_d + dabs(S(i,i)) - else - accu_nd = accu_nd + S(j,i) * S(j,i) - endif - enddo - enddo - accu_nd = dsqrt(accu_nd) - - print *, ' accu_nd = ', accu_nd - print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) - - ! --- - - if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then - print *, ' non bi-orthogonal vectors !' - print *, ' accu_nd = ', accu_nd - print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) - !print *, ' overlap matrix:' - !do i = 1, m - ! write(*,'(1000(F16.10,X))') S(i,:) - !enddo - stop - endif - -end - -! --- - subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot) implicit none diff --git a/plugins/local/non_hermit_dav/new_routines.irp.f b/plugins/local/non_hermit_dav/new_routines.irp.f deleted file mode 100644 index 8db044d3..00000000 --- a/plugins/local/non_hermit_dav/new_routines.irp.f +++ /dev/null @@ -1,670 +0,0 @@ -subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut, thr_norm=1d0 - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - print *, ' ' - print *, ' ' - print *, ' orthog between degen eigenvect' - print *, ' ' - double precision, allocatable :: S_nh_inv_half(:,:) - allocate(S_nh_inv_half(n,n)) - logical :: complex_root - deallocate(S_nh_inv_half) - call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) - call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - - -subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut, thr_norm=1.d0 - double precision, allocatable :: S_nh_inv_half(:,:) - logical :: complex_root - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - allocate(S_nh_inv_half(n,n)) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save -! thr_cut = shift_current - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - if(complex_root) then - print *, ' ' - print *, ' ' - print *, ' orthog between degen eigenvect' - print *, ' ' - ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right - call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec - call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ', accu_nd - call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root) - if(complex_root)then - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR - else - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - endif - else ! the matrix S^{-1/2} exists - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - -subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine returning the eigenvalues and left/right eigenvectors of the TC fock matrix - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut - double precision, allocatable :: S_nh_inv_half(:,:) - logical :: complex_root - double precision :: thr_norm=1d0 - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - allocate(S_nh_inv_half(n,n)) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save -! thr_cut = shift_current - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - print *, ' ' - print *, ' ' - print *, ' Using impose_unique_biorthog_degen_eigvec' - print *, ' ' - ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right - call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print*,'accu_nd = ',accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root) - if(complex_root)then - print*,'S^{-1/2} does not exits, using QR bi-orthogonalization' - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR - else - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - diff --git a/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f b/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f index a3f1b6ef..cb7cdb22 100644 --- a/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f +++ b/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f @@ -183,11 +183,3 @@ BEGIN_PROVIDER [ double precision, x_W_ij_erf_rk, ( n_points_final_grid,3,mo_num END_PROVIDER -BEGIN_PROVIDER [ double precision, sqrt_weight_at_r, (n_points_final_grid)] - implicit none - integer :: ipoint - do ipoint = 1, n_points_final_grid - sqrt_weight_at_r(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) - enddo -END_PROVIDER - diff --git a/plugins/local/tc_bi_ortho/EZFIO.cfg b/plugins/local/tc_bi_ortho/EZFIO.cfg index a34d2134..67c780d7 100644 --- a/plugins/local/tc_bi_ortho/EZFIO.cfg +++ b/plugins/local/tc_bi_ortho/EZFIO.cfg @@ -9,3 +9,14 @@ interface: ezfio doc: Coefficients for the right wave function type: double precision size: (determinants.n_det,determinants.n_states) + +[tc_gs_energy] +type: Threshold +doc: TC GS Energy +interface: ezfio + +[tc_gs_var] +type: Threshold +doc: TC GS VAR +interface: ezfio + diff --git a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f b/plugins/local/tc_bi_ortho/print_tc_energy.irp.f index ef38cbcc..1fa0c6d9 100644 --- a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_energy.irp.f @@ -6,18 +6,9 @@ program print_tc_energy implicit none - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - read_wf = .True. touch read_wf - PROVIDE j2e_type PROVIDE j1e_type PROVIDE env_type @@ -26,6 +17,27 @@ program print_tc_energy print *, ' j1e_type = ', j1e_type print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + call write_tc_energy() end diff --git a/plugins/local/tc_bi_ortho/print_tc_var.irp.f b/plugins/local/tc_bi_ortho/print_tc_var.irp.f index bec34f18..6743cd11 100644 --- a/plugins/local/tc_bi_ortho/print_tc_var.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_var.irp.f @@ -6,7 +6,8 @@ program print_tc_var implicit none - print *, 'Hello world' + print *, ' TC VAR is available only for HF REF WF' + print *, ' DO NOT FORGET TO RUN A CISD CALCULATION BEF' my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r @@ -17,7 +18,7 @@ program print_tc_var read_wf = .True. touch read_wf - call write_tc_var() + call write_tc_gs_var_HF() end diff --git a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f index efa4aa2c..ac90f737 100644 --- a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f +++ b/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f @@ -38,9 +38,9 @@ subroutine main() call ezfio_has_cisd_energy(exists) if(.not.exists) then - call ezfio_has_tc_scf_bitc_energy(exists) + call ezfio_has_tc_scf_tcscf_energy(exists) if(exists) then - call ezfio_get_tc_scf_bitc_energy(e_ref) + call ezfio_get_tc_scf_tcscf_energy(e_ref) endif else @@ -59,7 +59,7 @@ subroutine main() close(iunit) -end subroutine main +end ! -- @@ -89,7 +89,7 @@ subroutine write_lr_spindeterminants() call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer) deallocate(buffer) -end subroutine write_lr_spindeterminants +end ! --- diff --git a/plugins/local/tc_bi_ortho/tc_utils.irp.f b/plugins/local/tc_bi_ortho/tc_utils.irp.f index 53fe5884..43a6865e 100644 --- a/plugins/local/tc_bi_ortho/tc_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_utils.irp.f @@ -2,12 +2,67 @@ subroutine write_tc_energy() implicit none - integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot - double precision :: E_TC, O_TC - double precision :: E_1e, E_2e, E_3e + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: E_TC, O_TC + double precision :: E_1e, E_2e, E_3e + double precision, allocatable :: E_TC_tmp(:), E_1e_tmp(:), E_2e_tmp(:), E_3e_tmp(:) - do k = 1, n_states + ! GS + ! --- + + allocate(E_TC_tmp(N_det), E_1e_tmp(N_det), E_2e_tmp(N_det), E_3e_tmp(N_det)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE(i, j, hmono, htwoe, hthree, htot) & + !$OMP SHARED(N_det, psi_det, N_int, psi_l_coef_bi_ortho, psi_r_coef_bi_ortho, & + !$OMP E_TC_tmp, E_1e_tmp, E_2e_tmp, E_3e_tmp) + !$OMP DO + do i = 1, N_det + E_TC_tmp(i) = 0.d0 + E_1e_tmp(i) = 0.d0 + E_2e_tmp(i) = 0.d0 + E_3e_tmp(i) = 0.d0 + do j = 1, N_det + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + E_TC_tmp(i) = E_TC_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htot + E_1e_tmp(i) = E_1e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hmono + E_2e_tmp(i) = E_2e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htwoe + E_3e_tmp(i) = E_3e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hthree + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + E_1e = 0.d0 + E_2e = 0.d0 + E_3e = 0.d0 + E_TC = 0.d0 + O_TC = 0.d0 + do i = 1, N_det + E_1e = E_1e + E_1e_tmp(i) + E_2e = E_2e + E_2e_tmp(i) + E_3e = E_3e + E_3e_tmp(i) + E_TC = E_TC + E_TC_tmp(i) + O_TC = O_TC + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(i,1) + enddo + + print *, ' state :', 1 + print *, " E_TC = ", E_TC / O_TC + print *, " E_1e = ", E_1e / O_TC + print *, " E_2e = ", E_2e / O_TC + print *, " E_3e = ", E_3e / O_TC + print *, " O_TC = ", O_TC + + call ezfio_set_tc_bi_ortho_tc_gs_energy(E_TC/O_TC) + + ! --- + + ! ES + ! --- + + do k = 2, n_states E_TC = 0.d0 E_1e = 0.d0 @@ -37,6 +92,8 @@ subroutine write_tc_energy() enddo + deallocate(E_TC_tmp, E_1e_tmp, E_2e_tmp, E_3e_tmp) + end ! --- @@ -66,3 +123,25 @@ end ! --- +subroutine write_tc_gs_var_HF() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: SIGMA_TC + + SIGMA_TC = 0.d0 + do j = 2, N_det + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + SIGMA_TC = SIGMA_TC + htot * htot + enddo + + print *, " SIGMA_TC = ", SIGMA_TC + + call ezfio_set_tc_bi_ortho_tc_gs_var(SIGMA_TC) + +end + +! --- + + diff --git a/plugins/local/tc_scf/EZFIO.cfg b/plugins/local/tc_scf/EZFIO.cfg index 3dfa9a71..510c777c 100644 --- a/plugins/local/tc_scf/EZFIO.cfg +++ b/plugins/local/tc_scf/EZFIO.cfg @@ -1,6 +1,6 @@ -[bitc_energy] +[tcscf_energy] type: Threshold -doc: Energy bi-tc HF +doc: TC-SCF ENERGY interface: ezfio [converged_tcscf] diff --git a/plugins/local/tc_scf/combine_lr_tcscf.irp.f b/plugins/local/tc_scf/combine_lr_tcscf.irp.f deleted file mode 100644 index a22614ba..00000000 --- a/plugins/local/tc_scf/combine_lr_tcscf.irp.f +++ /dev/null @@ -1,75 +0,0 @@ - -! --- - -program combine_lr_tcscf - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - bi_ortho = .True. - touch bi_ortho - - call comb_orbitals() - -end - -! --- - -subroutine comb_orbitals() - - implicit none - integer :: i, m, n, nn, mm - double precision :: accu_d, accu_nd - double precision, allocatable :: R(:,:), L(:,:), Rnew(:,:), tmp(:,:), S(:,:) - - n = ao_num - m = mo_num - nn = elec_alpha_num - mm = m - nn - - allocate(L(n,m), R(n,m), Rnew(n,m), S(m,m)) - L = mo_l_coef - R = mo_r_coef - - call check_weighted_biorthog(n, m, ao_overlap, L, R, accu_d, accu_nd, S, .true.) - - allocate(tmp(n,nn)) - do i = 1, nn - tmp(1:n,i) = R(1:n,i) - enddo - call impose_weighted_orthog_svd(n, nn, ao_overlap, tmp) - do i = 1, nn - Rnew(1:n,i) = tmp(1:n,i) - enddo - deallocate(tmp) - - allocate(tmp(n,mm)) - do i = 1, mm - tmp(1:n,i) = L(1:n,i+nn) - enddo - call impose_weighted_orthog_svd(n, mm, ao_overlap, tmp) - do i = 1, mm - Rnew(1:n,i+nn) = tmp(1:n,i) - enddo - deallocate(tmp) - - call check_weighted_biorthog(n, m, ao_overlap, Rnew, Rnew, accu_d, accu_nd, S, .true.) - - mo_r_coef = Rnew - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - - deallocate(L, R, Rnew, S) - -end subroutine comb_orbitals - -! --- - diff --git a/plugins/local/tc_scf/diago_vartcfock.irp.f b/plugins/local/tc_scf/diago_vartcfock.irp.f deleted file mode 100644 index 0c881dcb..00000000 --- a/plugins/local/tc_scf/diago_vartcfock.irp.f +++ /dev/null @@ -1,96 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_mo, (mo_num, mo_num)] - - implicit none - - integer :: i, j - integer :: liwork, lwork, n, info - integer, allocatable :: iwork(:) - double precision, allocatable :: work(:), F(:,:), F_save(:,:) - double precision, allocatable :: diag(:) - - PROVIDE mo_r_coef - PROVIDE Fock_matrix_vartc_mo_tot - - allocate( F(mo_num,mo_num), F_save(mo_num,mo_num) ) - allocate (diag(mo_num) ) - - do j = 1, mo_num - do i = 1, mo_num - F(i,j) = Fock_matrix_vartc_mo_tot(i,j) - enddo - enddo - - ! Insert level shift here - do i = elec_beta_num+1, elec_alpha_num - F(i,i) += 0.5d0 * level_shift_tcscf - enddo - do i = elec_alpha_num+1, mo_num - F(i,i) += level_shift_tcscf - enddo - - n = mo_num - lwork = 1+6*n + 2*n*n - liwork = 3 + 5*n - - allocate(work(lwork)) - allocate(iwork(liwork) ) - - lwork = -1 - liwork = -1 - - F_save = F - call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info) - - if (info /= 0) then - print *, irp_here//' DSYEVD failed : ', info - stop 1 - endif - lwork = int(work(1)) - liwork = iwork(1) - deallocate(iwork) - deallocate(work) - - allocate(work(lwork)) - allocate(iwork(liwork) ) - call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info) - deallocate(iwork) - - if (info /= 0) then - F = F_save - call dsyev('V', 'L', mo_num, F, size(F, 1), diag, work, lwork, info) - - if (info /= 0) then - print *, irp_here//' DSYEV failed : ', info - stop 1 - endif - endif - - do i = 1, mo_num - do j = 1, mo_num - fock_vartc_eigvec_mo(j,i) = F(j,i) - enddo - enddo - - deallocate(work, F, F_save, diag) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_ao, (ao_num, mo_num)] - - implicit none - - PROVIDE mo_r_coef - - call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 & - , mo_r_coef, size(mo_r_coef, 1), fock_vartc_eigvec_mo, size(fock_vartc_eigvec_mo, 1) & - , 0.d0, fock_vartc_eigvec_ao, size(fock_vartc_eigvec_ao, 1)) - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/diis_tcscf.irp.f b/plugins/local/tc_scf/diis_tcscf.irp.f index 5d7d6b2e..ccc8eb15 100644 --- a/plugins/local/tc_scf/diis_tcscf.irp.f +++ b/plugins/local/tc_scf/diis_tcscf.irp.f @@ -91,28 +91,14 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] double precision, allocatable :: tmp(:,:) double precision, allocatable :: F(:,:) - !print *, ' Providing FQS_SQF_ao ...' - !call wall_time(t0) + PROVIDE Fock_matrix_tc_ao_tot allocate(F(ao_num,ao_num)) - if(var_tc) then - - do i = 1, ao_num - do j = 1, ao_num - F(j,i) = Fock_matrix_vartc_ao_tot(j,i) - enddo + do i = 1, ao_num + do j = 1, ao_num + F(j,i) = Fock_matrix_tc_ao_tot(j,i) enddo - - else - - PROVIDE Fock_matrix_tc_ao_tot - do i = 1, ao_num - do j = 1, ao_num - F(j,i) = Fock_matrix_tc_ao_tot(j,i) - enddo - enddo - - endif + enddo allocate(tmp(ao_num,ao_num)) @@ -140,9 +126,6 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] deallocate(tmp) deallocate(F) - !call wall_time(t1) - !print *, ' Wall time for FQS_SQF_ao =', t1-t0 - END_PROVIDER ! --- @@ -152,61 +135,13 @@ BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] implicit none double precision :: t0, t1 - !print*, ' Providing FQS_SQF_mo ...' - !call wall_time(t0) - PROVIDE mo_r_coef mo_l_coef PROVIDE FQS_SQF_ao call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) - !call wall_time(t1) - !print*, ' Wall time for FQS_SQF_mo =', t1-t0 - END_PROVIDER ! --- -! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ] -!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ] -! -! BEGIN_DOC -! ! -! ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis -! ! -! ! F' = X.T x F x X where X = ao_overlap^(-1/2) -! ! -! ! F' x Cr' = Cr' x E ==> F Cr = Cr x E with Cr = X x Cr' -! ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl' -! ! -! END_DOC -! -! implicit none -! double precision, allocatable :: tmp1(:,:), tmp2(:,:) -! -! ! --- -! ! Fock matrix in orthogonal basis: F' = X.T x F x X -! -! allocate(tmp1(ao_num,ao_num)) -! call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & -! , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) & -! , 0.d0, tmp1, size(tmp1, 1) ) -! -! allocate(tmp2(ao_num,ao_num)) -! call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0 & -! , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) & -! , 0.d0, tmp2, size(tmp2, 1) ) -! -! ! --- -! -! ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues -! ! TODO -! -! ! Back-transform eigenvectors: C =X.C' -! -!END_PROVIDER - -! --- - -~ diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f deleted file mode 100644 index 8fd5e5b6..00000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f +++ /dev/null @@ -1,299 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j, ipoint - double precision :: ti, tf - double precision :: loc_1, loc_2, loc_3 - double precision, allocatable :: Okappa(:), Jkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - !call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - loc_1 = 2.d0 * Okappa(ipoint) - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - tmp_1(ipoint,4) = Okappa(ipoint) - enddo - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - if(tc_save_mem) then - - allocate(tmp_22(n_points_final_grid,4,mo_num)) - do a = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_22) - !$OMP DO - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - tmp_22(:,4,b) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0 & - , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,a), 1) - enddo - deallocate(tmp_22) - - else - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - deallocate(tmp_2) - - endif - - deallocate(tmp_1) - - ! --- - - allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) - tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & - - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 5*n_points_final_grid & - , tmp_4(1,1,1), 5*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - ! --- - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', (tf - ti) / 60.d0 - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f deleted file mode 100644 index 4bbce720..00000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f +++ /dev/null @@ -1,536 +0,0 @@ - -! --- - - BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_os, (mo_num, mo_num)] -&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_os, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Open Shell part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, ipoint - double precision :: loc_1, loc_2, loc_3, loc_4 - double precision :: ti, tf - double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os ...' - !call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - Jbarkappa = 0.d0 - Obarkappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Obarkappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - - loc_1 = -2.d0 * Okappa (ipoint) - loc_2 = -2.d0 * Obarkappa(ipoint) - loc_3 = Obarkappa(ipoint) - - tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Obarkappa(ipoint) - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_b_os(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & - + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & - + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) - - tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_2 = mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 2*n_points_final_grid & - , tmp_4(1,1,1), 2*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_b_os(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - - - - - ! --- - - fock_3e_uhf_mo_a_os = fock_3e_uhf_mo_b_os - - allocate(tmp_1(n_points_final_grid,1)) - - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) - enddo - - allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,1,b,a) = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 1.d0, fock_3e_uhf_mo_a_os(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = 2.d0 * loc_1 - loc_2 = mos_r_in_r_array_transp(ipoint,i) - loc_4 = 2.d0 * loc_2 - - tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = mos_r_in_r_array_transp(ipoint,j) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 8*n_points_final_grid & - , tmp_4(1,1,1), 8*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_a_os(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f deleted file mode 100644 index 47ee5b48..00000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ /dev/null @@ -1,77 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Fock matrix alpha from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - double precision :: ti, tf - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing fock_3e_uhf_mo_a ...' - !call wall_time(ti) - - ! CLOSED-SHELL PART - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs - - if(elec_alpha_num .ne. elec_beta_num) then - - ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_a_os - - fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os - endif - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a (min) =', (tf - ti)/60.d0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Fock matrix beta from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - double precision :: ti, tf - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing and fock_3e_uhf_mo_b ...' - !call wall_time(ti) - - ! CLOSED-SHELL PART - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs - - if(elec_alpha_num .ne. elec_beta_num) then - - ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_b_os - - fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os - endif - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_b =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f deleted file mode 100644 index 3bf6bd85..00000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f +++ /dev/null @@ -1,490 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - !print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' - !call wall_time(ti) - - fock_3e_uhf_mo_cs_old = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = 1, elec_beta_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! ALPHA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - - !print *, ' Providing fock_3e_uhf_mo_a_old ...' - !call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! BETA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' - !call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - !print *, ' PROVIDING fock_3e_uhf_ao_a ...' - !call wall_time(ti) - - fock_3e_uhf_ao_a = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_a * dm_dk_a * i_mugd_eknu & - + dm_ge_a * dm_dk_a * i_mugd_knue & - - dm_ge_a * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_a * i_mugd_kenu & - - dm_ge_a * dm_dk_a * i_mugd_nuke & - - dm_ge_b * dm_dk_b * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - !print *, ' PROVIDING fock_3e_uhf_ao_b ...' - !call wall_time(ti) - - fock_3e_uhf_ao_b = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_b * dm_dk_b * i_mugd_eknu & - + dm_ge_b * dm_dk_b * i_mugd_knue & - - dm_ge_b * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_b * i_mugd_kenu & - - dm_ge_b * dm_dk_b * i_mugd_nuke & - - dm_ge_a * dm_dk_a * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index d3ddb8ad..508f3cd7 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -1,78 +1,15 @@ + ! --- - BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)] + BEGIN_PROVIDER [ double precision, two_e_tc_integral_alpha, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, two_e_tc_integral_beta , (ao_num, ao_num)] BEGIN_DOC ! - ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = ON THE AO BASIS + ! two_e_tc_integral_alpha(k,i) = ON THE AO BASIS ! - ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions - ! - ! works in SEQUENTIAL - END_DOC - - implicit none - integer :: i, j, k, l - double precision :: density, density_a, density_b - double precision :: t0, t1 - - PROVIDE ao_two_e_tc_tot - - !print*, ' providing two_e_tc_non_hermit_integral_seq ...' - !call wall_time(t0) - - two_e_tc_non_hermit_integral_seq_alpha = 0.d0 - two_e_tc_non_hermit_integral_seq_beta = 0.d0 - - do i = 1, ao_num - do k = 1, ao_num - do j = 1, ao_num - do l = 1, ao_num - - density_a = TCSCF_density_matrix_ao_alpha(l,j) - density_b = TCSCF_density_matrix_ao_beta (l,j) - density = density_a + density_b - - !! rho(l,j) * < k l| T | i j> - !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(l,j,k,i) - !! rho(l,j) * < k l| T | i j> - !two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(l,j,k,i) - !! rho_a(l,j) * < l k| T | i j> - !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) - !! rho_b(l,j) * < l k| T | i j> - !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) - - ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(k,i,l,j) - ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(k,i,l,j) - ! rho_a(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) - ! rho_b(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) - - enddo - enddo - enddo - enddo - - !call wall_time(t1) - !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0 - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)] - - BEGIN_DOC - ! - ! two_e_tc_non_hermit_integral_alpha(k,i) = ON THE AO BASIS - ! - ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions + ! where F^tc_2e is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions ! END_DOC @@ -86,16 +23,13 @@ END_PROVIDER PROVIDE mo_l_coef mo_r_coef PROVIDE TCSCF_density_matrix_ao_alpha TCSCF_density_matrix_ao_beta - !print*, ' Providing two_e_tc_non_hermit_integral ...' - !call wall_time(t0) - - two_e_tc_non_hermit_integral_alpha = 0.d0 - two_e_tc_non_hermit_integral_beta = 0.d0 + two_e_tc_integral_alpha = 0.d0 + two_e_tc_integral_beta = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) + !$OMP two_e_tc_integral_alpha, two_e_tc_integral_beta) allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) tmp_a = 0.d0 @@ -124,8 +58,8 @@ END_PROVIDER !$OMP CRITICAL do i = 1, ao_num do j = 1, ao_num - two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i) - two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i) + two_e_tc_integral_alpha(j,i) += tmp_a(j,i) + two_e_tc_integral_beta (j,i) += tmp_b(j,i) enddo enddo !$OMP END CRITICAL @@ -133,9 +67,6 @@ END_PROVIDER deallocate(tmp_a, tmp_b) !$OMP END PARALLEL - !call wall_time(t1) - !print*, ' Wall time for two_e_tc_non_hermit_integral = ', t1 - t0 - END_PROVIDER ! --- @@ -149,13 +80,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)] implicit none double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_ao_alpha ...' - !call wall_time(t0) - - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha - - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_ao_alpha =', t1-t0 + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_integral_alpha END_PROVIDER @@ -169,7 +94,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)] implicit none - Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta + Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_integral_beta END_PROVIDER @@ -185,9 +110,6 @@ BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) - !print*, ' Providing Fock_matrix_tc_mo_alpha ...' - !call wall_time(t0) - if(bi_ortho) then PROVIDE mo_l_coef mo_r_coef @@ -196,8 +118,8 @@ BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) if(three_body_h_tc) then - PROVIDE fock_3e_uhf_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + PROVIDE fock_3e_mo_a + Fock_matrix_tc_mo_alpha += fock_3e_mo_a endif else @@ -207,9 +129,6 @@ BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] endif - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_mo_alpha =', t1-t0 - END_PROVIDER ! --- @@ -229,8 +148,8 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - PROVIDE fock_3e_uhf_mo_b - Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b + PROVIDE fock_3e_mo_b + Fock_matrix_tc_mo_beta += fock_3e_mo_b endif else @@ -286,20 +205,895 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] implicit none double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_ao_tot ...' - !call wall_time(t0) - PROVIDE mo_l_coef mo_r_coef PROVIDE Fock_matrix_tc_mo_tot call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_ao_tot =', t1-t0 - END_PROVIDER ! --- + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_a, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix alpha from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + ! CLOSED-SHELL PART + PROVIDE fock_3e_mo_cs + fock_3e_mo_a = fock_3e_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_mo_a_os + + fock_3e_mo_a += fock_3e_mo_a_os + endif + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix beta from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + ! CLOSED-SHELL PART + PROVIDE fock_3e_mo_cs + fock_3e_mo_b = fock_3e_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_mo_b_os + + fock_3e_mo_b += fock_3e_mo_b_os + endif + +END_PROVIDER + +! --- + + +! --- + + BEGIN_PROVIDER [double precision, fock_3e_mo_a_os, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_mo_b_os, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) + + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_b_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) + + tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 2*n_points_final_grid & + , tmp_4(1,1,1), 2*n_points_final_grid & + , 1.d0, fock_3e_mo_b_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + ! --- + + fock_3e_mo_a_os = fock_3e_mo_b_os + + allocate(tmp_1(n_points_final_grid,1)) + + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) + enddo + + allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,1,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_mo_a_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_mo_a_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_cs, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + loc_1 = 2.d0 * Okappa(ipoint) + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) + tmp_1(ipoint,4) = Okappa(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + if(tc_save_mem) then + + allocate(tmp_22(n_points_final_grid,4,mo_num)) + do a = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_22) + !$OMP DO + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_22(:,4,b) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0 & + , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_cs(1,a), 1) + enddo + deallocate(tmp_22) + + else + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_cs(1,1), 1) + deallocate(tmp_2) + + endif + + deallocate(tmp_1) + + ! --- + + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & + , 1.d0, fock_3e_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + +END_PROVIDER + +! --- + diff --git a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f index eb8973ff..2df2421e 100644 --- a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f +++ b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f @@ -1,4 +1,6 @@ +! --- + BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ] &BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)] @@ -23,9 +25,6 @@ integer :: i, j, n double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_mo_tot ...' - !call wall_time(t0) - if(elec_alpha_num == elec_beta_num) then PROVIDE Fock_matrix_tc_mo_alpha @@ -158,8 +157,8 @@ Fock_matrix_tc_mo_tot += fock_3_mat endif - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_mo_tot =', t1-t0 - END_PROVIDER +! --- + + diff --git a/plugins/local/tc_scf/fock_vartc.irp.f b/plugins/local/tc_scf/fock_vartc.irp.f deleted file mode 100644 index 2b4a57e5..00000000 --- a/plugins/local/tc_scf/fock_vartc.irp.f +++ /dev/null @@ -1,287 +0,0 @@ - -! --- - - BEGIN_PROVIDER [ double precision, two_e_vartc_integral_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_vartc_integral_beta , (ao_num, ao_num)] - - implicit none - integer :: i, j, k, l - double precision :: density, density_a, density_b, I_coul, I_kjli - double precision :: t0, t1 - double precision, allocatable :: tmp_a(:,:), tmp_b(:,:) - - two_e_vartc_integral_alpha = 0.d0 - two_e_vartc_integral_beta = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & - !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_vartc_integral_alpha, two_e_vartc_integral_beta) - - allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) - tmp_a = 0.d0 - tmp_b = 0.d0 - - !$OMP DO - do j = 1, ao_num - do l = 1, ao_num - density_a = TCSCF_density_matrix_ao_alpha(l,j) - density_b = TCSCF_density_matrix_ao_beta (l,j) - density = density_a + density_b - do i = 1, ao_num - do k = 1, ao_num - - I_coul = density * ao_two_e_tc_tot(k,i,l,j) - I_kjli = ao_two_e_tc_tot(k,j,l,i) - - tmp_a(k,i) += I_coul - density_a * I_kjli - tmp_b(k,i) += I_coul - density_b * I_kjli - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do i = 1, ao_num - do j = 1, ao_num - two_e_vartc_integral_alpha(j,i) += tmp_a(j,i) - two_e_vartc_integral_beta (j,i) += tmp_b(j,i) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp_a, tmp_b) - !$OMP END PARALLEL - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_alpha, (ao_num, ao_num)] - - implicit none - - Fock_matrix_vartc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_vartc_integral_alpha - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_beta, (ao_num, ao_num)] - - implicit none - - Fock_matrix_vartc_ao_beta = ao_one_e_integrals_tc_tot + two_e_vartc_integral_beta - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_alpha, (mo_num, mo_num) ] - - implicit none - - call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_alpha, size(Fock_matrix_vartc_ao_alpha, 1) & - , Fock_matrix_vartc_mo_alpha, size(Fock_matrix_vartc_mo_alpha, 1) ) - if(three_body_h_tc) then - Fock_matrix_vartc_mo_alpha += fock_3e_uhf_mo_a - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_beta, (mo_num,mo_num) ] - - implicit none - - call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_beta, size(Fock_matrix_vartc_ao_beta, 1) & - , Fock_matrix_vartc_mo_beta, size(Fock_matrix_vartc_mo_beta, 1) ) - if(three_body_h_tc) then - Fock_matrix_vartc_mo_beta += fock_3e_uhf_mo_b - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, grad_vartc] - - implicit none - integer :: i, k - double precision :: grad_left, grad_right - - grad_left = 0.d0 - grad_right = 0.d0 - - do i = 1, elec_beta_num ! doc --> SOMO - do k = elec_beta_num+1, elec_alpha_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - do i = 1, elec_beta_num ! doc --> virt - do k = elec_alpha_num+1, mo_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt - do k = elec_alpha_num+1, mo_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - grad_vartc = grad_left + grad_right - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_tot, (ao_num, ao_num) ] - - implicit none - - call mo_to_ao_bi_ortho( Fock_matrix_vartc_mo_tot, size(Fock_matrix_vartc_mo_tot, 1) & - , Fock_matrix_vartc_ao_tot, size(Fock_matrix_vartc_ao_tot, 1) ) - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_tot, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_diag_mo_tot, (mo_num)] - - implicit none - integer :: i, j, n - - if(elec_alpha_num == elec_beta_num) then - Fock_matrix_vartc_mo_tot = Fock_matrix_vartc_mo_alpha - else - - do j = 1, elec_beta_num - ! F-K - do i = 1, elec_beta_num !CC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F+K/2 - do i = elec_beta_num+1, elec_alpha_num !CA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F - do i = elec_alpha_num+1, mo_num !CV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - enddo - - do j = elec_beta_num+1, elec_alpha_num - ! F+K/2 - do i = 1, elec_beta_num !AC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F - do i = elec_beta_num+1, elec_alpha_num !AA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - ! F-K/2 - do i = elec_alpha_num+1, mo_num !AV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - enddo - - do j = elec_alpha_num+1, mo_num - ! F - do i = 1, elec_beta_num !VC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - ! F-K/2 - do i = elec_beta_num+1, elec_alpha_num !VA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F+K - do i = elec_alpha_num+1, mo_num !VV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) & - + (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - enddo - if(three_body_h_tc)then - ! C-O - do j = 1, elec_beta_num - do i = elec_beta_num+1, elec_alpha_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - ! C-V - do j = 1, elec_beta_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - ! O-V - do j = elec_beta_num+1, elec_alpha_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - endif - - endif - - do i = 1, mo_num - Fock_matrix_vartc_diag_mo_tot(i) = Fock_matrix_vartc_mo_tot(i,i) - enddo - - if(frozen_orb_scf)then - integer :: iorb, jorb - do i = 1, n_core_orb - iorb = list_core(i) - do j = 1, n_act_orb - jorb = list_act(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif - - if(no_oa_or_av_opt)then - do i = 1, n_act_orb - iorb = list_act(i) - do j = 1, n_inact_orb - jorb = list_inact(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_virt_orb - jorb = list_virt(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_core_orb - jorb = list_core(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif - - !call check_sym(Fock_matrix_vartc_mo_tot, mo_num) - !do i = 1, mo_num - ! write(*,'(100(F15.8, I4))') Fock_matrix_vartc_mo_tot(i,:) - !enddo - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/rh_tcscf_diis.irp.f b/plugins/local/tc_scf/rh_tcscf_diis.irp.f index 431b6e08..853c4ab5 100644 --- a/plugins/local/tc_scf/rh_tcscf_diis.irp.f +++ b/plugins/local/tc_scf/rh_tcscf_diis.irp.f @@ -234,7 +234,7 @@ subroutine rh_tcscf_diis() call unlock_io if(er_delta .lt. 0.d0) then - call ezfio_set_tc_scf_bitc_energy(etc_tot) + call ezfio_set_tc_scf_tcscf_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) write(json_unit, json_true_fmt) 'saved' @@ -263,7 +263,7 @@ subroutine rh_tcscf_diis() deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) - call ezfio_set_tc_scf_bitc_energy(TC_HF_energy) + call ezfio_set_tc_scf_tcscf_energy(TC_HF_energy) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) diff --git a/plugins/local/tc_scf/rh_tcscf_simple.irp.f b/plugins/local/tc_scf/rh_tcscf_simple.irp.f index 0b79e8ea..2c2cf2c2 100644 --- a/plugins/local/tc_scf/rh_tcscf_simple.irp.f +++ b/plugins/local/tc_scf/rh_tcscf_simple.irp.f @@ -91,7 +91,7 @@ subroutine rh_tcscf_simple() e_delta = dabs(etc_tot - e_save) e_save = etc_tot - call ezfio_set_tc_scf_bitc_energy(etc_tot) + call ezfio_set_tc_scf_tcscf_energy(etc_tot) call wall_time(t1) write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & diff --git a/plugins/local/tc_scf/rh_vartcscf_simple.irp.f b/plugins/local/tc_scf/rh_vartcscf_simple.irp.f deleted file mode 100644 index ecb0709e..00000000 --- a/plugins/local/tc_scf/rh_vartcscf_simple.irp.f +++ /dev/null @@ -1,89 +0,0 @@ -! --- - -subroutine rh_vartcscf_simple() - - implicit none - integer :: i, j, it, dim_DIIS - double precision :: t0, t1 - double precision :: e_save, e_delta, rho_delta - double precision :: etc_tot, etc_1e, etc_2e, etc_3e - double precision :: er_DIIS - - - it = 0 - e_save = 0.d0 - dim_DIIS = 0 - - ! --- - - PROVIDE level_shift_tcscf - PROVIDE mo_r_coef - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '====', '========' - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & - , ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '====', '========' - - - ! first iteration (HF orbitals) - call wall_time(t0) - - etc_tot = VARTC_HF_energy - etc_1e = VARTC_HF_one_e_energy - etc_2e = VARTC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) - call wall_time(t0) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - mo_r_coef = fock_vartc_eigvec_ao - mo_l_coef = mo_r_coef - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - etc_tot = VARTC_HF_energy - etc_1e = VARTC_HF_one_e_energy - etc_2e = VARTC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call ezfio_set_tc_scf_bitc_energy(etc_tot) - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - enddo - - print *, ' VAR-TCSCF Simple converged !' - -end - -! --- - diff --git a/plugins/local/tc_scf/tc_scf.irp.f b/plugins/local/tc_scf/tc_scf.irp.f index 768069d6..ee8e8dad 100644 --- a/plugins/local/tc_scf/tc_scf.irp.f +++ b/plugins/local/tc_scf/tc_scf.irp.f @@ -13,7 +13,6 @@ program tc_scf PROVIDE j1e_type PROVIDE j2e_type PROVIDE tcscf_algorithm - PROVIDE var_tc print *, ' TC-SCF with:' print *, ' j1e_type = ', j1e_type @@ -45,46 +44,29 @@ program tc_scf !call create_guess() !call orthonormalize_mos() - - if(var_tc) then - - print *, ' VAR-TC' - - if(tcscf_algorithm == 'DIIS') then - print*, ' NOT implemented yet' - elseif(tcscf_algorithm == 'Simple') then - call rh_vartcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif - + if(tcscf_algorithm == 'DIIS') then + call rh_tcscf_diis() + elseif(tcscf_algorithm == 'Simple') then + call rh_tcscf_simple() else - - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif - - PROVIDE Fock_matrix_tc_diag_mo_tot - print*, ' Eigenvalues:' - do i = 1, mo_num - print*, i, Fock_matrix_tc_diag_mo_tot(i) - enddo - - ! TODO - ! rotate angles in separate code only if necessary - if(minimize_lr_angles)then - call minimize_tc_orb_angles() - endif - call print_energy_and_mos(good_angles) - + print *, ' not implemented yet', tcscf_algorithm + stop endif + PROVIDE Fock_matrix_tc_diag_mo_tot + print*, ' Eigenvalues:' + do i = 1, mo_num + print*, i, Fock_matrix_tc_diag_mo_tot(i) + enddo + + ! TODO + ! rotate angles in separate code only if necessary + if(minimize_lr_angles)then + call minimize_tc_orb_angles() + endif + call print_energy_and_mos(good_angles) + + write(json_unit,json_array_close_fmtx) call json_close diff --git a/plugins/local/tc_scf/tc_scf_energy.irp.f b/plugins/local/tc_scf/tc_scf_energy.irp.f index 833b48aa..0266c605 100644 --- a/plugins/local/tc_scf/tc_scf_energy.irp.f +++ b/plugins/local/tc_scf/tc_scf_energy.irp.f @@ -11,11 +11,8 @@ integer :: i, j double precision :: t0, t1 - !print*, ' Providing TC energy ...' - !call wall_time(t0) - PROVIDE mo_l_coef mo_r_coef - PROVIDE two_e_tc_non_hermit_integral_alpha two_e_tc_non_hermit_integral_beta + PROVIDE two_e_tc_integral_alpha two_e_tc_integral_beta TC_HF_energy = nuclear_repulsion TC_HF_one_e_energy = 0.d0 @@ -23,8 +20,8 @@ do j = 1, ao_num do i = 1, ao_num - TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & - + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) + TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & + + two_e_tc_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) & * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) enddo @@ -33,38 +30,6 @@ TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy TC_HF_energy += diag_three_elem_hf - !call wall_time(t1) - !print*, ' Wall time for TC energy=', t1-t0 - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, VARTC_HF_energy] -&BEGIN_PROVIDER [ double precision, VARTC_HF_one_e_energy] -&BEGIN_PROVIDER [ double precision, VARTC_HF_two_e_energy] - - implicit none - integer :: i, j - - PROVIDE mo_r_coef - - VARTC_HF_energy = nuclear_repulsion - VARTC_HF_one_e_energy = 0.d0 - VARTC_HF_two_e_energy = 0.d0 - - do j = 1, ao_num - do i = 1, ao_num - VARTC_HF_two_e_energy += 0.5d0 * ( two_e_vartc_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & - + two_e_vartc_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) - VARTC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) & - * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) - enddo - enddo - - VARTC_HF_energy += VARTC_HF_one_e_energy + VARTC_HF_two_e_energy - VARTC_HF_energy += diag_three_elem_hf - END_PROVIDER ! --- diff --git a/plugins/local/tc_scf/test_int.irp.f b/plugins/local/tc_scf/test_int.irp.f deleted file mode 100644 index e135fcd8..00000000 --- a/plugins/local/tc_scf/test_int.irp.f +++ /dev/null @@ -1,970 +0,0 @@ -program test_ints - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, ' starting test_ints ...' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - my_extra_grid_becke = .True. - my_n_pt_r_extra_grid = 30 - my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug - touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid - -!! OK -! call routine_int2_u_grad1u_env2 -! OK -! call routine_v_ij_erf_rk_cst_mu_env -! OK -! call routine_x_v_ij_erf_rk_cst_mu_env -! OK -! call routine_int2_u2_env2 -! OK -! call routine_int2_u_grad1u_x_env2 -! OK -! call routine_int2_grad1u2_grad2u2_env2 -! call routine_int2_u_grad1u_env2 -! call test_int2_grad1_u12_ao_test -! call routine_v_ij_u_cst_mu_env_test -! call test_grid_points_ao - !call test_int_gauss - - !call test_fock_3e_uhf_ao() - !call test_fock_3e_uhf_mo() - - !call test_two_e_tc_non_hermit_integral() - -!!PROVIDE TC_HF_energy VARTC_HF_energy -!!print *, ' TC_HF_energy = ', TC_HF_energy -!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy - - call test_fock_3e_uhf_mo_cs() - call test_fock_3e_uhf_mo_a() - call test_fock_3e_uhf_mo_b() - -end - -! --- - -subroutine routine_test_env - implicit none - integer :: i,icount,j - icount = 0 - do i = 1, List_env1s_square_size - if(dabs(List_env1s_square_coef(i)).gt.1.d-10)then - print*,'' - print*,List_env1s_square_expo(i),List_env1s_square_coef(i) - print*,List_env1s_square_cent(1:3,i) - print*,'' - icount += 1 - endif - - enddo - print*,'List_env1s_square_coef,icount = ',List_env1s_square_size,icount - do i = 1, ao_num - do j = 1, ao_num - do icount = 1, List_comb_thr_b3_size(j,i) - print*,'',j,i - print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i) - print*,List_comb_thr_b3_cent(1:3,icount,j,i) - print*,'' - enddo -! enddo - enddo - enddo - print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_env1s_square_size - -end - -subroutine routine_int2_u_grad1u_env2 - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_u_grad1u_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u_grad1u_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u_grad1u_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_v_ij_erf_rk_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_erf_rk_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_env(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_erf_rk_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - -subroutine routine_x_v_ij_erf_rk_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l,m - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - do m = 1, 3 - array(j,i,l,k) += x_v_ij_erf_rk_cst_mu_env_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_env (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - - print*,'******' - print*,'******' - print*,'routine_x_v_ij_erf_rk_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - - -subroutine routine_v_ij_u_cst_mu_env_test - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_u_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_env_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_u_cst_mu_env_test' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - -end - -subroutine routine_int2_grad1u2_grad2u2_env2 - implicit none - integer :: i,j,ipoint,k,l - integer :: ii , jj - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - double precision, allocatable :: ints(:,:,:) - allocate(ints(ao_num, ao_num, n_points_final_grid)) -! do ipoint = 1, n_points_final_grid -! do i = 1, ao_num -! do j = 1, ao_num -! read(33,*)ints(j,i,ipoint) -! enddo -! enddo -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! !array(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! array_ref(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! !array(j,i,l,k) += ints(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! array_ref(j,i,l,k) += int2_grad1u2_grad2u2_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += ints(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! if(dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint)).gt.1.d-6)then -! if(dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint) - int2_grad1u2_grad2u2_env2_test(j,i,ipoint)).gt.1.d-6)then -! print*,j,i,ipoint -! print*,int2_grad1u2_grad2u2_env2_test(j,i,ipoint) , int2_grad1u2_grad2u2_env2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint) - int2_grad1u2_grad2u2_env2_test(j,i,ipoint)) -! print*,int2_grad1u2_grad2u2_env2_test(i,j,ipoint) , int2_grad1u2_grad2u2_env2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_env2_test(i,j,ipoint) - int2_grad1u2_grad2u2_env2_test(i,j,ipoint)) -! stop -! endif -! endif - enddo - enddo - enddo - enddo - enddo - double precision :: e_ref, e_new - accu_relat = 0.d0 - accu_abs = 0.d0 - e_ref = 0.d0 - e_new = 0.d0 - do ii = 1, elec_alpha_num - do jj = ii, elec_alpha_num - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj) - e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj) - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib -! if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then -! accu_relat += contrib/dabs(array_ref(j,i,l,k)) -! endif - enddo - enddo - enddo - enddo - - enddo - enddo - print*,'e_ref = ',e_ref - print*,'e_new = ',e_new -! print*,'accu_abs = ',accu_abs/dble(ao_num)**4 -! print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_int2_u2_env2 - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u2_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u2_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - -subroutine routine_int2_u_grad1u_x_env2 - implicit none - integer :: i,j,ipoint,k,l,m - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - do m = 1, 3 - array(j,i,l,k) += int2_u_grad1u_x_env2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u_grad1u_x_env2 (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u_grad1u_x_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_v_ij_u_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_u_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_env_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_u_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - -end - -! --- - -subroutine test_fock_3e_uhf_ao() - - implicit none - integer :: i, j - double precision :: diff_tot, diff_ij, thr_ih, norm - double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:) - - thr_ih = 1d-7 - - PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b - - ! --- - - allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num)) - call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a , size(fock_3e_uhf_ao_a , 1) & - , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) ) - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_ao_a_mo (j,i) - !stop - endif - - norm += dabs(fock_a_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_a = ', diff_tot / norm - print *, ' ' - - deallocate(fock_3e_uhf_ao_a_mo) - - ! --- - - allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num)) - call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b , size(fock_3e_uhf_ao_b , 1) & - , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) ) - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_ao_b_mo (j,i) - !stop - endif - - norm += dabs(fock_b_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_b = ', diff_tot/norm - print *, ' ' - - deallocate(fock_3e_uhf_ao_b_mo) - - ! --- - -end subroutine test_fock_3e_uhf_ao() - -! --- - -subroutine test_fock_3e_uhf_mo() - - implicit none - integer :: i, j - double precision :: diff_tot, diff_ij, thr_ih, norm - - thr_ih = 1d-12 - - PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_mo_a (j,i) - !stop - endif - - norm += dabs(fock_a_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_a = ', diff_tot / norm - print *, ' norm_a = ', norm - print *, ' ' - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_mo_b (j,i) - !stop - endif - - norm += dabs(fock_b_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_b = ', diff_tot/norm - print *, ' norm_b = ', norm - print *, ' ' - - ! --- - -end - -! --- - -subroutine test_grid_points_ao - implicit none - integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full - double precision :: thr - thr = 1.d-10 -! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod -! print*,'n_pts_grid_ao_prod' - do i = 1, ao_num - do j = i, ao_num - icount = 0 - icount_good = 0 - icount_bad = 0 - icount_full = 0 - do ipoint = 1, n_points_final_grid -! if(dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,1)) & -! + dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,2)) & -! + dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,3)) ) -! if(dabs(int2_u2_env2_test(j,i,ipoint)).gt.thr)then -! icount += 1 -! endif - if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then - icount_full += 1 - endif - if(dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)).gt.thr)then - icount += 1 - if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then - icount_good += 1 - else - print*,j,i,ipoint - print*,dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)), dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)) - icount_bad += 1 - endif - endif -! if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr)then -! endif - enddo - print*,'' - print*,j,i - print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i) - print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid) -! dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid) -! if(icount.gt.n_pts_grid_ao_prod(j,i))then -! print*,'pb !!' -! endif - enddo - enddo -end - -subroutine test_int_gauss - implicit none - integer :: i,j - print*,'center' - do i = 1, ao_num - do j = i, ao_num - print*,j,i - print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i) - print*,ao_prod_center(1:3,j,i) - enddo - enddo - print*,'' - double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2 - center = 0.d0 - pi = dacos(-1.d0) - integral_1 = 0.d0 - integral_2 = 0.d0 - alpha = 0.75d0 - do i = 1, n_points_final_grid - ! you get x, y and z of the ith grid point - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - weight = final_weight_at_r_vector(i) - distance = dsqrt( (r(1) - center(1))**2 + (r(2) - center(2))**2 + (r(3) - center(3))**2 ) - f_r = dexp(-alpha * distance*distance) - ! you add the contribution of the grid point to the integral - integral_1 += f_r * weight - integral_2 += f_r * distance * weight - enddo - print*,'integral_1 =',integral_1 - print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5 - print*,'integral_2 =',integral_2 - print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2 - - -end - -! --- - -subroutine test_two_e_tc_non_hermit_integral() - - implicit none - integer :: i, j - double precision :: diff_tot, diff, thr_ih, norm - - thr_ih = 1d-10 - - PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha - PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - - diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i)) - if(diff .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' seq : ', two_e_tc_non_hermit_integral_seq_alpha(j,i) - print *, ' // : ', two_e_tc_non_hermit_integral_alpha (j,i) - !stop - endif - - norm += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i)) - diff_tot += diff - enddo - enddo - - print *, ' diff tot a = ', diff_tot / norm - print *, ' norm a = ', norm - print *, ' ' - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - - diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i)) - if(diff .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' seq : ', two_e_tc_non_hermit_integral_seq_beta(j,i) - print *, ' // : ', two_e_tc_non_hermit_integral_beta (j,i) - !stop - endif - - norm += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i)) - diff_tot += diff - enddo - enddo - - print *, ' diff tot b = ', diff_tot / norm - print *, ' norm b = ', norm - print *, ' ' - - ! --- - - return - -end - -! --- - -subroutine test_int2_grad1_u12_ao_test - implicit none - integer :: i,j,ipoint,m,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do m = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'test_int2_grad1_u12_ao_test' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 -end - -! --- - -subroutine test_fock_3e_uhf_mo_cs() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - -! double precision :: t0, t1 -! print*, ' Providing fock_a_tot_3e_bi_orth ...' -! call wall_time(t0) -! PROVIDE fock_a_tot_3e_bi_orth -! call wall_time(t1) -! print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 - - PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_cs_old(j,i) - I_new = fock_3e_uhf_mo_cs (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_cs on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - -subroutine test_fock_3e_uhf_mo_a() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - - PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_a_old(j,i) - I_new = fock_3e_uhf_mo_a (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_a on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - -subroutine test_fock_3e_uhf_mo_b() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - - PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_b_old(j,i) - I_new = fock_3e_uhf_mo_b (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_b on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index 16a52dc6..e054e22c 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -70,17 +70,6 @@ END_PROVIDER index_final_points_extra(2,i_count) = i index_final_points_extra(3,i_count) = j index_final_points_extra_reverse(k,i,j) = i_count - - if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then - print *, ' !!! WARNING !!!' - print *, ' negative weight !!!!' - print *, i_count, final_weight_at_r_vector_extra(i_count) - if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then - final_weight_at_r_vector_extra(i_count) = 0.d0 - else - stop - endif - endif enddo enddo enddo diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index c35918c3..9da8a099 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -67,17 +67,6 @@ END_PROVIDER index_final_points(2,i_count) = i index_final_points(3,i_count) = j index_final_points_reverse(k,i,j) = i_count - - if(final_weight_at_r_vector(i_count) .lt. 0.d0) then - print *, ' !!! WARNING !!!' - print *, ' negative weight !!!!' - print *, i_count, final_weight_at_r_vector(i_count) - if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then - final_weight_at_r_vector(i_count) = 0.d0 - else - stop - endif - endif enddo enddo enddo diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 97cbde67..c67bbf03 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -576,7 +576,7 @@ logical function is_same_spin(sigma_1, sigma_2) is_same_spin = .false. endif -end function is_same_spin +end ! --- @@ -596,7 +596,7 @@ function Kronecker_delta(i, j) result(delta) delta = 0.d0 endif -end function Kronecker_delta +end ! --- @@ -634,7 +634,81 @@ subroutine diagonalize_sym_matrix(N, A, e) print*,'Problem in diagonalize_sym_matrix (dsyev)!!' endif -end subroutine diagonalize_sym_matrix +end + +! --- + + +subroutine give_degen(A, n, shift, list_degen, n_degen_list) + + BEGIN_DOC + ! returns n_degen_list :: the number of degenerated SET of elements (i.e. with |A(i)-A(i+1)| below shift) + ! + ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, + ! + ! list_degen(2,i) = last degenerate element of the set i. + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: shift + integer, intent(in) :: n + integer, intent(out) :: list_degen(2,n), n_degen_list + + integer :: i, j, n_degen, k + logical :: keep_on + double precision, allocatable :: Aw(:) + + list_degen = -1 + allocate(Aw(n)) + Aw = A + i=1 + k = 0 + do while(i.lt.n) + if(dabs(Aw(i)-Aw(i+1)).lt.shift)then + k+=1 + j=1 + list_degen(1,k) = i + keep_on = .True. + do while(keep_on) + if(i+j.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i)-Aw(i+j)).lt.shift)then + j+=1 + else + keep_on=.False. + exit + endif + enddo + n_degen = j + list_degen(2,k) = list_degen(1,k)-1 + n_degen + j=0 + keep_on = .True. + do while(keep_on) + if(i+j+1.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then + Aw(i+j) += (j-n_degen/2) * shift + j+=1 + else + keep_on = .False. + exit + endif + enddo + Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift + i+=n_degen + else + i+=1 + endif + enddo + n_degen_list = k + +end ! --- From da8eac81e01e9ee558351195aba1f964ed5fbc0b Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 1 May 2024 21:52:00 +0200 Subject: [PATCH 021/131] TC-SCF CLEANED --- plugins/local/bi_ort_ints/no_dressing.irp.f | 7 +- plugins/local/tc_scf/EZFIO.cfg | 36 + plugins/local/tc_scf/fock_hermit.irp.f | 107 --- plugins/local/tc_scf/fock_tc.irp.f | 40 +- plugins/local/tc_scf/fock_tc_mo_tot.irp.f | 19 +- plugins/local/tc_scf/fock_three_hermit.irp.f | 771 ------------------ .../local/tc_scf/integrals_in_r_stuff.irp.f | 391 --------- plugins/local/tc_scf/jast_schmos_90.irp.f | 318 -------- plugins/local/tc_scf/plot_j_schMos.irp.f | 69 -- plugins/local/tc_scf/print_fit_param.irp.f | 59 -- plugins/local/tc_scf/print_tcscf_energy.irp.f | 55 -- plugins/local/tc_scf/rh_tcscf_simple.irp.f | 129 --- .../local/tc_scf/rotate_tcscf_orbitals.irp.f | 369 --------- .../local/tc_scf/tc_petermann_factor.irp.f | 91 --- plugins/local/tc_scf/tc_scf.irp.f | 25 +- plugins/local/tc_scf/tc_scf_dm.irp.f | 24 +- plugins/local/tc_scf/tc_scf_energy.irp.f | 423 ++++++++++ plugins/local/tc_scf/tcscf_energy_naive.irp.f | 80 -- .../tc_scf/three_e_energy_bi_ortho.irp.f | 189 ----- .../local/tc_scf/write_ao_2e_tc_integ.irp.f | 6 +- 20 files changed, 502 insertions(+), 2706 deletions(-) delete mode 100644 plugins/local/tc_scf/fock_hermit.irp.f delete mode 100644 plugins/local/tc_scf/fock_three_hermit.irp.f delete mode 100644 plugins/local/tc_scf/integrals_in_r_stuff.irp.f delete mode 100644 plugins/local/tc_scf/jast_schmos_90.irp.f delete mode 100644 plugins/local/tc_scf/plot_j_schMos.irp.f delete mode 100644 plugins/local/tc_scf/print_fit_param.irp.f delete mode 100644 plugins/local/tc_scf/print_tcscf_energy.irp.f delete mode 100644 plugins/local/tc_scf/rh_tcscf_simple.irp.f delete mode 100644 plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f delete mode 100644 plugins/local/tc_scf/tc_petermann_factor.irp.f delete mode 100644 plugins/local/tc_scf/tcscf_energy_naive.irp.f delete mode 100644 plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f diff --git a/plugins/local/bi_ort_ints/no_dressing.irp.f b/plugins/local/bi_ort_ints/no_dressing.irp.f index bd225274..721ac0f8 100644 --- a/plugins/local/bi_ort_ints/no_dressing.irp.f +++ b/plugins/local/bi_ort_ints/no_dressing.irp.f @@ -322,6 +322,12 @@ END_PROVIDER BEGIN_PROVIDER [double precision, noL_0e] + BEGIN_DOC + ! + ! < Phi_left | L | Phi_right > + ! + END_DOC + implicit none integer :: i, j, k, ipoint double precision :: t0, t1 @@ -330,7 +336,6 @@ BEGIN_PROVIDER [double precision, noL_0e] double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - call wall_time(t0) print*, " Providing noL_0e ..." diff --git a/plugins/local/tc_scf/EZFIO.cfg b/plugins/local/tc_scf/EZFIO.cfg index 510c777c..6820a8b0 100644 --- a/plugins/local/tc_scf/EZFIO.cfg +++ b/plugins/local/tc_scf/EZFIO.cfg @@ -9,3 +9,39 @@ doc: If |true|, tc-scf has converged interface: ezfio,provider,ocaml default: False +[max_dim_diis_tcscf] +type: integer +doc: Maximum size of the DIIS extrapolation procedure +interface: ezfio,provider,ocaml +default: 15 + +[level_shift_tcscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve TCSCF convergence +interface: ezfio,provider,ocaml +default: 0. + +[im_thresh_tcscf] +type: Threshold +doc: Thresholds on the Imag part of energy +interface: ezfio,provider,ocaml +default: 1.e-7 + +[thresh_tcscf] +type: Threshold +doc: Threshold on the convergence of the Hartree Fock energy. +interface: ezfio,provider,ocaml +default: 1.e-8 + +[n_it_tcscf_max] +type: Strictly_positive_int +doc: Maximum number of SCF iterations +interface: ezfio,provider,ocaml +default: 50 + +[tc_Brillouin_Right] +type: logical +doc: If |true|, impose only right-Brillouin condition +interface: ezfio,provider,ocaml +default: False + diff --git a/plugins/local/tc_scf/fock_hermit.irp.f b/plugins/local/tc_scf/fock_hermit.irp.f deleted file mode 100644 index 5a51b324..00000000 --- a/plugins/local/tc_scf/fock_hermit.irp.f +++ /dev/null @@ -1,107 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, good_hermit_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! good_hermit_tc_fock_mat = Hermitian Upper triangular Fock matrix -! -! The converged eigenvectors of such matrix yield to orthonormal vectors satisfying the left Brillouin theorem - END_DOC - implicit none - integer :: i, j - - good_hermit_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, j-1 - good_hermit_tc_fock_mat(i,j) = Fock_matrix_tc_mo_tot(j,i) - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, hermit_average_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! hermit_average_tc_fock_mat = (F + F^\dagger)/2 - END_DOC - implicit none - integer :: i, j - - hermit_average_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, mo_num - hermit_average_tc_fock_mat(i,j) = 0.5d0 * (Fock_matrix_tc_mo_tot(j,i) + Fock_matrix_tc_mo_tot(i,j)) - enddo - enddo - -END_PROVIDER - - -! --- -BEGIN_PROVIDER [ double precision, grad_hermit] - implicit none - BEGIN_DOC - ! square of gradient of the energy - END_DOC - if(symetric_fock_tc)then - grad_hermit = grad_hermit_average_tc_fock_mat - else - grad_hermit = grad_good_hermit_tc_fock_mat - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, grad_good_hermit_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_good_hermit_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_good_hermit_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_good_hermit_tc_fock_mat += dabs(good_hermit_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, grad_hermit_average_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_hermit_average_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_hermit_average_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_hermit_average_tc_fock_mat += dabs(hermit_average_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - - -! --- - -subroutine save_good_hermit_tc_eigvectors() - - implicit none - integer :: sign - character*(64) :: label - logical :: output - - sign = 1 - label = "Canonical" - output = .False. - - if(symetric_fock_tc)then - call mo_as_eigvectors_of_mo_matrix(hermit_average_tc_fock_mat, mo_num, mo_num, label, sign, output) - else - call mo_as_eigvectors_of_mo_matrix(good_hermit_tc_fock_mat, mo_num, mo_num, label, sign, output) - endif -end subroutine save_good_hermit_tc_eigvectors - -! --- - diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index 508f3cd7..16bb5c87 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -110,23 +110,14 @@ BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) - if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef - PROVIDE mo_l_coef mo_r_coef - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_mo_a - endif - - else - - call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_a + Fock_matrix_tc_mo_alpha += fock_3e_mo_a endif END_PROVIDER @@ -142,21 +133,12 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] implicit none double precision, allocatable :: tmp(:,:) - if(bi_ortho) then - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_mo_b - Fock_matrix_tc_mo_beta += fock_3e_mo_b - endif - - else - - call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & + , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_b + Fock_matrix_tc_mo_beta += fock_3e_mo_b endif END_PROVIDER diff --git a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f index 2df2421e..fd490af6 100644 --- a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f +++ b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f @@ -132,7 +132,7 @@ enddo endif - if(no_oa_or_av_opt)then + if(no_oa_or_av_opt) then do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_inact_orb @@ -153,8 +153,21 @@ enddo endif - if(.not.bi_ortho .and. three_body_h_tc)then - Fock_matrix_tc_mo_tot += fock_3_mat + if(tc_Brillouin_Right) then + + double precision, allocatable :: tmp(:,:) + allocate(tmp(mo_num,mo_num)) + + tmp = Fock_matrix_tc_mo_tot + do j = 1, mo_num + do i = 1, j-1 + tmp(i,j) = Fock_matrix_tc_mo_tot(j,i) + enddo + enddo + + Fock_matrix_tc_mo_tot = tmp + deallocate(tmp) + endif END_PROVIDER diff --git a/plugins/local/tc_scf/fock_three_hermit.irp.f b/plugins/local/tc_scf/fock_three_hermit.irp.f deleted file mode 100644 index 00d47fae..00000000 --- a/plugins/local/tc_scf/fock_three_hermit.irp.f +++ /dev/null @@ -1,771 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] - - implicit none - integer :: i,j - double precision :: contrib - - fock_3_mat = 0.d0 - if(.not.bi_ortho .and. three_body_h_tc) then - - call give_fock_ia_three_e_total(1, 1, contrib) - !! !$OMP PARALLEL & - !! !$OMP DEFAULT (NONE) & - !! !$OMP PRIVATE (i,j,m,integral) & - !! !$OMP SHARED (mo_num,three_body_3_index) - !! !$OMP DO SCHEDULE (guided) COLLAPSE(3) - do i = 1, mo_num - do j = 1, mo_num - call give_fock_ia_three_e_total(j,i,contrib) - fock_3_mat(j,i) = -contrib - enddo - enddo - !else if(bi_ortho.and.three_body_h_tc) then - !! !$OMP END DO - !! !$OMP END PARALLEL - !! do i = 1, mo_num - !! do j = 1, i-1 - !! mat_three(j,i) = mat_three(i,j) - !! enddo - !! enddo - endif - -END_PROVIDER - - -subroutine give_fock_ia_three_e_total(i,a,contrib) - implicit none - BEGIN_DOC -! contrib is the TOTAL (same spins / opposite spins) contribution from the three body term to the Fock operator -! - END_DOC - integer, intent(in) :: i,a - double precision, intent(out) :: contrib - double precision :: int_1, int_2, int_3 - double precision :: mos_i, mos_a, w_ia - double precision :: mos_ia, weight - - integer :: mm, ipoint,k,l - - int_1 = 0.d0 - int_2 = 0.d0 - int_3 = 0.d0 - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - mos_i = mos_in_r_array_transp(ipoint,i) - mos_a = mos_in_r_array_transp(ipoint,a) - mos_ia = mos_a * mos_i - w_ia = x_W_ij_erf_rk(ipoint,mm,i,a) - - int_1 += weight * fock_3_w_kk_sum(ipoint,mm) * (4.d0 * fock_3_rho_beta(ipoint) * w_ia & - + 2.0d0 * mos_ia * fock_3_w_kk_sum(ipoint,mm) & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,i) * mos_a & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,a) * mos_i ) - int_2 += weight * (-1.d0) * ( 2.0d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * w_ia & - + 2.0d0 * fock_3_rho_beta(ipoint) * fock_3_w_ki_wk_a(ipoint,mm,i,a) & - + 1.0d0 * mos_ia * fock_3_trace_w_tilde(ipoint,mm) ) - - int_3 += weight * 1.d0 * (fock_3_w_kl_wla_phi_k(ipoint,mm,i) * mos_a + fock_3_w_kl_wla_phi_k(ipoint,mm,a) * mos_i & - +fock_3_w_ki_mos_k(ipoint,mm,i) * fock_3_w_ki_mos_k(ipoint,mm,a) ) - enddo - enddo - contrib = int_1 + int_2 + int_3 - -end - -! --- - -BEGIN_PROVIDER [double precision, diag_three_elem_hf] - - implicit none - integer :: i, j, k, ipoint, mm - double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 - double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb - double precision, allocatable :: tmp(:) - double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) - double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) - double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' providing diag_three_elem_hf' - - if(.not. three_body_h_tc) then - - if(noL_standard) then - PROVIDE noL_0e - diag_three_elem_hf = noL_0e - else - diag_three_elem_hf = 0.d0 - endif - - else - - if(.not. bi_ortho) then - - ! --- - - one_third = 1.d0/3.d0 - two_third = 2.d0/3.d0 - four_third = 4.d0/3.d0 - diag_three_elem_hf = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call give_integrals_3_body(k, j, i, j, i, k, exchange_int_231) - diag_three_elem_hf += two_third * exchange_int_231 - enddo - enddo - enddo - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & - - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & - - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) - contrib *= four_third - contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & - -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) - diag_three_elem_hf += weight * contrib - enddo - enddo - - diag_three_elem_hf = - diag_three_elem_hf - - ! --- - - else - - ! ------------ - ! SLOW VERSION - ! ------------ - - !call give_aaa_contrib(integral_aaa) - !call give_aab_contrib(integral_aab) - !call give_abb_contrib(integral_abb) - !call give_bbb_contrib(integral_bbb) - !diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb - - ! ------------ - ! ------------ - - PROVIDE int2_grad1_u12_bimo_t - PROVIDE mos_l_in_r_array_transp - PROVIDE mos_r_in_r_array_transp - - if(elec_alpha_num .eq. elec_beta_num) then - - allocate(tmp(elec_beta_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) - - deallocate(tmp) - - else - - allocate(tmp(elec_alpha_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = elec_beta_num+1, elec_alpha_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) - - deallocate(tmp) - - endif - - - endif - - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int, exchange_int_231, exchange_int_312 - double precision :: exchange_int_23, exchange_int_12, exchange_int_13 - - fock_3_mat_a_op_sh = 0.d0 - do h = 1, mo_num - do p = 1, mo_num - !F_a^{ab}(h,p) - do i = 1, elec_beta_num ! beta - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) ! - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int - enddo - enddo - !F_a^{aa}(h,p) - do i = 1, elec_beta_num ! alpha - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,i,p,j,exchange_int_231) - call give_integrals_3_body(h,j,i,j,i,p,exchange_int_312) - call give_integrals_3_body(h,j,i,p,i,j,exchange_int_23) - call give_integrals_3_body(h,j,i,i,j,p,exchange_int_12) - call give_integrals_3_body(h,j,i,j,p,i,exchange_int_13) - fock_3_mat_a_op_sh(h,p) -= ( direct_int + exchange_int_231 + exchange_int_312 & - - exchange_int_23 & ! i <-> j - - exchange_int_12 & ! p <-> j - - exchange_int_13 )! p <-> i - enddo - enddo - enddo - enddo -! symmetrized -! do p = 1, elec_beta_num -! do h = elec_alpha_num +1, mo_num -! fock_3_mat_a_op_sh(h,p) = fock_3_mat_a_op_sh(p,h) -! enddo -! enddo - -! do h = elec_beta_num+1, elec_alpha_num -! do p = elec_alpha_num +1, mo_num -! !F_a^{bb}(h,p) -! do i = 1, elec_beta_num -! do j = i+1, elec_beta_num -! call give_integrals_3_body(h,j,i,p,j,i,direct_int) -! call give_integrals_3_body(h,j,i,p,i,j,exch_int) -! fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int -! enddo -! enddo -! enddo -! enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_mat_b_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int - fock_3_mat_b_op_sh = 0.d0 - do h = 1, elec_beta_num - do p = elec_alpha_num +1, mo_num - !F_b^{aa}(h,p) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,p,i,j,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - !F_b^{ab}(h,p) - do i = elec_beta_num+1, elec_beta_num - do j = 1, elec_beta_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - enddo - enddo - -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, fock_3_w_kk_sum, (n_points_final_grid,3)] - implicit none - integer :: mm, ipoint,k - double precision :: w_kk - fock_3_w_kk_sum = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kk = x_W_ij_erf_rk(ipoint,mm,k,k) - fock_3_w_kk_sum(ipoint,mm) += w_kk - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_mos_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: mm, ipoint,k,i - double precision :: w_ki, mo_k - fock_3_w_ki_mos_k = 0.d0 - do i = 1, mo_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - mo_k = mos_in_r_array(k,ipoint) - fock_3_w_ki_mos_k(ipoint,mm,i) += w_ki * mo_k - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_w_kl, (n_points_final_grid,3)] - implicit none - integer :: k,j,ipoint,mm - double precision :: w_kj - fock_3_w_kl_w_kl = 0.d0 - do j = 1, elec_beta_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kj = x_W_ij_erf_rk(ipoint,mm,k,j) - fock_3_w_kl_w_kl(ipoint,mm) += w_kj * w_kj - enddo - enddo - enddo - enddo - - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_rho_beta, (n_points_final_grid)] - implicit none - integer :: ipoint,k - fock_3_rho_beta = 0.d0 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - fock_3_rho_beta(ipoint) += mos_in_r_array(k,ipoint) * mos_in_r_array(k,ipoint) - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_mo_k_mo_l, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,l,mm - double precision :: mos_k, mos_l, w_kl - fock_3_w_kl_mo_k_mo_l = 0.d0 - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - mos_k = mos_in_r_array_transp(ipoint,k) - mos_l = mos_in_r_array_transp(ipoint,l) - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - fock_3_w_kl_mo_k_mo_l(ipoint,mm) += w_kl * mos_k * mos_l - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_wk_a, (n_points_final_grid,3,mo_num, mo_num)] - implicit none - integer :: ipoint,i,a,k,mm - double precision :: w_ki,w_ka - fock_3_w_ki_wk_a = 0.d0 - do i = 1, mo_num - do a = 1, mo_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - w_ka = x_W_ij_erf_rk(ipoint,mm,k,a) - fock_3_w_ki_wk_a(ipoint,mm,a,i) += w_ki * w_ka - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_trace_w_tilde, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,mm - fock_3_trace_w_tilde = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - fock_3_trace_w_tilde(ipoint,mm) += fock_3_w_ki_wk_a(ipoint,mm,k,k) - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: ipoint,a,k,mm,l - double precision :: w_kl,w_la, mo_k - fock_3_w_kl_wla_phi_k = 0.d0 - do a = 1, mo_num - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - w_la = x_W_ij_erf_rk(ipoint,mm,l,a) - mo_k = mos_in_r_array_transp(ipoint,k) - fock_3_w_kl_wla_phi_k(ipoint,mm,a) += w_kl * w_la * mo_k - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - - - - - diff --git a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f b/plugins/local/tc_scf/integrals_in_r_stuff.irp.f deleted file mode 100644 index 3ce85a97..00000000 --- a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f +++ /dev/null @@ -1,391 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, tc_scf_dm_in_r, (n_points_final_grid) ] - - implicit none - integer :: i, j - - tc_scf_dm_in_r = 0.d0 - do i = 1, n_points_final_grid - do j = 1, elec_beta_num - tc_scf_dm_in_r(i) += mos_r_in_r_array(j,i) * mos_l_in_r_array(j,i) - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, w_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - - w_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - !w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,j) - w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, ww_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - double precision :: tmp - - ww_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - tmp = x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - ww_sum_in_r(ipoint,xi) += tmp * tmp - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_r_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_r_in_r(ipoint,xi,i) += mos_r_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_l_in_r(ipoint,xi,i) += mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_in_r(ipoint,xi) += W1_l_in_r(ipoint,xi,j) * mos_r_in_r_array_transp(ipoint,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_diag_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_diag_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_diag_in_r(ipoint,xi) += mos_r_in_r_array_transp(ipoint,j) * mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, v_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - v_sum_in_r = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - v_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, m, xi, ipoint - - ! TODO: call lapack - - W1_W1_r_in_r = 0.d0 - do i = 1, mo_num - do m = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_r_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,m,i) * W1_r_in_r(ipoint,xi,m) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_l_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * W1_l_in_r(ipoint,xi,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -subroutine direct_term_imj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i m j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - !integral += ( mos_l_in_r_array(a,ipoint) * mos_r_in_r_array(i,ipoint) * w_sum_in_r(ipoint,xi) * w_sum_in_r(ipoint,xi) & - ! + 2.d0 * tc_scf_dm_in_r(ipoint) * w_sum_in_r(ipoint,xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) ) * weight - - tmp = w_sum_in_r(ipoint,xi) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * tmp * tmp & - + 2.d0 * tc_scf_dm_in_r(ipoint) * tmp * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine exch_term_jmi_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j m i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi, j - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - tmp = 0.d0 - do j = 1, elec_beta_num - tmp = tmp + x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_r_in_r(ipoint,xi,i) * w_sum_in_r(ipoint,xi) & - + tc_scf_dm_in_r(ipoint) * tmp & - + mos_r_in_r_array_transp(ipoint,i) * W1_l_in_r(ipoint,xi,a) * w_sum_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine exch_term_ijm_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i j m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * v_sum_in_r(ipoint,xi) & - + 2.d0 * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) * W1_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine direct_term_ijj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j = 1, elec_beta_num) < a j j | i j j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * ww_sum_in_r(ipoint,xi) & - + 2.d0 * W1_diag_in_r(ipoint, xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine cyclic_term_jim_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j i m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine cyclic_term_mji_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | m j i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - ) * weight - - enddo - enddo - -end - -! --- - diff --git a/plugins/local/tc_scf/jast_schmos_90.irp.f b/plugins/local/tc_scf/jast_schmos_90.irp.f deleted file mode 100644 index 5c5e625f..00000000 --- a/plugins/local/tc_scf/jast_schmos_90.irp.f +++ /dev/null @@ -1,318 +0,0 @@ - BEGIN_PROVIDER [integer , m_max_sm_7] -&BEGIN_PROVIDER [integer , n_max_sm_7] -&BEGIN_PROVIDER [integer , o_max_sm_7] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_7 version of Table IV - END_DOC - m_max_sm_7 = 4 - n_max_sm_7 = 0 - o_max_sm_7 = 4 -END_PROVIDER - - BEGIN_PROVIDER [integer , m_max_sm_9] -&BEGIN_PROVIDER [integer , n_max_sm_9] -&BEGIN_PROVIDER [integer , o_max_sm_9] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_9 version of Table IV - END_DOC - m_max_sm_9 = 4 - n_max_sm_9 = 2 - o_max_sm_9 = 4 -END_PROVIDER - - - BEGIN_PROVIDER [integer , m_max_sm_17] -&BEGIN_PROVIDER [integer , n_max_sm_17] -&BEGIN_PROVIDER [integer , o_max_sm_17] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_17 version of Table IV - END_DOC - m_max_sm_17 = 6 - n_max_sm_17 = 2 - o_max_sm_17 = 6 -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_7, (0:m_max_sm_7,0:n_max_sm_7,0:o_max_sm_7,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_7(0:4,0:4,2:10) = coefficient for the SM_7 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_7 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_7 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_7(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_7(0,0,2,2) = 0.50516d0 - c_mn_o_sm_7(0,0,3,2) = -0.19313d0 - c_mn_o_sm_7(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,2) = -0.16995d0 - c_mn_o_sm_7(3,0,0,2) = -0.34505d0 - c_mn_o_sm_7(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_7(0,0,2,10) = -0.792d0 - c_mn_o_sm_7(0,0,3,10) = 1.05232d0 - c_mn_o_sm_7(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,10) = -0.13312d0 - c_mn_o_sm_7(3,0,0,10) = -0.00131d0 - c_mn_o_sm_7(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_9, (0:m_max_sm_9,0:n_max_sm_9,0:o_max_sm_9,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_9(0:4,0:4,2:10) = coefficient for the SM_9 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_9 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_9 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_9(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_9(0,0,2,2) = 0.50516d0 - c_mn_o_sm_9(0,0,3,2) = -0.19313d0 - c_mn_o_sm_9(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,2) = -0.16995d0 - c_mn_o_sm_9(3,0,0,2) = -0.34505d0 - c_mn_o_sm_9(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_9(0,0,2,10) = -0.792d0 - c_mn_o_sm_9(0,0,3,10) = 1.05232d0 - c_mn_o_sm_9(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,10) = -0.13312d0 - c_mn_o_sm_9(3,0,0,10) = -0.00131d0 - c_mn_o_sm_9(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_17, (0:m_max_sm_17,0:n_max_sm_17,0:o_max_sm_17,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_17(0:4,0:4,2:10) = coefficient for the SM_17 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_17 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_17 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_17(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_17(0,0,2,2) = 0.09239d0 - c_mn_o_sm_17(0,0,3,2) = -0.38664d0 - c_mn_o_sm_17(0,0,4,2) = 0.95764d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,2) = 0.23208d0 - c_mn_o_sm_17(3,0,0,2) = -0.45032d0 - c_mn_o_sm_17(4,0,0,2) = 0.82777d0 - c_mn_o_sm_17(2,2,0,2) = -4.15388d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,2) = 0.80622d0 - c_mn_o_sm_17(2,2,2,2) = 10.19704d0 - c_mn_o_sm_17(4,0,2,2) = -4.96259d0 - c_mn_o_sm_17(2,0,4,2) = -1.35647d0 - c_mn_o_sm_17(4,2,2,2) = -5.90907d0 - c_mn_o_sm_17(6,0,2,2) = 0.90343d0 - c_mn_o_sm_17(4,0,4,2) = 5.50739d0 - c_mn_o_sm_17(2,2,4,2) = -0.03154d0 - c_mn_o_sm_17(2,0,6,2) = -1.1051860 - - - ! Ne atom - ! two electron terms - c_mn_o_sm_17(0,0,2,10) = -0.80909d0 - c_mn_o_sm_17(0,0,3,10) = -0.00219d0 - c_mn_o_sm_17(0,0,4,10) = 0.59188d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,10) = -0.00567d0 - c_mn_o_sm_17(3,0,0,10) = 0.14011d0 - c_mn_o_sm_17(4,0,0,10) = -0.05671d0 - c_mn_o_sm_17(2,2,0,10) = -3.33767d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,10) = 1.95067d0 - c_mn_o_sm_17(2,2,2,10) = 6.83340d0 - c_mn_o_sm_17(4,0,2,10) = -3.29231d0 - c_mn_o_sm_17(2,0,4,10) = -2.44998d0 - c_mn_o_sm_17(4,2,2,10) = -2.13029d0 - c_mn_o_sm_17(6,0,2,10) = 2.25768d0 - c_mn_o_sm_17(4,0,4,10) = 1.97951d0 - c_mn_o_sm_17(2,2,4,10) = -2.0924160 - c_mn_o_sm_17(2,0,6,10) = 0.35493d0 - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, b_I_sm_90,(2:10)] -&BEGIN_PROVIDER [ double precision, d_I_sm_90,(2:10)] - implicit none - BEGIN_DOC -! "b_I" and "d_I" parameters of Eqs. (4) and (5) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - END_DOC - b_I_sm_90 = 1.d0 - d_I_sm_90 = 1.d0 - -END_PROVIDER - -subroutine get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - double precision, intent(in) :: r1(3),r2(3),rI(3) - integer, intent(in) :: sm_j, i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - END_DOC - double precision :: r_inucl,r_jnucl,r_ij,b_I, d_I - b_I = b_I_sm_90(i_charge) - d_I = d_I_sm_90(i_charge) - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - call jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) -end - -subroutine get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC - ! rescaled variables of Eq. (5) and (6) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the "b_I" and "d_I" parameters are the same as in Eqs. (5) and (6) - END_DOC - double precision, intent(in) :: r1(3),r2(3),rI(3) - double precision, intent(in) :: b_I, d_I - double precision, intent(out):: r_inucl,r_jnucl,r_ij - double precision :: rin, rjn, rij - integer :: i - rin = 0.d0 - rjn = 0.d0 - rij = 0.d0 - do i = 1,3 - rin += (r1(i) - rI(i)) * (r1(i) - rI(i)) - rjn += (r2(i) - rI(i)) * (r2(i) - rI(i)) - rij += (r2(i) - r1(i)) * (r2(i) - r1(i)) - enddo - rin = dsqrt(rin) - rjn = dsqrt(rjn) - rij = dsqrt(rij) - r_inucl = b_I * rin/(1.d0 + b_I * rin) - r_jnucl = b_I * rjn/(1.d0 + b_I * rjn) - r_ij = d_I * rij/(1.d0 + b_I * rij) -end - -subroutine jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! Here the r_inucl, r_jnucl are the rescaled variables as defined in Eq. (5) with "b_I" - ! r_ij is the rescaled variable as defined in Eq. (6) with "d_I" - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - ! - ! it returns the j_1e : sum of terms with "o" = "n" = 0, "m" /= 0, - ! j_2e : sum of terms with "m" = "n" = 0, "o" /= 0, - ! j_een : sum of terms with "m" /=0, "n" /= 0, "o" /= 0, - ! j_tot : the total sum - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer, intent(in) :: sm_j,i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - j_1e = 0.D0 - j_2e = 0.D0 - j_een = 0.D0 - double precision :: delta_mn,jastrow_sm_90_atomic - integer :: m,n,o -BEGIN_TEMPLATE - ! pure 2e part - n = 0 - m = 0 - if(sm_j == $X )then - do o = 1, o_max_sm_$X - if(dabs(c_mn_o_sm_$X(m,n,o,i_charge)).lt.1.d-10)cycle - j_2e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! pure one-e part - o = 0 - if(sm_j == $X)then - do n = 2, n_max_sm_$X - do m = 2, m_max_sm_$X - j_1e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! e-e-n part - if(sm_j == $X)then - do o = 1, o_max_sm_$X - do m = 2, m_max_sm_$X - do n = 2, n_max_sm_$X - j_een += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo - enddo - else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - j_tot = j_1e + j_2e + j_een -SUBST [ X] - 7 ;; - 9 ;; - 17 ;; -END_TEMPLATE -end - -double precision function jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC -! contribution to the function of Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) -! for a given m,n,o and atom - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer , intent(in) :: m,n,o,i_charge - double precision :: delta_mn - if(m==n)then - delta_mn = 0.5d0 - else - delta_mn = 1.D0 - endif - jastrow_sm_90_atomic = delta_mn * (r_inucl**m * r_jnucl**n + r_jnucl**m * r_inucl**n)*r_ij**o -end diff --git a/plugins/local/tc_scf/plot_j_schMos.irp.f b/plugins/local/tc_scf/plot_j_schMos.irp.f deleted file mode 100644 index eda0dd25..00000000 --- a/plugins/local/tc_scf/plot_j_schMos.irp.f +++ /dev/null @@ -1,69 +0,0 @@ -program plot_j - implicit none - double precision :: r1(3),rI(3),r2(3) - double precision :: r12,dx,xmax, j_1e,j_2e,j_een,j_tot - double precision :: j_mu_F_x_j - integer :: i,nx,m,i_charge,sm_j - - character*(128) :: output - integer :: i_unit_output_He_sm_7,i_unit_output_Ne_sm_7 - integer :: i_unit_output_He_sm_17,i_unit_output_Ne_sm_17 - integer :: getUnitAndOpen - output='J_SM_7_He' - i_unit_output_He_sm_7 = getUnitAndOpen(output,'w') - output='J_SM_7_Ne' - i_unit_output_Ne_sm_7 = getUnitAndOpen(output,'w') - - output='J_SM_17_He' - i_unit_output_He_sm_17 = getUnitAndOpen(output,'w') - output='J_SM_17_Ne' - i_unit_output_Ne_sm_17 = getUnitAndOpen(output,'w') - - rI = 0.d0 - r1 = 0.d0 - r2 = 0.d0 - r1(1) = 1.5d0 - xmax = 20.d0 - r2(1) = -xmax*0.5d0 - nx = 1000 - dx = xmax/dble(nx) - do i = 1, nx - r12 = 0.d0 - do m = 1, 3 - r12 += (r1(m) - r2(m))*(r1(m) - r2(m)) - enddo - r12 = dsqrt(r12) - double precision :: jmu,env_nucl,jmu_env,jmu_scaled, jmu_scaled_env - double precision :: b_I,d_I,r_inucl,r_jnucl,r_ij - b_I = 1.D0 - d_I = 1.D0 - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - jmu=j_mu_F_x_j(r12) - jmu_scaled=j_mu_F_x_j(r_ij) - jmu_env = jmu * env_nucl(r1) * env_nucl(r2) -! jmu_scaled_env= jmu_scaled * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_inucl**2)) * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_jnucl**2)) - jmu_scaled_env= jmu_scaled * env_nucl(r1) * env_nucl(r2) - ! He - i_charge = 2 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! Ne - i_charge = 10 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - r2(1) += dx - enddo - -end diff --git a/plugins/local/tc_scf/print_fit_param.irp.f b/plugins/local/tc_scf/print_fit_param.irp.f deleted file mode 100644 index e62f0dde..00000000 --- a/plugins/local/tc_scf/print_fit_param.irp.f +++ /dev/null @@ -1,59 +0,0 @@ -program print_fit_param - - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - !call create_guess - !call orthonormalize_mos - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i - - mu_erf = 1.d0 - touch mu_erf - - print *, ' fit for (1 - erf(x))^2' - do i = 1, n_max_fit_slat - print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i) - enddo - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/print_tcscf_energy.irp.f b/plugins/local/tc_scf/print_tcscf_energy.irp.f deleted file mode 100644 index 6f9afd9a..00000000 --- a/plugins/local/tc_scf/print_tcscf_energy.irp.f +++ /dev/null @@ -1,55 +0,0 @@ -program print_tcscf_energy - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - double precision :: etc_tot, etc_1e, etc_2e, etc_3e - - PROVIDE j2e_type mu_erf - PROVIDE j1e_type j1e_coef j1e_expo - PROVIDE env_type env_coef env_expo - - print*, ' j2e_type = ', j2e_type - print*, ' j1e_type = ', j1e_type - print*, ' env_type = ', env_type - - print*, ' mu_erf = ', mu_erf - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - !etc_3e = diag_three_elem_hf - etc_3e = tcscf_energy_3e_naive - endif - - print *, " E_TC = ", etc_tot - print *, " E_1e = ", etc_1e - print *, " E_2e = ", etc_2e - print *, " E_3e = ", etc_3e - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/rh_tcscf_simple.irp.f b/plugins/local/tc_scf/rh_tcscf_simple.irp.f deleted file mode 100644 index 2c2cf2c2..00000000 --- a/plugins/local/tc_scf/rh_tcscf_simple.irp.f +++ /dev/null @@ -1,129 +0,0 @@ -! --- - -subroutine rh_tcscf_simple() - - implicit none - integer :: i, j, it, dim_DIIS - double precision :: t0, t1 - double precision :: e_save, e_delta, rho_delta - double precision :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad - double precision :: er_DIIS - double precision, allocatable :: rho_old(:,:), rho_new(:,:) - - allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num)) - - it = 0 - e_save = 0.d0 - dim_DIIS = 0 - - ! --- - - if(.not. bi_ortho) then - print *, ' grad_hermit = ', grad_hermit - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - endif - - ! --- - - if(bi_ortho) then - - PROVIDE level_shift_tcscf - PROVIDE mo_l_coef mo_r_coef - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & - , ' gradient ', ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - - ! first iteration (HF orbitals) - call wall_time(t0) - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - - do while(tc_grad .gt. dsqrt(thresh_tcscf)) - call wall_time(t0) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - mo_l_coef = fock_tc_leigvec_ao - mo_r_coef = fock_tc_reigvec_ao - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call ezfio_set_tc_scf_tcscf_energy(etc_tot) - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - enddo - - else - - do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) ) - print*,'grad_hermit = ',grad_hermit - it += 1 - print *, 'iteration = ', it - print *, '***' - print *, 'TC HF total energy = ', TC_HF_energy - print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy - print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy - print *, 'TC HF 3 body = ', diag_three_elem_hf - print *, '***' - print *, '' - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - enddo - - endif - - print *, ' TCSCF Simple converged !' - !call print_energy_and_mos(good_angles) - - deallocate(rho_old, rho_new) - -end - -! --- - diff --git a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f b/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f deleted file mode 100644 index 0f2663e5..00000000 --- a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f +++ /dev/null @@ -1,369 +0,0 @@ - -! --- - -program rotate_tcscf_orbitals - - BEGIN_DOC - ! TODO : Rotate the bi-orthonormal orbitals in order to minimize left-right angles when degenerate - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - bi_ortho = .True. - touch bi_ortho - - call minimize_tc_orb_angles() - !call maximize_overlap() - -end - -! --- - -subroutine maximize_overlap() - - implicit none - integer :: i, m, n - double precision :: accu_d, accu_nd - double precision, allocatable :: C(:,:), R(:,:), L(:,:), W(:,:), e(:) - double precision, allocatable :: S(:,:) - - n = ao_num - m = mo_num - - allocate(L(n,m), R(n,m), C(n,m), W(n,n), e(m)) - L = mo_l_coef - R = mo_r_coef - C = mo_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e(i) - enddo - - ! --- - - print *, ' overlap before :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R) - - ! --- - - print *, ' overlap after :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - mo_l_coef = L - mo_r_coef = R - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - - ! --- - - deallocate(L, R, C, W, e) - -end subroutine maximize_overlap - -! --- - -subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0) - - implicit none - - integer, intent(in) :: n, m - double precision, intent(in) :: e0(m), W0(n,n), C0(n,m) - double precision, intent(inout) :: L0(n,m), R0(n,m) - - - integer :: i, j, k, kk, mm, id1, tot_deg - double precision :: ei, ej, de, de_thr - integer, allocatable :: deg_num(:) - double precision, allocatable :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:) - !double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - !real*8 :: S(m,m), Snew(m,m), T(m,m) - - id1 = 700 - allocate(S(id1,id1), Snew(id1,id1), T(id1,id1)) - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = thr_degen_tc - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm), C(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - C(1:n,j) = C0(1:n,i+j-1) - enddo - - ! --- - - ! C.T x W0 x R - allocate(tmp(mm,n), Stmp(mm,mm)) - call dgemm( 'T', 'N', mm, n, n, 1.d0 & - , C, size(C, 1), W0, size(W0, 1) & - , 0.d0, tmp, size(tmp, 1) ) - call dgemm( 'N', 'N', mm, mm, n, 1.d0 & - , tmp, size(tmp, 1), R, size(R, 1) & - , 0.d0, Stmp, size(Stmp, 1) ) - deallocate(C, tmp) - - S = 0.d0 - do k = 1, mm - do kk = 1, mm - S(kk,k) = Stmp(kk,k) - enddo - enddo - deallocate(Stmp) - - !print*, " overlap bef" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (S(k,kk), kk=1, mm) - !enddo - - T = 0.d0 - Snew = 0.d0 - call maxovl(mm, mm, S, T, Snew) - - !print*, " overlap aft" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (Snew(k,kk), kk=1, mm) - !enddo - - allocate(Ttmp(mm,mm)) - Ttmp(1:mm,1:mm) = T(1:mm,1:mm) - - allocate(Lnew(n,mm), Rnew(n,mm)) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , R, size(R, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Rnew, size(Rnew, 1) ) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , L, size(L, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Lnew, size(Lnew, 1) ) - - deallocate(L, R) - deallocate(Ttmp) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = Lnew(1:n,j) - R0(1:n,i+j-1) = Rnew(1:n,j) - enddo - deallocate(Lnew, Rnew) - - endif - enddo - - deallocate(S, Snew, T) - -end subroutine rotate_degen_eigvec_to_maximize_overlap - -! --- - -subroutine fix_right_to_one() - - implicit none - integer :: i, j, m, n, mm, tot_deg - double precision :: accu_d, accu_nd - double precision :: de_thr, ei, ej, de - integer, allocatable :: deg_num(:) - double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) - double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) - - n = ao_num - m = mo_num - - allocate(L0(n,m), R0(n,m), W(n,n), e0(m)) - L0 = mo_l_coef - R0 = mo_r_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e0(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e0(i) - enddo - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = 1d-6 - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - deallocate(e0) - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - enddo - - ! --- - - call impose_weighted_orthog_svd(n, mm, W, R) - call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = L(1:n,j) - R0(1:n,i+j-1) = R(1:n,j) - enddo - deallocate(L, R) - - endif - enddo - - call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.) - - deallocate(W, deg_num) - - mo_l_coef = L0 - mo_r_coef = R0 - deallocate(L0, R0) - - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - print *, ' orbitals are rotated ' - - return -end subroutine fix_right_to_one - -! --- diff --git a/plugins/local/tc_scf/tc_petermann_factor.irp.f b/plugins/local/tc_scf/tc_petermann_factor.irp.f deleted file mode 100644 index 14fff898..00000000 --- a/plugins/local/tc_scf/tc_petermann_factor.irp.f +++ /dev/null @@ -1,91 +0,0 @@ - -! --- - -program tc_petermann_factor - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i, j - double precision :: Pf_diag_av - double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:) - - allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) - - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_r_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-right orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_l_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_r_coef, ao_overlap, mo_r_coef, Sr) -! call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & -! , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & -! , 0.d0, Sr, size(Sr, 1) ) - - print *, '' - print *, ' right-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sr(:,i) - enddo - - print *, '' - print *, ' Petermann matrix:' - do i = 1, mo_num - do j = 1, mo_num - Pf(j,i) = Sl(j,i) * Sr(j,i) - enddo - write(*,'(100(F8.4,X))') Pf(:,i) - enddo - - Pf_diag_av = 0.d0 - do i = 1, mo_num - Pf_diag_av = Pf_diag_av + Pf(i,i) - enddo - Pf_diag_av = Pf_diag_av / dble(mo_num) - - print *, '' - print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av - - deallocate(Sl, Sr, Pf) - - return -end subroutine - -! --- - diff --git a/plugins/local/tc_scf/tc_scf.irp.f b/plugins/local/tc_scf/tc_scf.irp.f index ee8e8dad..f099b90e 100644 --- a/plugins/local/tc_scf/tc_scf.irp.f +++ b/plugins/local/tc_scf/tc_scf.irp.f @@ -10,13 +10,10 @@ program tc_scf integer :: i logical :: good_angles - PROVIDE j1e_type - PROVIDE j2e_type - PROVIDE tcscf_algorithm - print *, ' TC-SCF with:' - print *, ' j1e_type = ', j1e_type print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type write(json_unit,json_array_open_fmt) 'tc-scf' @@ -29,7 +26,6 @@ program tc_scf call write_int(6, my_n_pt_r_grid, 'radial external grid over') call write_int(6, my_n_pt_a_grid, 'angular external grid over') - if(tc_integ_type .eq. "numeric") then my_extra_grid_becke = .True. PROVIDE tc_grid2_a tc_grid2_r @@ -41,17 +37,7 @@ program tc_scf call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif - !call create_guess() - !call orthonormalize_mos() - - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif + call rh_tcscf_diis() PROVIDE Fock_matrix_tc_diag_mo_tot print*, ' Eigenvalues:' @@ -59,14 +45,11 @@ program tc_scf print*, i, Fock_matrix_tc_diag_mo_tot(i) enddo - ! TODO - ! rotate angles in separate code only if necessary - if(minimize_lr_angles)then + if(minimize_lr_angles) then call minimize_tc_orb_angles() endif call print_energy_and_mos(good_angles) - write(json_unit,json_array_close_fmtx) call json_close diff --git a/plugins/local/tc_scf/tc_scf_dm.irp.f b/plugins/local/tc_scf/tc_scf_dm.irp.f index bf31a4a1..5d25fce2 100644 --- a/plugins/local/tc_scf/tc_scf_dm.irp.f +++ b/plugins/local/tc_scf/tc_scf_dm.irp.f @@ -10,16 +10,8 @@ BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta - - else - - TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta END_PROVIDER @@ -35,16 +27,8 @@ BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha - - else - - TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha END_PROVIDER diff --git a/plugins/local/tc_scf/tc_scf_energy.irp.f b/plugins/local/tc_scf/tc_scf_energy.irp.f index 0266c605..c9366195 100644 --- a/plugins/local/tc_scf/tc_scf_energy.irp.f +++ b/plugins/local/tc_scf/tc_scf_energy.irp.f @@ -34,3 +34,426 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, diag_three_elem_hf] + + BEGIN_DOC + ! + ! < Phi_left | L | Phi_right > + ! + ! + ! if three_body_h_tc == false and noL_standard == true ==> do a normal ordering + ! + ! todo + ! this should be equivalent to + ! three_body_h_tc == true and noL_standard == false + ! + ! if three_body_h_tc == false and noL_standard == false ==> this is equal to 0 + ! + END_DOC + + implicit none + integer :: i, j, k, ipoint, mm + double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 + double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb + double precision, allocatable :: tmp(:) + double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) + double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) + double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) + + PROVIDE mo_l_coef mo_r_coef + + if(.not. three_body_h_tc) then + + if(noL_standard) then + PROVIDE noL_0e + diag_three_elem_hf = noL_0e + else + diag_three_elem_hf = 0.d0 + endif + + else + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp + PROVIDE mos_r_in_r_array_transp + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp(elec_beta_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + diag_three_elem_hf = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) + + deallocate(tmp) + + else ! elec_alpha_num .neq. elec_beta_num + + allocate(tmp(elec_alpha_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = elec_beta_num+1, elec_alpha_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + diag_three_elem_hf = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) + + deallocate(tmp) + + endif ! alpha/beta condition + + endif ! three_body_h_tc + +END_PROVIDER + +! --- + diff --git a/plugins/local/tc_scf/tcscf_energy_naive.irp.f b/plugins/local/tc_scf/tcscf_energy_naive.irp.f deleted file mode 100644 index 82bb8799..00000000 --- a/plugins/local/tc_scf/tcscf_energy_naive.irp.f +++ /dev/null @@ -1,80 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, tcscf_energy_3e_naive] - - implicit none - integer :: i, j, k - integer :: neu, ned, D(elec_num) - integer :: ii, jj, kk - integer :: si, sj, sk - double precision :: I_ijk, I_jki, I_kij, I_jik, I_ikj, I_kji - double precision :: I_tot - - PROVIDE mo_l_coef mo_r_coef - - neu = elec_alpha_num - ned = elec_beta_num - if (neu > 0) D(1:neu) = [(2*i-1, i = 1, neu)] - if (ned > 0) D(neu+1:neu+ned) = [(2*i, i = 1, ned)] - - !print*, "D = " - !do i = 1, elec_num - ! ii = (D(i) - 1) / 2 + 1 - ! si = mod(D(i), 2) - ! print*, i, D(i), ii, si - !enddo - - tcscf_energy_3e_naive = 0.d0 - - do i = 1, elec_num - 2 - ii = (D(i) - 1) / 2 + 1 - si = mod(D(i), 2) - - do j = i + 1, elec_num - 1 - jj = (D(j) - 1) / 2 + 1 - sj = mod(D(j), 2) - - do k = j + 1, elec_num - kk = (D(k) - 1) / 2 + 1 - sk = mod(D(k), 2) - - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, jj, kk, I_ijk) - I_tot = I_ijk - - if(sj==si .and. sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, kk, ii, I_jki) - I_tot += I_jki - endif - - if(sk==si .and. si==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, ii, jj, I_kij) - I_tot += I_kij - endif - - if(sj==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, ii, kk, I_jik) - I_tot -= I_jik - endif - - if(sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, kk, jj, I_ikj) - I_tot -= I_ikj - endif - - if(sk==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, jj, ii, I_kji) - I_tot -= I_kji - endif - - tcscf_energy_3e_naive += I_tot - enddo - enddo - enddo - - tcscf_energy_3e_naive = -tcscf_energy_3e_naive - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f b/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f deleted file mode 100644 index 0c9ebbd7..00000000 --- a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f +++ /dev/null @@ -1,189 +0,0 @@ - -subroutine contrib_3e_diag_sss(i, j, k, integral) - - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - - implicit none - integer, intent(in) :: i, j, k - double precision, intent(out) :: integral - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > - call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > - integral = direct_int + c_3_int + c_minus_3_int - - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 - - integral += - exch_13_int - exch_23_int - exch_12_int - integral = -integral - -end - -! --- - -subroutine contrib_3e_diag_soo(i,j,k,integral) - implicit none - integer, intent(in) :: i,j,k - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - double precision, intent(out) :: integral - double precision :: direct_int, exch_23_int - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23 - integral = direct_int - exch_23_int - integral = -integral -end - - -subroutine give_aaa_contrib_bis(integral_aaa) - implicit none - double precision, intent(out) :: integral_aaa - double precision :: integral - integer :: i,j,k - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = i+1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_aaa += integral - enddo - enddo - enddo - -end - -! --- - -subroutine give_aaa_contrib(integral_aaa) - - implicit none - integer :: i, j, k - double precision :: integral - double precision, intent(out) :: integral_aaa - - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_sss(i, j, k, integral) - integral_aaa += integral - enddo - enddo - enddo - integral_aaa *= 1.d0/6.d0 - - return -end - -! --- - -subroutine give_aab_contrib(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo - integral_aab *= 0.5d0 -end - - -subroutine give_aab_contrib_bis(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo -end - - -subroutine give_abb_contrib(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo - integral_abb *= 0.5d0 -end - -subroutine give_abb_contrib_bis(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo -end - -subroutine give_bbb_contrib_bis(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = i+1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - -end - -subroutine give_bbb_contrib(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - integral_bbb *= 1.d0/6.d0 -end - - diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f index 7ce57578..ec5167d1 100644 --- a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f +++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f @@ -4,11 +4,9 @@ program write_ao_2e_tc_integ implicit none - PROVIDE j1e_type - PROVIDE j2e_type - - print *, ' j1e_type = ', j1e_type print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r From d43d960b1a15e7ddb9dcf1e871bf6e9a4e70983b Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 1 May 2024 21:52:00 +0200 Subject: [PATCH 022/131] TC-SCF CLEANED --- plugins/local/bi_ort_ints/no_dressing.irp.f | 7 +- plugins/local/non_hermit_dav/biorthog.irp.f | 2 +- plugins/local/slater_tc/NEED | 1 + .../symmetrized_3_e_int_prov.irp.f | 0 plugins/local/tc_bi_ortho/test_tc_fock.irp.f | 33 - plugins/local/tc_keywords/EZFIO.cfg | 48 +- plugins/local/tc_keywords/tc_keywords.irp.f | 7 - plugins/local/tc_scf/EZFIO.cfg | 30 + plugins/local/tc_scf/fock_hermit.irp.f | 107 --- plugins/local/tc_scf/fock_tc.irp.f | 40 +- plugins/local/tc_scf/fock_tc_mo_tot.irp.f | 19 +- plugins/local/tc_scf/fock_three_hermit.irp.f | 771 ------------------ .../local/tc_scf/integrals_in_r_stuff.irp.f | 391 --------- plugins/local/tc_scf/jast_schmos_90.irp.f | 318 -------- plugins/local/tc_scf/plot_j_schMos.irp.f | 69 -- plugins/local/tc_scf/print_fit_param.irp.f | 59 -- plugins/local/tc_scf/print_tcscf_energy.irp.f | 55 -- plugins/local/tc_scf/rh_tcscf_diis.irp.f | 4 +- plugins/local/tc_scf/rh_tcscf_simple.irp.f | 129 --- .../local/tc_scf/rotate_tcscf_orbitals.irp.f | 369 --------- .../local/tc_scf/tc_petermann_factor.irp.f | 91 --- plugins/local/tc_scf/tc_scf.irp.f | 25 +- plugins/local/tc_scf/tc_scf_dm.irp.f | 24 +- plugins/local/tc_scf/tc_scf_energy.irp.f | 16 +- plugins/local/tc_scf/tcscf_energy_naive.irp.f | 80 -- .../tc_scf/three_e_energy_bi_ortho.irp.f | 189 ----- .../local/tc_scf/write_ao_2e_tc_integ.irp.f | 6 +- 27 files changed, 94 insertions(+), 2796 deletions(-) rename plugins/local/{tc_bi_ortho => slater_tc}/symmetrized_3_e_int_prov.irp.f (100%) delete mode 100644 plugins/local/tc_keywords/tc_keywords.irp.f delete mode 100644 plugins/local/tc_scf/fock_hermit.irp.f delete mode 100644 plugins/local/tc_scf/fock_three_hermit.irp.f delete mode 100644 plugins/local/tc_scf/integrals_in_r_stuff.irp.f delete mode 100644 plugins/local/tc_scf/jast_schmos_90.irp.f delete mode 100644 plugins/local/tc_scf/plot_j_schMos.irp.f delete mode 100644 plugins/local/tc_scf/print_fit_param.irp.f delete mode 100644 plugins/local/tc_scf/print_tcscf_energy.irp.f delete mode 100644 plugins/local/tc_scf/rh_tcscf_simple.irp.f delete mode 100644 plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f delete mode 100644 plugins/local/tc_scf/tc_petermann_factor.irp.f delete mode 100644 plugins/local/tc_scf/tcscf_energy_naive.irp.f delete mode 100644 plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f diff --git a/plugins/local/bi_ort_ints/no_dressing.irp.f b/plugins/local/bi_ort_ints/no_dressing.irp.f index bd225274..721ac0f8 100644 --- a/plugins/local/bi_ort_ints/no_dressing.irp.f +++ b/plugins/local/bi_ort_ints/no_dressing.irp.f @@ -322,6 +322,12 @@ END_PROVIDER BEGIN_PROVIDER [double precision, noL_0e] + BEGIN_DOC + ! + ! < Phi_left | L | Phi_right > + ! + END_DOC + implicit none integer :: i, j, k, ipoint double precision :: t0, t1 @@ -330,7 +336,6 @@ BEGIN_PROVIDER [double precision, noL_0e] double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - call wall_time(t0) print*, " Providing noL_0e ..." diff --git a/plugins/local/non_hermit_dav/biorthog.irp.f b/plugins/local/non_hermit_dav/biorthog.irp.f index b36b0130..4b618228 100644 --- a/plugins/local/non_hermit_dav/biorthog.irp.f +++ b/plugins/local/non_hermit_dav/biorthog.irp.f @@ -43,7 +43,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei ! track & sort the real eigenvalues n_good = 0 - thr = Im_thresh_tcscf + thr = Im_thresh_tc do i = 1, n if(dabs(WI(i)) .lt. thr) then n_good += 1 diff --git a/plugins/local/slater_tc/NEED b/plugins/local/slater_tc/NEED index ef0aa3f7..a8669866 100644 --- a/plugins/local/slater_tc/NEED +++ b/plugins/local/slater_tc/NEED @@ -5,3 +5,4 @@ bi_ortho_mos tc_keywords non_hermit_dav dav_general_mat +tc_scf diff --git a/plugins/local/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f b/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f rename to plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f index f1a7cc0a..85f3ed97 100644 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f @@ -24,44 +24,12 @@ program test_tc_fock !call routine_2 ! call routine_3() -! call test_3e call routine_tot end ! --- -subroutine test_3e - implicit none - double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu - double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) - print*,'hmono = ',hmono - print*,'htwoe = ',htwoe - print*,'hthree= ',hthree - print*,'htot = ',htot - print*,'' - print*,'' - print*,'TC_one= ',tc_hf_one_e_energy - print*,'TC_two= ',TC_HF_two_e_energy - print*,'TC_3e = ',diag_three_elem_hf - print*,'TC_tot= ',TC_HF_energy - print*,'' - print*,'' - call give_aaa_contrib(integral_aaa) - print*,'integral_aaa = ',integral_aaa - call give_aab_contrib(integral_aab) - print*,'integral_aab = ',integral_aab - call give_abb_contrib(integral_abb) - print*,'integral_abb = ',integral_abb - call give_bbb_contrib(integral_bbb) - print*,'integral_bbb = ',integral_bbb - accu = integral_aaa + integral_aab + integral_abb + integral_bbb - print*,'accu = ',accu - print*,'delta = ',hthree - accu - -end - subroutine routine_3() use bitmasks ! you need to include the bitmasks_module.f90 features @@ -86,7 +54,6 @@ subroutine routine_3() do i = 1, elec_num_tab(s1) do a = elec_num_tab(s1)+1, mo_num ! virtual - det_i = ref_bitmask call do_single_excitation(det_i, i, a, s1, i_ok) if(i_ok == -1) then diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index e4d9701a..33b9db57 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -100,30 +100,12 @@ doc: If |true|, the states are re-ordered to match the input states default: False interface: ezfio,provider,ocaml -[bi_ortho] -type: logical -doc: If |true|, the MO basis is assumed to be bi-orthonormal -interface: ezfio,provider,ocaml -default: True - [symetric_fock_tc] type: logical doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False -[thresh_tcscf] -type: Threshold -doc: Threshold on the convergence of the Hartree Fock energy. -interface: ezfio,provider,ocaml -default: 1.e-8 - -[n_it_tcscf_max] -type: Strictly_positive_int -doc: Maximum number of SCF iterations -interface: ezfio,provider,ocaml -default: 50 - [selection_tc] type: integer doc: if +1: only positive is selected, -1: only negative is selected, :0 both positive and negative @@ -160,30 +142,6 @@ doc: If |true|, maximize the overlap between orthogonalized left- and right eige interface: ezfio,provider,ocaml default: False -[max_dim_diis_tcscf] -type: integer -doc: Maximum size of the DIIS extrapolation procedure -interface: ezfio,provider,ocaml -default: 15 - -[level_shift_tcscf] -type: Positive_float -doc: Energy shift on the virtual MOs to improve TCSCF convergence -interface: ezfio,provider,ocaml -default: 0. - -[tcscf_algorithm] -type: character*(32) -doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] -interface: ezfio,provider,ocaml -default: DIIS - -[im_thresh_tcscf] -type: Threshold -doc: Thresholds on the Imag part of energy -interface: ezfio,provider,ocaml -default: 1.e-7 - [test_cycle_tc] type: logical doc: If |true|, the integrals of the three-body jastrow are computed with cycles @@ -304,3 +262,9 @@ doc: If |true|, more calc but less mem interface: ezfio,provider,ocaml default: False +[im_thresh_tc] +type: Threshold +doc: Thresholds on the Imag part of TC energy +interface: ezfio,provider,ocaml +default: 1.e-7 + diff --git a/plugins/local/tc_keywords/tc_keywords.irp.f b/plugins/local/tc_keywords/tc_keywords.irp.f deleted file mode 100644 index 3bc68550..00000000 --- a/plugins/local/tc_keywords/tc_keywords.irp.f +++ /dev/null @@ -1,7 +0,0 @@ -program tc_keywords - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' -end diff --git a/plugins/local/tc_scf/EZFIO.cfg b/plugins/local/tc_scf/EZFIO.cfg index 510c777c..e3d24338 100644 --- a/plugins/local/tc_scf/EZFIO.cfg +++ b/plugins/local/tc_scf/EZFIO.cfg @@ -9,3 +9,33 @@ doc: If |true|, tc-scf has converged interface: ezfio,provider,ocaml default: False +[max_dim_diis_tcscf] +type: integer +doc: Maximum size of the DIIS extrapolation procedure +interface: ezfio,provider,ocaml +default: 15 + +[level_shift_tcscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve TCSCF convergence +interface: ezfio,provider,ocaml +default: 0. + +[thresh_tcscf] +type: Threshold +doc: Threshold on the convergence of the Hartree Fock energy. +interface: ezfio,provider,ocaml +default: 1.e-8 + +[n_it_tcscf_max] +type: Strictly_positive_int +doc: Maximum number of SCF iterations +interface: ezfio,provider,ocaml +default: 50 + +[tc_Brillouin_Right] +type: logical +doc: If |true|, impose only right-Brillouin condition +interface: ezfio,provider,ocaml +default: False + diff --git a/plugins/local/tc_scf/fock_hermit.irp.f b/plugins/local/tc_scf/fock_hermit.irp.f deleted file mode 100644 index 5a51b324..00000000 --- a/plugins/local/tc_scf/fock_hermit.irp.f +++ /dev/null @@ -1,107 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, good_hermit_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! good_hermit_tc_fock_mat = Hermitian Upper triangular Fock matrix -! -! The converged eigenvectors of such matrix yield to orthonormal vectors satisfying the left Brillouin theorem - END_DOC - implicit none - integer :: i, j - - good_hermit_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, j-1 - good_hermit_tc_fock_mat(i,j) = Fock_matrix_tc_mo_tot(j,i) - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, hermit_average_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! hermit_average_tc_fock_mat = (F + F^\dagger)/2 - END_DOC - implicit none - integer :: i, j - - hermit_average_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, mo_num - hermit_average_tc_fock_mat(i,j) = 0.5d0 * (Fock_matrix_tc_mo_tot(j,i) + Fock_matrix_tc_mo_tot(i,j)) - enddo - enddo - -END_PROVIDER - - -! --- -BEGIN_PROVIDER [ double precision, grad_hermit] - implicit none - BEGIN_DOC - ! square of gradient of the energy - END_DOC - if(symetric_fock_tc)then - grad_hermit = grad_hermit_average_tc_fock_mat - else - grad_hermit = grad_good_hermit_tc_fock_mat - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, grad_good_hermit_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_good_hermit_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_good_hermit_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_good_hermit_tc_fock_mat += dabs(good_hermit_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, grad_hermit_average_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_hermit_average_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_hermit_average_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_hermit_average_tc_fock_mat += dabs(hermit_average_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - - -! --- - -subroutine save_good_hermit_tc_eigvectors() - - implicit none - integer :: sign - character*(64) :: label - logical :: output - - sign = 1 - label = "Canonical" - output = .False. - - if(symetric_fock_tc)then - call mo_as_eigvectors_of_mo_matrix(hermit_average_tc_fock_mat, mo_num, mo_num, label, sign, output) - else - call mo_as_eigvectors_of_mo_matrix(good_hermit_tc_fock_mat, mo_num, mo_num, label, sign, output) - endif -end subroutine save_good_hermit_tc_eigvectors - -! --- - diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index 508f3cd7..16bb5c87 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -110,23 +110,14 @@ BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) - if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef - PROVIDE mo_l_coef mo_r_coef - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_mo_a - endif - - else - - call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_a + Fock_matrix_tc_mo_alpha += fock_3e_mo_a endif END_PROVIDER @@ -142,21 +133,12 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] implicit none double precision, allocatable :: tmp(:,:) - if(bi_ortho) then - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_mo_b - Fock_matrix_tc_mo_beta += fock_3e_mo_b - endif - - else - - call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & + , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_b + Fock_matrix_tc_mo_beta += fock_3e_mo_b endif END_PROVIDER diff --git a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f index 2df2421e..fd490af6 100644 --- a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f +++ b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f @@ -132,7 +132,7 @@ enddo endif - if(no_oa_or_av_opt)then + if(no_oa_or_av_opt) then do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_inact_orb @@ -153,8 +153,21 @@ enddo endif - if(.not.bi_ortho .and. three_body_h_tc)then - Fock_matrix_tc_mo_tot += fock_3_mat + if(tc_Brillouin_Right) then + + double precision, allocatable :: tmp(:,:) + allocate(tmp(mo_num,mo_num)) + + tmp = Fock_matrix_tc_mo_tot + do j = 1, mo_num + do i = 1, j-1 + tmp(i,j) = Fock_matrix_tc_mo_tot(j,i) + enddo + enddo + + Fock_matrix_tc_mo_tot = tmp + deallocate(tmp) + endif END_PROVIDER diff --git a/plugins/local/tc_scf/fock_three_hermit.irp.f b/plugins/local/tc_scf/fock_three_hermit.irp.f deleted file mode 100644 index 00d47fae..00000000 --- a/plugins/local/tc_scf/fock_three_hermit.irp.f +++ /dev/null @@ -1,771 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] - - implicit none - integer :: i,j - double precision :: contrib - - fock_3_mat = 0.d0 - if(.not.bi_ortho .and. three_body_h_tc) then - - call give_fock_ia_three_e_total(1, 1, contrib) - !! !$OMP PARALLEL & - !! !$OMP DEFAULT (NONE) & - !! !$OMP PRIVATE (i,j,m,integral) & - !! !$OMP SHARED (mo_num,three_body_3_index) - !! !$OMP DO SCHEDULE (guided) COLLAPSE(3) - do i = 1, mo_num - do j = 1, mo_num - call give_fock_ia_three_e_total(j,i,contrib) - fock_3_mat(j,i) = -contrib - enddo - enddo - !else if(bi_ortho.and.three_body_h_tc) then - !! !$OMP END DO - !! !$OMP END PARALLEL - !! do i = 1, mo_num - !! do j = 1, i-1 - !! mat_three(j,i) = mat_three(i,j) - !! enddo - !! enddo - endif - -END_PROVIDER - - -subroutine give_fock_ia_three_e_total(i,a,contrib) - implicit none - BEGIN_DOC -! contrib is the TOTAL (same spins / opposite spins) contribution from the three body term to the Fock operator -! - END_DOC - integer, intent(in) :: i,a - double precision, intent(out) :: contrib - double precision :: int_1, int_2, int_3 - double precision :: mos_i, mos_a, w_ia - double precision :: mos_ia, weight - - integer :: mm, ipoint,k,l - - int_1 = 0.d0 - int_2 = 0.d0 - int_3 = 0.d0 - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - mos_i = mos_in_r_array_transp(ipoint,i) - mos_a = mos_in_r_array_transp(ipoint,a) - mos_ia = mos_a * mos_i - w_ia = x_W_ij_erf_rk(ipoint,mm,i,a) - - int_1 += weight * fock_3_w_kk_sum(ipoint,mm) * (4.d0 * fock_3_rho_beta(ipoint) * w_ia & - + 2.0d0 * mos_ia * fock_3_w_kk_sum(ipoint,mm) & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,i) * mos_a & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,a) * mos_i ) - int_2 += weight * (-1.d0) * ( 2.0d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * w_ia & - + 2.0d0 * fock_3_rho_beta(ipoint) * fock_3_w_ki_wk_a(ipoint,mm,i,a) & - + 1.0d0 * mos_ia * fock_3_trace_w_tilde(ipoint,mm) ) - - int_3 += weight * 1.d0 * (fock_3_w_kl_wla_phi_k(ipoint,mm,i) * mos_a + fock_3_w_kl_wla_phi_k(ipoint,mm,a) * mos_i & - +fock_3_w_ki_mos_k(ipoint,mm,i) * fock_3_w_ki_mos_k(ipoint,mm,a) ) - enddo - enddo - contrib = int_1 + int_2 + int_3 - -end - -! --- - -BEGIN_PROVIDER [double precision, diag_three_elem_hf] - - implicit none - integer :: i, j, k, ipoint, mm - double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 - double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb - double precision, allocatable :: tmp(:) - double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) - double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) - double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' providing diag_three_elem_hf' - - if(.not. three_body_h_tc) then - - if(noL_standard) then - PROVIDE noL_0e - diag_three_elem_hf = noL_0e - else - diag_three_elem_hf = 0.d0 - endif - - else - - if(.not. bi_ortho) then - - ! --- - - one_third = 1.d0/3.d0 - two_third = 2.d0/3.d0 - four_third = 4.d0/3.d0 - diag_three_elem_hf = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call give_integrals_3_body(k, j, i, j, i, k, exchange_int_231) - diag_three_elem_hf += two_third * exchange_int_231 - enddo - enddo - enddo - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & - - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & - - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) - contrib *= four_third - contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & - -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) - diag_three_elem_hf += weight * contrib - enddo - enddo - - diag_three_elem_hf = - diag_three_elem_hf - - ! --- - - else - - ! ------------ - ! SLOW VERSION - ! ------------ - - !call give_aaa_contrib(integral_aaa) - !call give_aab_contrib(integral_aab) - !call give_abb_contrib(integral_abb) - !call give_bbb_contrib(integral_bbb) - !diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb - - ! ------------ - ! ------------ - - PROVIDE int2_grad1_u12_bimo_t - PROVIDE mos_l_in_r_array_transp - PROVIDE mos_r_in_r_array_transp - - if(elec_alpha_num .eq. elec_beta_num) then - - allocate(tmp(elec_beta_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) - - deallocate(tmp) - - else - - allocate(tmp(elec_alpha_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = elec_beta_num+1, elec_alpha_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) - - deallocate(tmp) - - endif - - - endif - - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int, exchange_int_231, exchange_int_312 - double precision :: exchange_int_23, exchange_int_12, exchange_int_13 - - fock_3_mat_a_op_sh = 0.d0 - do h = 1, mo_num - do p = 1, mo_num - !F_a^{ab}(h,p) - do i = 1, elec_beta_num ! beta - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) ! - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int - enddo - enddo - !F_a^{aa}(h,p) - do i = 1, elec_beta_num ! alpha - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,i,p,j,exchange_int_231) - call give_integrals_3_body(h,j,i,j,i,p,exchange_int_312) - call give_integrals_3_body(h,j,i,p,i,j,exchange_int_23) - call give_integrals_3_body(h,j,i,i,j,p,exchange_int_12) - call give_integrals_3_body(h,j,i,j,p,i,exchange_int_13) - fock_3_mat_a_op_sh(h,p) -= ( direct_int + exchange_int_231 + exchange_int_312 & - - exchange_int_23 & ! i <-> j - - exchange_int_12 & ! p <-> j - - exchange_int_13 )! p <-> i - enddo - enddo - enddo - enddo -! symmetrized -! do p = 1, elec_beta_num -! do h = elec_alpha_num +1, mo_num -! fock_3_mat_a_op_sh(h,p) = fock_3_mat_a_op_sh(p,h) -! enddo -! enddo - -! do h = elec_beta_num+1, elec_alpha_num -! do p = elec_alpha_num +1, mo_num -! !F_a^{bb}(h,p) -! do i = 1, elec_beta_num -! do j = i+1, elec_beta_num -! call give_integrals_3_body(h,j,i,p,j,i,direct_int) -! call give_integrals_3_body(h,j,i,p,i,j,exch_int) -! fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int -! enddo -! enddo -! enddo -! enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_mat_b_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int - fock_3_mat_b_op_sh = 0.d0 - do h = 1, elec_beta_num - do p = elec_alpha_num +1, mo_num - !F_b^{aa}(h,p) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,p,i,j,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - !F_b^{ab}(h,p) - do i = elec_beta_num+1, elec_beta_num - do j = 1, elec_beta_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - enddo - enddo - -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, fock_3_w_kk_sum, (n_points_final_grid,3)] - implicit none - integer :: mm, ipoint,k - double precision :: w_kk - fock_3_w_kk_sum = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kk = x_W_ij_erf_rk(ipoint,mm,k,k) - fock_3_w_kk_sum(ipoint,mm) += w_kk - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_mos_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: mm, ipoint,k,i - double precision :: w_ki, mo_k - fock_3_w_ki_mos_k = 0.d0 - do i = 1, mo_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - mo_k = mos_in_r_array(k,ipoint) - fock_3_w_ki_mos_k(ipoint,mm,i) += w_ki * mo_k - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_w_kl, (n_points_final_grid,3)] - implicit none - integer :: k,j,ipoint,mm - double precision :: w_kj - fock_3_w_kl_w_kl = 0.d0 - do j = 1, elec_beta_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kj = x_W_ij_erf_rk(ipoint,mm,k,j) - fock_3_w_kl_w_kl(ipoint,mm) += w_kj * w_kj - enddo - enddo - enddo - enddo - - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_rho_beta, (n_points_final_grid)] - implicit none - integer :: ipoint,k - fock_3_rho_beta = 0.d0 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - fock_3_rho_beta(ipoint) += mos_in_r_array(k,ipoint) * mos_in_r_array(k,ipoint) - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_mo_k_mo_l, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,l,mm - double precision :: mos_k, mos_l, w_kl - fock_3_w_kl_mo_k_mo_l = 0.d0 - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - mos_k = mos_in_r_array_transp(ipoint,k) - mos_l = mos_in_r_array_transp(ipoint,l) - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - fock_3_w_kl_mo_k_mo_l(ipoint,mm) += w_kl * mos_k * mos_l - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_wk_a, (n_points_final_grid,3,mo_num, mo_num)] - implicit none - integer :: ipoint,i,a,k,mm - double precision :: w_ki,w_ka - fock_3_w_ki_wk_a = 0.d0 - do i = 1, mo_num - do a = 1, mo_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - w_ka = x_W_ij_erf_rk(ipoint,mm,k,a) - fock_3_w_ki_wk_a(ipoint,mm,a,i) += w_ki * w_ka - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_trace_w_tilde, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,mm - fock_3_trace_w_tilde = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - fock_3_trace_w_tilde(ipoint,mm) += fock_3_w_ki_wk_a(ipoint,mm,k,k) - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: ipoint,a,k,mm,l - double precision :: w_kl,w_la, mo_k - fock_3_w_kl_wla_phi_k = 0.d0 - do a = 1, mo_num - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - w_la = x_W_ij_erf_rk(ipoint,mm,l,a) - mo_k = mos_in_r_array_transp(ipoint,k) - fock_3_w_kl_wla_phi_k(ipoint,mm,a) += w_kl * w_la * mo_k - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - - - - - diff --git a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f b/plugins/local/tc_scf/integrals_in_r_stuff.irp.f deleted file mode 100644 index 3ce85a97..00000000 --- a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f +++ /dev/null @@ -1,391 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, tc_scf_dm_in_r, (n_points_final_grid) ] - - implicit none - integer :: i, j - - tc_scf_dm_in_r = 0.d0 - do i = 1, n_points_final_grid - do j = 1, elec_beta_num - tc_scf_dm_in_r(i) += mos_r_in_r_array(j,i) * mos_l_in_r_array(j,i) - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, w_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - - w_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - !w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,j) - w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, ww_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - double precision :: tmp - - ww_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - tmp = x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - ww_sum_in_r(ipoint,xi) += tmp * tmp - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_r_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_r_in_r(ipoint,xi,i) += mos_r_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_l_in_r(ipoint,xi,i) += mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_in_r(ipoint,xi) += W1_l_in_r(ipoint,xi,j) * mos_r_in_r_array_transp(ipoint,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_diag_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_diag_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_diag_in_r(ipoint,xi) += mos_r_in_r_array_transp(ipoint,j) * mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, v_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - v_sum_in_r = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - v_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, m, xi, ipoint - - ! TODO: call lapack - - W1_W1_r_in_r = 0.d0 - do i = 1, mo_num - do m = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_r_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,m,i) * W1_r_in_r(ipoint,xi,m) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_l_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * W1_l_in_r(ipoint,xi,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -subroutine direct_term_imj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i m j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - !integral += ( mos_l_in_r_array(a,ipoint) * mos_r_in_r_array(i,ipoint) * w_sum_in_r(ipoint,xi) * w_sum_in_r(ipoint,xi) & - ! + 2.d0 * tc_scf_dm_in_r(ipoint) * w_sum_in_r(ipoint,xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) ) * weight - - tmp = w_sum_in_r(ipoint,xi) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * tmp * tmp & - + 2.d0 * tc_scf_dm_in_r(ipoint) * tmp * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine exch_term_jmi_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j m i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi, j - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - tmp = 0.d0 - do j = 1, elec_beta_num - tmp = tmp + x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_r_in_r(ipoint,xi,i) * w_sum_in_r(ipoint,xi) & - + tc_scf_dm_in_r(ipoint) * tmp & - + mos_r_in_r_array_transp(ipoint,i) * W1_l_in_r(ipoint,xi,a) * w_sum_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine exch_term_ijm_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i j m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * v_sum_in_r(ipoint,xi) & - + 2.d0 * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) * W1_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine direct_term_ijj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j = 1, elec_beta_num) < a j j | i j j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * ww_sum_in_r(ipoint,xi) & - + 2.d0 * W1_diag_in_r(ipoint, xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine cyclic_term_jim_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j i m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine cyclic_term_mji_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | m j i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - ) * weight - - enddo - enddo - -end - -! --- - diff --git a/plugins/local/tc_scf/jast_schmos_90.irp.f b/plugins/local/tc_scf/jast_schmos_90.irp.f deleted file mode 100644 index 5c5e625f..00000000 --- a/plugins/local/tc_scf/jast_schmos_90.irp.f +++ /dev/null @@ -1,318 +0,0 @@ - BEGIN_PROVIDER [integer , m_max_sm_7] -&BEGIN_PROVIDER [integer , n_max_sm_7] -&BEGIN_PROVIDER [integer , o_max_sm_7] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_7 version of Table IV - END_DOC - m_max_sm_7 = 4 - n_max_sm_7 = 0 - o_max_sm_7 = 4 -END_PROVIDER - - BEGIN_PROVIDER [integer , m_max_sm_9] -&BEGIN_PROVIDER [integer , n_max_sm_9] -&BEGIN_PROVIDER [integer , o_max_sm_9] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_9 version of Table IV - END_DOC - m_max_sm_9 = 4 - n_max_sm_9 = 2 - o_max_sm_9 = 4 -END_PROVIDER - - - BEGIN_PROVIDER [integer , m_max_sm_17] -&BEGIN_PROVIDER [integer , n_max_sm_17] -&BEGIN_PROVIDER [integer , o_max_sm_17] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_17 version of Table IV - END_DOC - m_max_sm_17 = 6 - n_max_sm_17 = 2 - o_max_sm_17 = 6 -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_7, (0:m_max_sm_7,0:n_max_sm_7,0:o_max_sm_7,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_7(0:4,0:4,2:10) = coefficient for the SM_7 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_7 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_7 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_7(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_7(0,0,2,2) = 0.50516d0 - c_mn_o_sm_7(0,0,3,2) = -0.19313d0 - c_mn_o_sm_7(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,2) = -0.16995d0 - c_mn_o_sm_7(3,0,0,2) = -0.34505d0 - c_mn_o_sm_7(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_7(0,0,2,10) = -0.792d0 - c_mn_o_sm_7(0,0,3,10) = 1.05232d0 - c_mn_o_sm_7(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,10) = -0.13312d0 - c_mn_o_sm_7(3,0,0,10) = -0.00131d0 - c_mn_o_sm_7(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_9, (0:m_max_sm_9,0:n_max_sm_9,0:o_max_sm_9,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_9(0:4,0:4,2:10) = coefficient for the SM_9 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_9 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_9 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_9(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_9(0,0,2,2) = 0.50516d0 - c_mn_o_sm_9(0,0,3,2) = -0.19313d0 - c_mn_o_sm_9(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,2) = -0.16995d0 - c_mn_o_sm_9(3,0,0,2) = -0.34505d0 - c_mn_o_sm_9(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_9(0,0,2,10) = -0.792d0 - c_mn_o_sm_9(0,0,3,10) = 1.05232d0 - c_mn_o_sm_9(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,10) = -0.13312d0 - c_mn_o_sm_9(3,0,0,10) = -0.00131d0 - c_mn_o_sm_9(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_17, (0:m_max_sm_17,0:n_max_sm_17,0:o_max_sm_17,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_17(0:4,0:4,2:10) = coefficient for the SM_17 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_17 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_17 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_17(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_17(0,0,2,2) = 0.09239d0 - c_mn_o_sm_17(0,0,3,2) = -0.38664d0 - c_mn_o_sm_17(0,0,4,2) = 0.95764d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,2) = 0.23208d0 - c_mn_o_sm_17(3,0,0,2) = -0.45032d0 - c_mn_o_sm_17(4,0,0,2) = 0.82777d0 - c_mn_o_sm_17(2,2,0,2) = -4.15388d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,2) = 0.80622d0 - c_mn_o_sm_17(2,2,2,2) = 10.19704d0 - c_mn_o_sm_17(4,0,2,2) = -4.96259d0 - c_mn_o_sm_17(2,0,4,2) = -1.35647d0 - c_mn_o_sm_17(4,2,2,2) = -5.90907d0 - c_mn_o_sm_17(6,0,2,2) = 0.90343d0 - c_mn_o_sm_17(4,0,4,2) = 5.50739d0 - c_mn_o_sm_17(2,2,4,2) = -0.03154d0 - c_mn_o_sm_17(2,0,6,2) = -1.1051860 - - - ! Ne atom - ! two electron terms - c_mn_o_sm_17(0,0,2,10) = -0.80909d0 - c_mn_o_sm_17(0,0,3,10) = -0.00219d0 - c_mn_o_sm_17(0,0,4,10) = 0.59188d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,10) = -0.00567d0 - c_mn_o_sm_17(3,0,0,10) = 0.14011d0 - c_mn_o_sm_17(4,0,0,10) = -0.05671d0 - c_mn_o_sm_17(2,2,0,10) = -3.33767d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,10) = 1.95067d0 - c_mn_o_sm_17(2,2,2,10) = 6.83340d0 - c_mn_o_sm_17(4,0,2,10) = -3.29231d0 - c_mn_o_sm_17(2,0,4,10) = -2.44998d0 - c_mn_o_sm_17(4,2,2,10) = -2.13029d0 - c_mn_o_sm_17(6,0,2,10) = 2.25768d0 - c_mn_o_sm_17(4,0,4,10) = 1.97951d0 - c_mn_o_sm_17(2,2,4,10) = -2.0924160 - c_mn_o_sm_17(2,0,6,10) = 0.35493d0 - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, b_I_sm_90,(2:10)] -&BEGIN_PROVIDER [ double precision, d_I_sm_90,(2:10)] - implicit none - BEGIN_DOC -! "b_I" and "d_I" parameters of Eqs. (4) and (5) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - END_DOC - b_I_sm_90 = 1.d0 - d_I_sm_90 = 1.d0 - -END_PROVIDER - -subroutine get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - double precision, intent(in) :: r1(3),r2(3),rI(3) - integer, intent(in) :: sm_j, i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - END_DOC - double precision :: r_inucl,r_jnucl,r_ij,b_I, d_I - b_I = b_I_sm_90(i_charge) - d_I = d_I_sm_90(i_charge) - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - call jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) -end - -subroutine get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC - ! rescaled variables of Eq. (5) and (6) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the "b_I" and "d_I" parameters are the same as in Eqs. (5) and (6) - END_DOC - double precision, intent(in) :: r1(3),r2(3),rI(3) - double precision, intent(in) :: b_I, d_I - double precision, intent(out):: r_inucl,r_jnucl,r_ij - double precision :: rin, rjn, rij - integer :: i - rin = 0.d0 - rjn = 0.d0 - rij = 0.d0 - do i = 1,3 - rin += (r1(i) - rI(i)) * (r1(i) - rI(i)) - rjn += (r2(i) - rI(i)) * (r2(i) - rI(i)) - rij += (r2(i) - r1(i)) * (r2(i) - r1(i)) - enddo - rin = dsqrt(rin) - rjn = dsqrt(rjn) - rij = dsqrt(rij) - r_inucl = b_I * rin/(1.d0 + b_I * rin) - r_jnucl = b_I * rjn/(1.d0 + b_I * rjn) - r_ij = d_I * rij/(1.d0 + b_I * rij) -end - -subroutine jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! Here the r_inucl, r_jnucl are the rescaled variables as defined in Eq. (5) with "b_I" - ! r_ij is the rescaled variable as defined in Eq. (6) with "d_I" - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - ! - ! it returns the j_1e : sum of terms with "o" = "n" = 0, "m" /= 0, - ! j_2e : sum of terms with "m" = "n" = 0, "o" /= 0, - ! j_een : sum of terms with "m" /=0, "n" /= 0, "o" /= 0, - ! j_tot : the total sum - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer, intent(in) :: sm_j,i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - j_1e = 0.D0 - j_2e = 0.D0 - j_een = 0.D0 - double precision :: delta_mn,jastrow_sm_90_atomic - integer :: m,n,o -BEGIN_TEMPLATE - ! pure 2e part - n = 0 - m = 0 - if(sm_j == $X )then - do o = 1, o_max_sm_$X - if(dabs(c_mn_o_sm_$X(m,n,o,i_charge)).lt.1.d-10)cycle - j_2e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! pure one-e part - o = 0 - if(sm_j == $X)then - do n = 2, n_max_sm_$X - do m = 2, m_max_sm_$X - j_1e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! e-e-n part - if(sm_j == $X)then - do o = 1, o_max_sm_$X - do m = 2, m_max_sm_$X - do n = 2, n_max_sm_$X - j_een += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo - enddo - else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - j_tot = j_1e + j_2e + j_een -SUBST [ X] - 7 ;; - 9 ;; - 17 ;; -END_TEMPLATE -end - -double precision function jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC -! contribution to the function of Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) -! for a given m,n,o and atom - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer , intent(in) :: m,n,o,i_charge - double precision :: delta_mn - if(m==n)then - delta_mn = 0.5d0 - else - delta_mn = 1.D0 - endif - jastrow_sm_90_atomic = delta_mn * (r_inucl**m * r_jnucl**n + r_jnucl**m * r_inucl**n)*r_ij**o -end diff --git a/plugins/local/tc_scf/plot_j_schMos.irp.f b/plugins/local/tc_scf/plot_j_schMos.irp.f deleted file mode 100644 index eda0dd25..00000000 --- a/plugins/local/tc_scf/plot_j_schMos.irp.f +++ /dev/null @@ -1,69 +0,0 @@ -program plot_j - implicit none - double precision :: r1(3),rI(3),r2(3) - double precision :: r12,dx,xmax, j_1e,j_2e,j_een,j_tot - double precision :: j_mu_F_x_j - integer :: i,nx,m,i_charge,sm_j - - character*(128) :: output - integer :: i_unit_output_He_sm_7,i_unit_output_Ne_sm_7 - integer :: i_unit_output_He_sm_17,i_unit_output_Ne_sm_17 - integer :: getUnitAndOpen - output='J_SM_7_He' - i_unit_output_He_sm_7 = getUnitAndOpen(output,'w') - output='J_SM_7_Ne' - i_unit_output_Ne_sm_7 = getUnitAndOpen(output,'w') - - output='J_SM_17_He' - i_unit_output_He_sm_17 = getUnitAndOpen(output,'w') - output='J_SM_17_Ne' - i_unit_output_Ne_sm_17 = getUnitAndOpen(output,'w') - - rI = 0.d0 - r1 = 0.d0 - r2 = 0.d0 - r1(1) = 1.5d0 - xmax = 20.d0 - r2(1) = -xmax*0.5d0 - nx = 1000 - dx = xmax/dble(nx) - do i = 1, nx - r12 = 0.d0 - do m = 1, 3 - r12 += (r1(m) - r2(m))*(r1(m) - r2(m)) - enddo - r12 = dsqrt(r12) - double precision :: jmu,env_nucl,jmu_env,jmu_scaled, jmu_scaled_env - double precision :: b_I,d_I,r_inucl,r_jnucl,r_ij - b_I = 1.D0 - d_I = 1.D0 - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - jmu=j_mu_F_x_j(r12) - jmu_scaled=j_mu_F_x_j(r_ij) - jmu_env = jmu * env_nucl(r1) * env_nucl(r2) -! jmu_scaled_env= jmu_scaled * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_inucl**2)) * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_jnucl**2)) - jmu_scaled_env= jmu_scaled * env_nucl(r1) * env_nucl(r2) - ! He - i_charge = 2 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! Ne - i_charge = 10 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - r2(1) += dx - enddo - -end diff --git a/plugins/local/tc_scf/print_fit_param.irp.f b/plugins/local/tc_scf/print_fit_param.irp.f deleted file mode 100644 index e62f0dde..00000000 --- a/plugins/local/tc_scf/print_fit_param.irp.f +++ /dev/null @@ -1,59 +0,0 @@ -program print_fit_param - - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - !call create_guess - !call orthonormalize_mos - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i - - mu_erf = 1.d0 - touch mu_erf - - print *, ' fit for (1 - erf(x))^2' - do i = 1, n_max_fit_slat - print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i) - enddo - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/print_tcscf_energy.irp.f b/plugins/local/tc_scf/print_tcscf_energy.irp.f deleted file mode 100644 index 6f9afd9a..00000000 --- a/plugins/local/tc_scf/print_tcscf_energy.irp.f +++ /dev/null @@ -1,55 +0,0 @@ -program print_tcscf_energy - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - double precision :: etc_tot, etc_1e, etc_2e, etc_3e - - PROVIDE j2e_type mu_erf - PROVIDE j1e_type j1e_coef j1e_expo - PROVIDE env_type env_coef env_expo - - print*, ' j2e_type = ', j2e_type - print*, ' j1e_type = ', j1e_type - print*, ' env_type = ', env_type - - print*, ' mu_erf = ', mu_erf - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - !etc_3e = diag_three_elem_hf - etc_3e = tcscf_energy_3e_naive - endif - - print *, " E_TC = ", etc_tot - print *, " E_1e = ", etc_1e - print *, " E_2e = ", etc_2e - print *, " E_3e = ", etc_3e - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/rh_tcscf_diis.irp.f b/plugins/local/tc_scf/rh_tcscf_diis.irp.f index 853c4ab5..1cade02a 100644 --- a/plugins/local/tc_scf/rh_tcscf_diis.irp.f +++ b/plugins/local/tc_scf/rh_tcscf_diis.irp.f @@ -61,7 +61,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = diag_three_elem_hf + etc_3e = TC_HF_three_e_energy !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) @@ -189,7 +189,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = diag_three_elem_hf + etc_3e = TC_HF_three_e_energy !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) diff --git a/plugins/local/tc_scf/rh_tcscf_simple.irp.f b/plugins/local/tc_scf/rh_tcscf_simple.irp.f deleted file mode 100644 index 2c2cf2c2..00000000 --- a/plugins/local/tc_scf/rh_tcscf_simple.irp.f +++ /dev/null @@ -1,129 +0,0 @@ -! --- - -subroutine rh_tcscf_simple() - - implicit none - integer :: i, j, it, dim_DIIS - double precision :: t0, t1 - double precision :: e_save, e_delta, rho_delta - double precision :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad - double precision :: er_DIIS - double precision, allocatable :: rho_old(:,:), rho_new(:,:) - - allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num)) - - it = 0 - e_save = 0.d0 - dim_DIIS = 0 - - ! --- - - if(.not. bi_ortho) then - print *, ' grad_hermit = ', grad_hermit - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - endif - - ! --- - - if(bi_ortho) then - - PROVIDE level_shift_tcscf - PROVIDE mo_l_coef mo_r_coef - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & - , ' gradient ', ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - - ! first iteration (HF orbitals) - call wall_time(t0) - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - - do while(tc_grad .gt. dsqrt(thresh_tcscf)) - call wall_time(t0) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - mo_l_coef = fock_tc_leigvec_ao - mo_r_coef = fock_tc_reigvec_ao - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call ezfio_set_tc_scf_tcscf_energy(etc_tot) - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - enddo - - else - - do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) ) - print*,'grad_hermit = ',grad_hermit - it += 1 - print *, 'iteration = ', it - print *, '***' - print *, 'TC HF total energy = ', TC_HF_energy - print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy - print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy - print *, 'TC HF 3 body = ', diag_three_elem_hf - print *, '***' - print *, '' - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - enddo - - endif - - print *, ' TCSCF Simple converged !' - !call print_energy_and_mos(good_angles) - - deallocate(rho_old, rho_new) - -end - -! --- - diff --git a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f b/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f deleted file mode 100644 index 0f2663e5..00000000 --- a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f +++ /dev/null @@ -1,369 +0,0 @@ - -! --- - -program rotate_tcscf_orbitals - - BEGIN_DOC - ! TODO : Rotate the bi-orthonormal orbitals in order to minimize left-right angles when degenerate - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - bi_ortho = .True. - touch bi_ortho - - call minimize_tc_orb_angles() - !call maximize_overlap() - -end - -! --- - -subroutine maximize_overlap() - - implicit none - integer :: i, m, n - double precision :: accu_d, accu_nd - double precision, allocatable :: C(:,:), R(:,:), L(:,:), W(:,:), e(:) - double precision, allocatable :: S(:,:) - - n = ao_num - m = mo_num - - allocate(L(n,m), R(n,m), C(n,m), W(n,n), e(m)) - L = mo_l_coef - R = mo_r_coef - C = mo_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e(i) - enddo - - ! --- - - print *, ' overlap before :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R) - - ! --- - - print *, ' overlap after :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - mo_l_coef = L - mo_r_coef = R - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - - ! --- - - deallocate(L, R, C, W, e) - -end subroutine maximize_overlap - -! --- - -subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0) - - implicit none - - integer, intent(in) :: n, m - double precision, intent(in) :: e0(m), W0(n,n), C0(n,m) - double precision, intent(inout) :: L0(n,m), R0(n,m) - - - integer :: i, j, k, kk, mm, id1, tot_deg - double precision :: ei, ej, de, de_thr - integer, allocatable :: deg_num(:) - double precision, allocatable :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:) - !double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - !real*8 :: S(m,m), Snew(m,m), T(m,m) - - id1 = 700 - allocate(S(id1,id1), Snew(id1,id1), T(id1,id1)) - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = thr_degen_tc - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm), C(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - C(1:n,j) = C0(1:n,i+j-1) - enddo - - ! --- - - ! C.T x W0 x R - allocate(tmp(mm,n), Stmp(mm,mm)) - call dgemm( 'T', 'N', mm, n, n, 1.d0 & - , C, size(C, 1), W0, size(W0, 1) & - , 0.d0, tmp, size(tmp, 1) ) - call dgemm( 'N', 'N', mm, mm, n, 1.d0 & - , tmp, size(tmp, 1), R, size(R, 1) & - , 0.d0, Stmp, size(Stmp, 1) ) - deallocate(C, tmp) - - S = 0.d0 - do k = 1, mm - do kk = 1, mm - S(kk,k) = Stmp(kk,k) - enddo - enddo - deallocate(Stmp) - - !print*, " overlap bef" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (S(k,kk), kk=1, mm) - !enddo - - T = 0.d0 - Snew = 0.d0 - call maxovl(mm, mm, S, T, Snew) - - !print*, " overlap aft" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (Snew(k,kk), kk=1, mm) - !enddo - - allocate(Ttmp(mm,mm)) - Ttmp(1:mm,1:mm) = T(1:mm,1:mm) - - allocate(Lnew(n,mm), Rnew(n,mm)) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , R, size(R, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Rnew, size(Rnew, 1) ) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , L, size(L, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Lnew, size(Lnew, 1) ) - - deallocate(L, R) - deallocate(Ttmp) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = Lnew(1:n,j) - R0(1:n,i+j-1) = Rnew(1:n,j) - enddo - deallocate(Lnew, Rnew) - - endif - enddo - - deallocate(S, Snew, T) - -end subroutine rotate_degen_eigvec_to_maximize_overlap - -! --- - -subroutine fix_right_to_one() - - implicit none - integer :: i, j, m, n, mm, tot_deg - double precision :: accu_d, accu_nd - double precision :: de_thr, ei, ej, de - integer, allocatable :: deg_num(:) - double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) - double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) - - n = ao_num - m = mo_num - - allocate(L0(n,m), R0(n,m), W(n,n), e0(m)) - L0 = mo_l_coef - R0 = mo_r_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e0(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e0(i) - enddo - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = 1d-6 - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - deallocate(e0) - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - enddo - - ! --- - - call impose_weighted_orthog_svd(n, mm, W, R) - call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = L(1:n,j) - R0(1:n,i+j-1) = R(1:n,j) - enddo - deallocate(L, R) - - endif - enddo - - call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.) - - deallocate(W, deg_num) - - mo_l_coef = L0 - mo_r_coef = R0 - deallocate(L0, R0) - - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - print *, ' orbitals are rotated ' - - return -end subroutine fix_right_to_one - -! --- diff --git a/plugins/local/tc_scf/tc_petermann_factor.irp.f b/plugins/local/tc_scf/tc_petermann_factor.irp.f deleted file mode 100644 index 14fff898..00000000 --- a/plugins/local/tc_scf/tc_petermann_factor.irp.f +++ /dev/null @@ -1,91 +0,0 @@ - -! --- - -program tc_petermann_factor - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i, j - double precision :: Pf_diag_av - double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:) - - allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) - - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_r_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-right orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_l_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_r_coef, ao_overlap, mo_r_coef, Sr) -! call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & -! , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & -! , 0.d0, Sr, size(Sr, 1) ) - - print *, '' - print *, ' right-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sr(:,i) - enddo - - print *, '' - print *, ' Petermann matrix:' - do i = 1, mo_num - do j = 1, mo_num - Pf(j,i) = Sl(j,i) * Sr(j,i) - enddo - write(*,'(100(F8.4,X))') Pf(:,i) - enddo - - Pf_diag_av = 0.d0 - do i = 1, mo_num - Pf_diag_av = Pf_diag_av + Pf(i,i) - enddo - Pf_diag_av = Pf_diag_av / dble(mo_num) - - print *, '' - print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av - - deallocate(Sl, Sr, Pf) - - return -end subroutine - -! --- - diff --git a/plugins/local/tc_scf/tc_scf.irp.f b/plugins/local/tc_scf/tc_scf.irp.f index ee8e8dad..f099b90e 100644 --- a/plugins/local/tc_scf/tc_scf.irp.f +++ b/plugins/local/tc_scf/tc_scf.irp.f @@ -10,13 +10,10 @@ program tc_scf integer :: i logical :: good_angles - PROVIDE j1e_type - PROVIDE j2e_type - PROVIDE tcscf_algorithm - print *, ' TC-SCF with:' - print *, ' j1e_type = ', j1e_type print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type write(json_unit,json_array_open_fmt) 'tc-scf' @@ -29,7 +26,6 @@ program tc_scf call write_int(6, my_n_pt_r_grid, 'radial external grid over') call write_int(6, my_n_pt_a_grid, 'angular external grid over') - if(tc_integ_type .eq. "numeric") then my_extra_grid_becke = .True. PROVIDE tc_grid2_a tc_grid2_r @@ -41,17 +37,7 @@ program tc_scf call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif - !call create_guess() - !call orthonormalize_mos() - - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif + call rh_tcscf_diis() PROVIDE Fock_matrix_tc_diag_mo_tot print*, ' Eigenvalues:' @@ -59,14 +45,11 @@ program tc_scf print*, i, Fock_matrix_tc_diag_mo_tot(i) enddo - ! TODO - ! rotate angles in separate code only if necessary - if(minimize_lr_angles)then + if(minimize_lr_angles) then call minimize_tc_orb_angles() endif call print_energy_and_mos(good_angles) - write(json_unit,json_array_close_fmtx) call json_close diff --git a/plugins/local/tc_scf/tc_scf_dm.irp.f b/plugins/local/tc_scf/tc_scf_dm.irp.f index bf31a4a1..5d25fce2 100644 --- a/plugins/local/tc_scf/tc_scf_dm.irp.f +++ b/plugins/local/tc_scf/tc_scf_dm.irp.f @@ -10,16 +10,8 @@ BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta - - else - - TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta END_PROVIDER @@ -35,16 +27,8 @@ BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha - - else - - TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha END_PROVIDER diff --git a/plugins/local/tc_scf/tc_scf_energy.irp.f b/plugins/local/tc_scf/tc_scf_energy.irp.f index 0266c605..74ab9d05 100644 --- a/plugins/local/tc_scf/tc_scf_energy.irp.f +++ b/plugins/local/tc_scf/tc_scf_energy.irp.f @@ -1,7 +1,8 @@ - BEGIN_PROVIDER [ double precision, TC_HF_energy ] -&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy] -&BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy] + BEGIN_PROVIDER [double precision, TC_HF_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_one_e_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_two_e_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_three_e_energy] BEGIN_DOC ! TC Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components. @@ -27,8 +28,13 @@ enddo enddo - TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy - TC_HF_energy += diag_three_elem_hf + if((three_body_h_tc .eq. .False.) .and. (.not. noL_standard)) then + TC_HF_three_e_energy = 0.d0 + else + TC_HF_three_e_energy = noL_0e + endif + + TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy + TC_HF_three_e_energy END_PROVIDER diff --git a/plugins/local/tc_scf/tcscf_energy_naive.irp.f b/plugins/local/tc_scf/tcscf_energy_naive.irp.f deleted file mode 100644 index 82bb8799..00000000 --- a/plugins/local/tc_scf/tcscf_energy_naive.irp.f +++ /dev/null @@ -1,80 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, tcscf_energy_3e_naive] - - implicit none - integer :: i, j, k - integer :: neu, ned, D(elec_num) - integer :: ii, jj, kk - integer :: si, sj, sk - double precision :: I_ijk, I_jki, I_kij, I_jik, I_ikj, I_kji - double precision :: I_tot - - PROVIDE mo_l_coef mo_r_coef - - neu = elec_alpha_num - ned = elec_beta_num - if (neu > 0) D(1:neu) = [(2*i-1, i = 1, neu)] - if (ned > 0) D(neu+1:neu+ned) = [(2*i, i = 1, ned)] - - !print*, "D = " - !do i = 1, elec_num - ! ii = (D(i) - 1) / 2 + 1 - ! si = mod(D(i), 2) - ! print*, i, D(i), ii, si - !enddo - - tcscf_energy_3e_naive = 0.d0 - - do i = 1, elec_num - 2 - ii = (D(i) - 1) / 2 + 1 - si = mod(D(i), 2) - - do j = i + 1, elec_num - 1 - jj = (D(j) - 1) / 2 + 1 - sj = mod(D(j), 2) - - do k = j + 1, elec_num - kk = (D(k) - 1) / 2 + 1 - sk = mod(D(k), 2) - - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, jj, kk, I_ijk) - I_tot = I_ijk - - if(sj==si .and. sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, kk, ii, I_jki) - I_tot += I_jki - endif - - if(sk==si .and. si==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, ii, jj, I_kij) - I_tot += I_kij - endif - - if(sj==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, ii, kk, I_jik) - I_tot -= I_jik - endif - - if(sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, kk, jj, I_ikj) - I_tot -= I_ikj - endif - - if(sk==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, jj, ii, I_kji) - I_tot -= I_kji - endif - - tcscf_energy_3e_naive += I_tot - enddo - enddo - enddo - - tcscf_energy_3e_naive = -tcscf_energy_3e_naive - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f b/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f deleted file mode 100644 index 0c9ebbd7..00000000 --- a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f +++ /dev/null @@ -1,189 +0,0 @@ - -subroutine contrib_3e_diag_sss(i, j, k, integral) - - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - - implicit none - integer, intent(in) :: i, j, k - double precision, intent(out) :: integral - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > - call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > - integral = direct_int + c_3_int + c_minus_3_int - - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 - - integral += - exch_13_int - exch_23_int - exch_12_int - integral = -integral - -end - -! --- - -subroutine contrib_3e_diag_soo(i,j,k,integral) - implicit none - integer, intent(in) :: i,j,k - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - double precision, intent(out) :: integral - double precision :: direct_int, exch_23_int - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23 - integral = direct_int - exch_23_int - integral = -integral -end - - -subroutine give_aaa_contrib_bis(integral_aaa) - implicit none - double precision, intent(out) :: integral_aaa - double precision :: integral - integer :: i,j,k - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = i+1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_aaa += integral - enddo - enddo - enddo - -end - -! --- - -subroutine give_aaa_contrib(integral_aaa) - - implicit none - integer :: i, j, k - double precision :: integral - double precision, intent(out) :: integral_aaa - - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_sss(i, j, k, integral) - integral_aaa += integral - enddo - enddo - enddo - integral_aaa *= 1.d0/6.d0 - - return -end - -! --- - -subroutine give_aab_contrib(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo - integral_aab *= 0.5d0 -end - - -subroutine give_aab_contrib_bis(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo -end - - -subroutine give_abb_contrib(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo - integral_abb *= 0.5d0 -end - -subroutine give_abb_contrib_bis(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo -end - -subroutine give_bbb_contrib_bis(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = i+1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - -end - -subroutine give_bbb_contrib(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - integral_bbb *= 1.d0/6.d0 -end - - diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f index 7ce57578..ec5167d1 100644 --- a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f +++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f @@ -4,11 +4,9 @@ program write_ao_2e_tc_integ implicit none - PROVIDE j1e_type - PROVIDE j2e_type - - print *, ' j1e_type = ', j1e_type print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r From 23acd603d01118e0f2ce59fb14568a64d9994335 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 1 May 2024 23:17:36 +0200 Subject: [PATCH 023/131] removed diag_three_elem_hf --- plugins/local/tc_scf/tc_scf_energy.irp.f | 423 ----------------------- 1 file changed, 423 deletions(-) diff --git a/plugins/local/tc_scf/tc_scf_energy.irp.f b/plugins/local/tc_scf/tc_scf_energy.irp.f index 14d618ae..74ab9d05 100644 --- a/plugins/local/tc_scf/tc_scf_energy.irp.f +++ b/plugins/local/tc_scf/tc_scf_energy.irp.f @@ -40,426 +40,3 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, diag_three_elem_hf] - - BEGIN_DOC - ! - ! < Phi_left | L | Phi_right > - ! - ! - ! if three_body_h_tc == false and noL_standard == true ==> do a normal ordering - ! - ! todo - ! this should be equivalent to - ! three_body_h_tc == true and noL_standard == false - ! - ! if three_body_h_tc == false and noL_standard == false ==> this is equal to 0 - ! - END_DOC - - implicit none - integer :: i, j, k, ipoint, mm - double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 - double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb - double precision, allocatable :: tmp(:) - double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) - double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) - double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - - PROVIDE mo_l_coef mo_r_coef - - if(.not. three_body_h_tc) then - - if(noL_standard) then - PROVIDE noL_0e - diag_three_elem_hf = noL_0e - else - diag_three_elem_hf = 0.d0 - endif - - else - - PROVIDE int2_grad1_u12_bimo_t - PROVIDE mos_l_in_r_array_transp - PROVIDE mos_r_in_r_array_transp - - if(elec_alpha_num .eq. elec_beta_num) then - - allocate(tmp(elec_beta_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) - - deallocate(tmp) - - else ! elec_alpha_num .neq. elec_beta_num - - allocate(tmp(elec_alpha_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = elec_beta_num+1, elec_alpha_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) - - deallocate(tmp) - - endif ! alpha/beta condition - - endif ! three_body_h_tc - -END_PROVIDER - -! --- - From bd8d45b99b7505e00533bd9e97ad1b43453fb037 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 2 May 2024 17:18:45 +0200 Subject: [PATCH 024/131] FIXED BUG IN OPTIM J_BH --- plugins/local/bi_ort_ints/no_dressing.irp.f | 8 - .../non_h_ints_mu/jast_deriv_utils_vect.irp.f | 183 ++++++++---------- plugins/local/slater_tc/tc_hmat.irp.f | 1 + .../local/tc_bi_ortho/print_tc_energy.irp.f | 27 ++- plugins/local/tc_scf/tc_scf.irp.f | 31 ++- 5 files changed, 117 insertions(+), 133 deletions(-) diff --git a/plugins/local/bi_ort_ints/no_dressing.irp.f b/plugins/local/bi_ort_ints/no_dressing.irp.f index 721ac0f8..fd2c6285 100644 --- a/plugins/local/bi_ort_ints/no_dressing.irp.f +++ b/plugins/local/bi_ort_ints/no_dressing.irp.f @@ -336,9 +336,6 @@ BEGIN_PROVIDER [double precision, noL_0e] double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - call wall_time(t0) - print*, " Providing noL_0e ..." - if(elec_alpha_num .eq. elec_beta_num) then allocate(tmp(elec_beta_num)) @@ -713,11 +710,6 @@ BEGIN_PROVIDER [double precision, noL_0e] endif - call wall_time(t1) - print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - - print*, " noL_0e = ", noL_0e - END_PROVIDER ! --- diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index 33563102..db06e835 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -4,7 +4,7 @@ subroutine get_grad1_u12_withsq_r1_seq(ipoint, n_grid2, resx, resy, resz, res) BEGIN_DOC - ! + ! ! grad_1 u(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -167,7 +167,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) integer :: jpoint integer :: i_nucl, p, mpA, npA, opA double precision :: r2(3) - double precision :: dx, dy, dz, r12, tmp, r12_inv + double precision :: dx, dy, dz, r12, tmp double precision :: mu_val, mu_tmp, mu_der(3) double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) double precision :: tmp1, tmp2 @@ -181,7 +181,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) ! d/dy1 j(mu,r12) = 0.5 * [(1 - erf(mu * r12)) / r12] * (y1 - y2) ! d/dz1 j(mu,r12) = 0.5 * [(1 - erf(mu * r12)) / r12] * (z1 - z2) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -191,19 +191,15 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - - if(r12 .lt. 1d-20) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0/dsqrt(r12) - r12 = r12*r12_inv - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) * r12_inv + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 gradx(jpoint) = tmp * dx grady(jpoint) = tmp * dy @@ -212,10 +208,10 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) elseif(j2e_type .eq. "Mur") then - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -224,29 +220,23 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) - r12 = dx * dx + dy * dy + dz * dz + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + gradx(jpoint) = tmp * mu_der(1) + grady(jpoint) = tmp * mu_der(2) + gradz(jpoint) = tmp * mu_der(3) - if(r12 .lt. 1d-20) then + if(r12 .lt. 1d-10) then gradx(jpoint) = 0.d0 grady(jpoint) = 0.d0 gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0/dsqrt(r12) - r12 = r12*r12_inv - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - - gradx(jpoint) = tmp * mu_der(1) - grady(jpoint) = tmp * mu_der(2) - gradz(jpoint) = tmp * mu_der(3) - - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) * r12_inv + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 gradx(jpoint) = gradx(jpoint) + tmp * dx grady(jpoint) = grady(jpoint) + tmp * dy @@ -264,7 +254,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) PROVIDE a_boys - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -273,17 +263,14 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - + r12 = dsqrt(dx * dx + dy * dy + dz * dz) if(r12 .lt. 1d-10) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12 = dsqrt(r12) - tmp = 1.d0 + a_boys * r12 tmp = 0.5d0 / (r12 * tmp * tmp) @@ -294,13 +281,16 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) elseif(j2e_type .eq. "Boys_Handy") then - integer :: powmax - powmax = max(maxval(jBH_m),maxval(jBH_n)) - + integer :: powmax1, powmax, powmax2 double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) - allocate (f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) - do p=0,powmax + powmax1 = max(maxval(jBH_m), maxval(jBH_n)) + powmax2 = maxval(jBH_o) + powmax = max(powmax1, powmax2) + + allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) + + do p = 0, powmax double_p(p) = dble(p) enddo @@ -318,11 +308,10 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 - - do i_nucl = 1, nucl_num + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + do i_nucl = 1, nucl_num rn(1) = nucl_coord(i_nucl,1) rn(2) = nucl_coord(i_nucl,2) @@ -332,23 +321,15 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) - ! Compute powers of f1A and f2A - - do p = 1, maxval(jBH_m(:,i_nucl)) + do p = 1, powmax1 f1A_power(p) = f1A_power(p-1) * f1A - enddo - - do p = 1, maxval(jBH_n(:,i_nucl)) f2A_power(p) = f2A_power(p-1) * f2A enddo - - do p = 1, maxval(jBH_o(:,i_nucl)) + do p = 1, powmax2 g12_power(p) = g12_power(p-1) * g12 enddo - - do p = 1, jBH_size mpA = jBH_m(p,i_nucl) npA = jBH_n(p,i_nucl) @@ -358,27 +339,22 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) tmp = tmp * 0.5d0 endif -!TODO : Powers to optimize here - -! tmp1 = 0.d0 -! if(mpA .gt. 0) then -! tmp1 = tmp1 + dble(mpA) * f1A**(mpA-1) * f2A**npA -! endif -! if(npA .gt. 0) then -! tmp1 = tmp1 + dble(npA) * f1A**(npA-1) * f2A**mpA -! endif -! tmp1 = tmp1 * g12**(opA) -! -! tmp2 = 0.d0 -! if(opA .gt. 0) then -! tmp2 = tmp2 + dble(opA) * g12**(opA-1) * (f1A**(mpA) * f2A**(npA) + f1A**(npA) * f2A**(mpA)) -! endif - tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) tmp1 = tmp1 * g12_power(opA) - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) + !tmp1 = 0.d0 + !if(mpA .gt. 0) then + ! tmp1 = tmp1 + dble(mpA) * f1A**dble(mpA-1) * f2A**dble(npA) + !endif + !if(npA .gt. 0) then + ! tmp1 = tmp1 + dble(npA) * f1A**dble(npA-1) * f2A**dble(mpA) + !endif + !tmp1 = tmp1 * g12**dble(opA) + !tmp2 = 0.d0 + !if(opA .gt. 0) then + ! tmp2 = tmp2 + dble(opA) * g12**dble(opA-1) * (f1A**dble(mpA) * f2A**dble(npA) + f1A**dble(npA) * f2A**dble(mpA)) + !endif gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) @@ -418,10 +394,10 @@ subroutine grad1_jmu_r1_seq(mu, r1, n_grid2, gradx, grady, gradz) integer :: jpoint double precision :: r2(3) - double precision :: dx, dy, dz, r12, r12_inv, tmp + double precision :: dx, dy, dz, r12, tmp - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -431,19 +407,15 @@ subroutine grad1_jmu_r1_seq(mu, r1, n_grid2, gradx, grady, gradz) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - - if(r12 .lt. 1d-20) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0 / dsqrt(r12) - r12 = r12 * r12_inv - - tmp = 0.5d0 * (1.d0 - derf(mu * r12)) * r12_inv + tmp = 0.5d0 * (1.d0 - derf(mu * r12)) / r12 gradx(jpoint) = tmp * dx grady(jpoint) = tmp * dy @@ -467,7 +439,7 @@ subroutine j12_r1_seq(r1, n_grid2, res) integer :: jpoint double precision :: r2(3) double precision :: dx, dy, dz - double precision :: mu_tmp, r12, mu_erf_inv + double precision :: mu_tmp, r12 PROVIDE final_grid_points_extra @@ -475,21 +447,20 @@ subroutine j12_r1_seq(r1, n_grid2, res) PROVIDE mu_erf - mu_erf_inv = 1.d0 / mu_erf - do jpoint = 1, n_points_extra_final_grid ! r2 - + do jpoint = 1, n_points_extra_final_grid ! r2 + r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - + dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) r12 = dsqrt(dx * dx + dy * dy + dz * dz) mu_tmp = mu_erf * r12 - - res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) * mu_erf_inv + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf enddo elseif(j2e_type .eq. "Boys") then @@ -498,7 +469,7 @@ subroutine j12_r1_seq(r1, n_grid2, res) PROVIDE a_boys - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -540,19 +511,19 @@ subroutine jmu_r1_seq(mu, r1, n_grid2, res) tmp1 = inv_sq_pi_2 / mu - do jpoint = 1, n_points_extra_final_grid ! r2 - + do jpoint = 1, n_points_extra_final_grid ! r2 + r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - + dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) r12 = dsqrt(dx * dx + dy * dy + dz * dz) tmp2 = mu * r12 - + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(tmp2)) - tmp1 * dexp(-tmp2*tmp2) enddo @@ -579,7 +550,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -598,7 +569,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -618,7 +589,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -636,7 +607,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -666,7 +637,7 @@ end subroutine get_grad1_u12_2e_r1_seq(ipoint, n_grid2, resx, resy, resz) BEGIN_DOC - ! + ! ! grad_1 u_2e(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -786,7 +757,7 @@ end subroutine get_u12_2e_r1_seq(ipoint, n_grid2, res) BEGIN_DOC - ! + ! ! u_2e(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -909,7 +880,7 @@ subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) endif return -end +end ! --- diff --git a/plugins/local/slater_tc/tc_hmat.irp.f b/plugins/local/slater_tc/tc_hmat.irp.f index abec410d..cc780364 100644 --- a/plugins/local/slater_tc/tc_hmat.irp.f +++ b/plugins/local/slater_tc/tc_hmat.irp.f @@ -22,6 +22,7 @@ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)] if(noL_standard) then PROVIDE noL_0e + print*, "noL_0e =", noL_0e PROVIDE noL_1e PROVIDE noL_2e endif diff --git a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f b/plugins/local/tc_bi_ortho/print_tc_energy.irp.f index 1fa0c6d9..979d792b 100644 --- a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_energy.irp.f @@ -9,15 +9,6 @@ program print_tc_energy read_wf = .True. touch read_wf - PROVIDE j2e_type - PROVIDE j1e_type - PROVIDE env_type - - print *, ' j2e_type = ', j2e_type - print *, ' j1e_type = ', j1e_type - print *, ' env_type = ', env_type - - my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r @@ -38,6 +29,24 @@ program print_tc_energy call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE j2e_type + PROVIDE j1e_type + PROVIDE env_type + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + call write_tc_energy() end diff --git a/plugins/local/tc_scf/tc_scf.irp.f b/plugins/local/tc_scf/tc_scf.irp.f index f099b90e..83da03ec 100644 --- a/plugins/local/tc_scf/tc_scf.irp.f +++ b/plugins/local/tc_scf/tc_scf.irp.f @@ -7,15 +7,6 @@ program tc_scf END_DOC implicit none - integer :: i - logical :: good_angles - - print *, ' TC-SCF with:' - print *, ' j2e_type = ', j2e_type - print *, ' j1e_type = ', j1e_type - print *, ' env_type = ', env_type - - write(json_unit,json_array_open_fmt) 'tc-scf' my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r @@ -37,6 +28,26 @@ program tc_scf call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif + call main() + +end + +! --- + +subroutine main() + + implicit none + + integer :: i + logical :: good_angles + + print *, ' TC-SCF with:' + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + write(json_unit,json_array_open_fmt) 'tc-scf' + call rh_tcscf_diis() PROVIDE Fock_matrix_tc_diag_mo_tot @@ -84,7 +95,7 @@ subroutine create_guess() SOFT_TOUCH mo_label endif -end subroutine create_guess +end ! --- From 13785b267c36319925ffa72ebe42399fa932ffae Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 3 May 2024 11:34:30 +0200 Subject: [PATCH 025/131] fixed a bug in src/scf_utils/roothaan_hall_scf.irp.f --- .../extra_grid_vector.irp.f | 20 +++++++++---------- .../grid_becke_vector.irp.f | 20 +++++++++---------- src/scf_utils/roothaan_hall_scf.irp.f | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index 16a52dc6..44fc4435 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -71,16 +71,16 @@ END_PROVIDER index_final_points_extra(3,i_count) = j index_final_points_extra_reverse(k,i,j) = i_count - if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then - print *, ' !!! WARNING !!!' - print *, ' negative weight !!!!' - print *, i_count, final_weight_at_r_vector_extra(i_count) - if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then - final_weight_at_r_vector_extra(i_count) = 0.d0 - else - stop - endif - endif +! if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then +! print *, ' !!! WARNING !!!' +! print *, ' negative weight !!!!' +! print *, i_count, final_weight_at_r_vector_extra(i_count) +! if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then +! final_weight_at_r_vector_extra(i_count) = 0.d0 +! else +! stop +! endif +! endif enddo enddo enddo diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index c35918c3..7097dbb3 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -68,16 +68,16 @@ END_PROVIDER index_final_points(3,i_count) = j index_final_points_reverse(k,i,j) = i_count - if(final_weight_at_r_vector(i_count) .lt. 0.d0) then - print *, ' !!! WARNING !!!' - print *, ' negative weight !!!!' - print *, i_count, final_weight_at_r_vector(i_count) - if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then - final_weight_at_r_vector(i_count) = 0.d0 - else - stop - endif - endif +! if(final_weight_at_r_vector(i_count) .lt. 0.d0) then +! print *, ' !!! WARNING !!!' +! print *, ' negative weight !!!!' +! print *, i_count, final_weight_at_r_vector(i_count) +! if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then +! final_weight_at_r_vector(i_count) = 0.d0 +! else +! stop +! endif +! endif enddo enddo enddo diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3f5c8549..e0fe5319 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -217,7 +217,7 @@ END_DOC do while (i Date: Mon, 6 May 2024 17:47:48 +0200 Subject: [PATCH 026/131] updated get_fci_tc_conv.sh --- scripts/get_fci_tc_conv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh index 643f3ac0..f0c99baf 100755 --- a/scripts/get_fci_tc_conv.sh +++ b/scripts/get_fci_tc_conv.sh @@ -1,2 +1,2 @@ file=$1 -grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc +grep "Ndet,E,E+PT2,pt2_minus,pt2_plus,pt2_abs=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From b14325fef482bdf6cb471b40edf8fa46f2aeac65 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 May 2024 18:21:58 +0200 Subject: [PATCH 027/131] Introducing qmckl --- plugins/local/non_h_ints_mu/NEED | 1 + plugins/local/non_h_ints_mu/deb_aos.irp.f | 49 ++++++++-- plugins/local/non_h_ints_mu/qmckl.irp.f | 104 ++++++++++++++++++++++ 3 files changed, 148 insertions(+), 6 deletions(-) diff --git a/plugins/local/non_h_ints_mu/NEED b/plugins/local/non_h_ints_mu/NEED index 48c1c24b..5ca1d543 100644 --- a/plugins/local/non_h_ints_mu/NEED +++ b/plugins/local/non_h_ints_mu/NEED @@ -3,3 +3,4 @@ hamiltonian jastrow ao_tc_eff_map bi_ortho_mos +trexio diff --git a/plugins/local/non_h_ints_mu/deb_aos.irp.f b/plugins/local/non_h_ints_mu/deb_aos.irp.f index c9bc9c9a..86d011fb 100644 --- a/plugins/local/non_h_ints_mu/deb_aos.irp.f +++ b/plugins/local/non_h_ints_mu/deb_aos.irp.f @@ -34,21 +34,58 @@ subroutine print_aos() PROVIDE final_grid_points aos_in_r_array aos_grad_in_r_array aos_lapl_in_r_array - do ipoint = 1, n_points_final_grid - r(:) = final_grid_points(:,ipoint) - print*, r - enddo +! do ipoint = 1, n_points_final_grid +! r(:) = final_grid_points(:,ipoint) +! print*, r +! enddo +double precision :: accu_vgl(5) +double precision :: accu_vgl_nrm(5) do ipoint = 1, n_points_final_grid - r(:) = final_grid_points(:,ipoint) do i = 1, ao_num ao_val = aos_in_r_array (i,ipoint) ao_der(:) = aos_grad_in_r_array(i,ipoint,:) ao_lap = aos_lapl_in_r_array(1,i,ipoint) + aos_lapl_in_r_array(2,i,ipoint) + aos_lapl_in_r_array(3,i,ipoint) - write(*, '(5(f15.7, 3X))') ao_val, ao_der, ao_lap + write(111, '(5(f15.7, 3X))') ao_val, ao_der, ao_lap enddo enddo + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + ao_val = aos_in_r_array_qmckl (i,ipoint) + ao_der(:) = aos_grad_in_r_array_qmckl(i,ipoint,:) + ao_lap = aos_lapl_in_r_array_qmckl(i,ipoint) + write(222, '(5(f15.7, 3X))') ao_val, ao_der, ao_lap + enddo + enddo + + accu_vgl = 0.d0 + accu_vgl_nrm = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + ao_val = aos_in_r_array (i,ipoint) + ao_der(:) = aos_grad_in_r_array(i,ipoint,:) + ao_lap = aos_lapl_in_r_array(1,i,ipoint) + aos_lapl_in_r_array(2,i,ipoint) + aos_lapl_in_r_array(3,i,ipoint) + accu_vgl_nrm(1) += dabs(ao_val) + accu_vgl_nrm(2) += dabs(ao_der(1)) + accu_vgl_nrm(3) += dabs(ao_der(2)) + accu_vgl_nrm(4) += dabs(ao_der(3)) + accu_vgl_nrm(5) += dabs(ao_lap) + + ao_val -= aos_in_r_array_qmckl (i,ipoint) + ao_der(:) -= aos_grad_in_r_array_qmckl(i,ipoint,:) + ao_lap -= aos_lapl_in_r_array_qmckl(i,ipoint) + accu_vgl(1) += dabs(ao_val) + accu_vgl(2) += dabs(ao_der(1)) + accu_vgl(3) += dabs(ao_der(2)) + accu_vgl(4) += dabs(ao_der(3)) + accu_vgl(5) += dabs(ao_lap) + enddo + + enddo + accu_vgl(:) *= 1.d0 / accu_vgl_nrm(:) + print *, accu_vgl + return end diff --git a/plugins/local/non_h_ints_mu/qmckl.irp.f b/plugins/local/non_h_ints_mu/qmckl.irp.f index 1df80457..4d419e24 100644 --- a/plugins/local/non_h_ints_mu/qmckl.irp.f +++ b/plugins/local/non_h_ints_mu/qmckl.irp.f @@ -75,3 +75,107 @@ BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] endif END_PROVIDER + + + BEGIN_PROVIDER [ double precision, aos_in_r_array_qmckl, (ao_num,n_points_final_grid)] +&BEGIN_PROVIDER [ double precision, aos_grad_in_r_array_qmckl, (ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [ double precision, aos_lapl_in_r_array_qmckl, (ao_num, n_points_final_grid)] + implicit none + BEGIN_DOC + ! AOS computed with qmckl + END_DOC + use qmckl + + integer*8 :: qmckl_ctx + integer(qmckl_exit_code) :: rc + + qmckl_ctx = qmckl_context_create() + + rc = qmckl_trexio_read(qmckl_ctx, trexio_file, 1_8*len(trim(trexio_filename))) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in read_trexio' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + rc = qmckl_set_point(qmckl_ctx, 'N', n_points_final_grid*1_8, final_grid_points, n_points_final_grid*3_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_point' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + double precision, allocatable :: vgl(:,:,:) + allocate( vgl(ao_num,5,n_points_final_grid)) + rc = qmckl_get_ao_basis_ao_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*ao_num*5_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in get_ao_vgl' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + integer :: i,k + do k=1,n_points_final_grid + do i=1,ao_num + aos_in_r_array_qmckl(i,k) = vgl(i,1,k) + aos_grad_in_r_array_qmckl(i,k,1) = vgl(i,2,k) + aos_grad_in_r_array_qmckl(i,k,2) = vgl(i,3,k) + aos_grad_in_r_array_qmckl(i,k,3) = vgl(i,4,k) + aos_lapl_in_r_array_qmckl(i,k) = vgl(i,5,k) + enddo + enddo + +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, mos_in_r_array_qmckl, (mo_num,n_points_final_grid)] +&BEGIN_PROVIDER [ double precision, mos_grad_in_r_array_qmckl, (mo_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [ double precision, mos_lapl_in_r_array_qmckl, (mo_num, n_points_final_grid)] + implicit none + BEGIN_DOC + ! moS computed with qmckl + END_DOC + use qmckl + + integer*8 :: qmckl_ctx + integer(qmckl_exit_code) :: rc + + qmckl_ctx = qmckl_context_create() + + rc = qmckl_trexio_read(qmckl_ctx, trexio_file, 1_8*len(trim(trexio_filename))) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in read_trexio' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + rc = qmckl_set_point(qmckl_ctx, 'N', n_points_final_grid*1_8, final_grid_points, n_points_final_grid*3_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_point' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + double precision, allocatable :: vgl(:,:,:) + allocate( vgl(mo_num,5,n_points_final_grid)) + rc = qmckl_get_mo_basis_mo_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in get_mo_vgl' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + integer :: i,k + do k=1,n_points_final_grid + do i=1,mo_num + mos_in_r_array_qmckl(i,k) = vgl(i,1,k) + mos_grad_in_r_array_qmckl(i,k,1) = vgl(i,2,k) + mos_grad_in_r_array_qmckl(i,k,2) = vgl(i,3,k) + mos_grad_in_r_array_qmckl(i,k,3) = vgl(i,4,k) + mos_lapl_in_r_array_qmckl(i,k) = vgl(i,5,k) + enddo + enddo + +END_PROVIDER + + From 109a956f0d947665af7fbd3ed02d3569c49e592e Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:30:05 +0200 Subject: [PATCH 028/131] does not compile but working on it --- plugins/local/slater_tc/h_mat_triple.irp.f | 391 ------------------ .../local/slater_tc/slater_tc_opt_diag.irp.f | 311 +++++++++++++- .../slater_tc/symmetrized_3_e_int_prov.irp.f | 140 ------- plugins/local/slater_tc_no_opt/.gitignore | 59 +++ plugins/local/slater_tc_no_opt/NEED | 8 + plugins/local/slater_tc_no_opt/README.rst | 4 + .../h_biortho.irp.f | 0 .../local/slater_tc_no_opt/h_mat_triple.irp.f | 193 +++++++++ .../h_tc_bi_ortho_psi.irp.f | 0 .../slater_tc_3e_slow.irp.f | 2 +- .../slater_tc_no_opt.irp.f} | 2 +- .../slater_tc_slow.irp.f | 73 +--- src/determinants/slater_rules_general.irp.f | 192 +++++++++ 13 files changed, 769 insertions(+), 606 deletions(-) delete mode 100644 plugins/local/slater_tc/h_mat_triple.irp.f delete mode 100644 plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f create mode 100644 plugins/local/slater_tc_no_opt/.gitignore create mode 100644 plugins/local/slater_tc_no_opt/NEED create mode 100644 plugins/local/slater_tc_no_opt/README.rst rename plugins/local/{slater_tc => slater_tc_no_opt}/h_biortho.irp.f (100%) create mode 100644 plugins/local/slater_tc_no_opt/h_mat_triple.irp.f rename plugins/local/{slater_tc => slater_tc_no_opt}/h_tc_bi_ortho_psi.irp.f (100%) rename plugins/local/{slater_tc => slater_tc_no_opt}/slater_tc_3e_slow.irp.f (99%) rename plugins/local/{slater_tc/slater_tc.irp.f => slater_tc_no_opt/slater_tc_no_opt.irp.f} (82%) rename plugins/local/{slater_tc => slater_tc_no_opt}/slater_tc_slow.irp.f (80%) create mode 100644 src/determinants/slater_rules_general.irp.f diff --git a/plugins/local/slater_tc/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f deleted file mode 100644 index 6f5697a2..00000000 --- a/plugins/local/slater_tc/h_mat_triple.irp.f +++ /dev/null @@ -1,391 +0,0 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo -end - -subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & - !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & - !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo - !$OMP END PARALLEL DO -end - -! --- - -subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo -end - -subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & - !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & - !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo - !$OMP END PARALLEL DO -end - -! --- -subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) - use bitmasks - BEGIN_DOC -! for triple excitation -!! -!! WARNING !! -! -! Genuine triple excitations of the same spin are not yet implemented - END_DOC - implicit none - integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) - integer, intent(in) :: Nint - double precision, intent(out) :: hmono, htwoe, hthree, htot - integer :: degree - integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 - integer :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision :: phase,sym_3_e_int_from_6_idx_tensor - - hmono = 0.d0 - htwoe = 0.d0 - hthree = 0.d0 - htot = 0.d0 - call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) - degree = degree_array(1) + degree_array(2) - if(degree .ne. 3)return - if(degree_array(1)==3.or.degree_array(2)==3)then - if(degree_array(1) == 3)then - h1 = holes_array(1,1) - h2 = holes_array(2,1) - h3 = holes_array(3,1) - p1 = particles_array(1,1) - p2 = particles_array(2,1) - p3 = particles_array(3,1) - else - h1 = holes_array(1,2) - h2 = holes_array(2,2) - h3 = holes_array(3,2) - p1 = particles_array(1,2) - p2 = particles_array(2,2) - p3 = particles_array(3,2) - endif - hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) - else - if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta - h1 = holes_array(1,1) - h2 = holes_array(2,1) - h3 = holes_array(1,2) - p1 = particles_array(1,1) - p2 = particles_array(2,1) - p3 = particles_array(1,2) - else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha - h1 = holes_array(1,2) - h2 = holes_array(2,2) - h3 = holes_array(1,1) - p1 = particles_array(1,2) - p2 = particles_array(2,2) - p3 = particles_array(1,1) - else - print*,'PB !!' - stop - endif - hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) - endif - hthree *= phase - htot = hthree - end - diff --git a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f index 78f9dc66..3c5a5d12 100644 --- a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f @@ -19,13 +19,13 @@ PROVIDE HF_bitmask PROVIDE mo_l_coef mo_r_coef - call diag_htilde_mu_mat_bi_ortho_slow(N_int, HF_bitmask, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(N_int, HF_bitmask, hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc) then - call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) + call diag_htc_bi_orth_3e_brute(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -524,3 +524,310 @@ end ! --- +subroutine diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) + + BEGIN_DOC + ! + ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hmono,htwoe,htot + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + double precision :: get_mo_two_e_integral_tc_int + integer(bit_kind) :: key_i_core(Nint,2) + + PROVIDE mo_bi_ortho_tc_two_e + + hmono = 0.d0 + htwoe = 0.d0 + htot = 0.d0 + + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + do ispin = 1, 2 + do i = 1, Ne(ispin) + ii = occ(i,ispin) + hmono += mo_bi_ortho_tc_one_e(ii,ii) + enddo + enddo + + ! alpha/beta two-body + ispin = 1 + jspin = 2 + do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) + ii = occ(i,ispin) + do j = 1, Ne(jspin) ! electron 2 + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) + enddo + enddo + + ! alpha/alpha two-body + do i = 1, Ne(ispin) + ii = occ(i,ispin) + do j = i+1, Ne(ispin) + jj = occ(j,ispin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + ! beta/beta two-body + do i = 1, Ne(jspin) + ii = occ(i,jspin) + do j = i+1, Ne(jspin) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + htot = hmono + htwoe + +end + +! --- + +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) + + BEGIN_DOC + ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hthree + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm + integer(bit_kind) :: key_i_core(Nint,2) + double precision :: direct_int, exchange_int, ref + double precision, external :: sym_3_e_int_from_6_idx_tensor + double precision, external :: three_e_diag_parrallel_spin + + PROVIDE mo_l_coef mo_r_coef + + if(core_tc_op) then + do i = 1, Nint + key_i_core(i,1) = xor(key_i(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) + else + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + endif + + hthree = 0.d0 + + if((Ne(1)+Ne(2)) .ge. 3) then + + ! alpha/alpha/beta three-body + do i = 1, Ne(1) + ii = occ(i,1) + do j = i+1, Ne(1) + jj = occ(j,1) + do m = 1, Ne(2) + mm = occ(m,2) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) !uses 3-idx tensor + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) !uses 3-idx tensor + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! beta/beta/alpha three-body + do i = 1, Ne(2) + ii = occ(i,2) + do j = i+1, Ne(2) + jj = occ(j,2) + do m = 1, Ne(1) + mm = occ(m,1) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! alpha/alpha/alpha three-body + do i = 1, Ne(1) + ii = occ(i,1) ! 1 + do j = i+1, Ne(1) + jj = occ(j,1) ! 2 + do m = j+1, Ne(1) + mm = occ(m,1) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + ! beta/beta/beta three-body + do i = 1, Ne(2) + ii = occ(i,2) ! 1 + do j = i+1, Ne(2) + jj = occ(j,2) ! 2 + do m = j+1, Ne(2) + mm = occ(m,2) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + endif + +end + + + +BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS + ! + ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, m + double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin + + three_e_diag_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_diag_parrallel_spin_prov ...' + + integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do j = 1, mo_num + do m = j, mo_num + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do i = 1, mo_num + do j = 1, mo_num + do m = 1, j + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) + enddo + enddo + enddo + + call wall_time(wall1) + print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0, three_e_single_parrallel_spin + + three_e_single_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_single_parrallel_spin_prov ...' + + integral = three_e_single_parrallel_spin(1,1,1,1) + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + + +! --- + +BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0, three_e_double_parrallel_spin + + three_e_double_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_double_parrallel_spin_prov ...' + call wall_time(wall0) + + integral = three_e_double_parrallel_spin(1,1,1,1,1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + diff --git a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f b/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f deleted file mode 100644 index e8277a74..00000000 --- a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f +++ /dev/null @@ -1,140 +0,0 @@ - -BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS - ! - ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, m - double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin - - three_e_diag_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_diag_parrallel_spin_prov ...' - - integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do j = 1, mo_num - do m = j, mo_num - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do i = 1, mo_num - do j = 1, mo_num - do m = 1, j - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) - enddo - enddo - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0, three_e_single_parrallel_spin - - three_e_single_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_single_parrallel_spin_prov ...' - - integral = three_e_single_parrallel_spin(1,1,1,1) - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - - -! --- - -BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0, three_e_double_parrallel_spin - - three_e_double_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_double_parrallel_spin_prov ...' - call wall_time(wall0) - - integral = three_e_double_parrallel_spin(1,1,1,1,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - diff --git a/plugins/local/slater_tc_no_opt/.gitignore b/plugins/local/slater_tc_no_opt/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/slater_tc_no_opt/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/slater_tc_no_opt/NEED b/plugins/local/slater_tc_no_opt/NEED new file mode 100644 index 00000000..a8669866 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/NEED @@ -0,0 +1,8 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat +tc_scf diff --git a/plugins/local/slater_tc_no_opt/README.rst b/plugins/local/slater_tc_no_opt/README.rst new file mode 100644 index 00000000..90679e4c --- /dev/null +++ b/plugins/local/slater_tc_no_opt/README.rst @@ -0,0 +1,4 @@ +================ +slater_tc_no_opt +================ + diff --git a/plugins/local/slater_tc/h_biortho.irp.f b/plugins/local/slater_tc_no_opt/h_biortho.irp.f similarity index 100% rename from plugins/local/slater_tc/h_biortho.irp.f rename to plugins/local/slater_tc_no_opt/h_biortho.irp.f diff --git a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f new file mode 100644 index 00000000..e2c8f982 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f @@ -0,0 +1,193 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + diff --git a/plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f similarity index 100% rename from plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f rename to plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f diff --git a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f similarity index 99% rename from plugins/local/slater_tc/slater_tc_3e_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f index cb33d343..f7919653 100644 --- a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/plugins/local/slater_tc/slater_tc.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f similarity index 82% rename from plugins/local/slater_tc/slater_tc.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f index 27ab47c5..0fcc587f 100644 --- a/plugins/local/slater_tc/slater_tc.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f @@ -1,4 +1,4 @@ -program slater_tc +program slater_tc_no_opt implicit none BEGIN_DOC ! TODO : Put the documentation of the program here diff --git a/plugins/local/slater_tc/slater_tc_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f similarity index 80% rename from plugins/local/slater_tc/slater_tc_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f index caf7d665..b06fd12f 100644 --- a/plugins/local/slater_tc/slater_tc_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f @@ -61,7 +61,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) else if (degree == 1) then call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2) then @@ -76,7 +76,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, else if((degree == 1) .and. (elec_num .gt. 2) .and. three_e_4_idx_term) then call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) else if((degree == 0) .and. (elec_num .gt. 2) .and. three_e_3_idx_term) then - call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) + call diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) endif endif @@ -95,75 +95,6 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) - - BEGIN_DOC - ! - ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS - ! - END_DOC - - use bitmasks - - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - double precision, intent(out) :: hmono,htwoe,htot - integer :: occ(Nint*bit_kind_size,2) - integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk - double precision :: get_mo_two_e_integral_tc_int - integer(bit_kind) :: key_i_core(Nint,2) - - PROVIDE mo_bi_ortho_tc_two_e - - hmono = 0.d0 - htwoe = 0.d0 - htot = 0.d0 - - call bitstring_to_list_ab(key_i, occ, Ne, Nint) - - do ispin = 1, 2 - do i = 1, Ne(ispin) - ii = occ(i,ispin) - hmono += mo_bi_ortho_tc_one_e(ii,ii) - enddo - enddo - - ! alpha/beta two-body - ispin = 1 - jspin = 2 - do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) - ii = occ(i,ispin) - do j = 1, Ne(jspin) ! electron 2 - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) - enddo - enddo - - ! alpha/alpha two-body - do i = 1, Ne(ispin) - ii = occ(i,ispin) - do j = i+1, Ne(ispin) - jj = occ(j,ispin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - ! beta/beta two-body - do i = 1, Ne(jspin) - ii = occ(i,jspin) - do j = i+1, Ne(jspin) - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - htot = hmono + htwoe - -end - -! --- - subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC diff --git a/src/determinants/slater_rules_general.irp.f b/src/determinants/slater_rules_general.irp.f new file mode 100644 index 00000000..e987c846 --- /dev/null +++ b/src/determinants/slater_rules_general.irp.f @@ -0,0 +1,192 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end From b749796d931401f2c7e966e2c7eeedfff2f4477c Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:33:29 +0200 Subject: [PATCH 029/131] still not compiling --- .../tc_bi_ortho/dressing_vectors_lr.irp.f | 8 ++++---- .../local/tc_bi_ortho/e_corr_bi_ortho.irp.f | 18 +++++++++--------- plugins/local/tc_bi_ortho/print_tc_wf.irp.f | 6 +++--- .../local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 2 +- plugins/local/tc_bi_ortho/tc_som.irp.f | 4 ++-- plugins/local/tc_bi_ortho/tc_utils.irp.f | 10 +++++----- .../local/tc_bi_ortho/test_normal_order.irp.f | 8 ++++---- .../local/tc_bi_ortho/test_tc_bi_ortho.irp.f | 10 +++++----- plugins/local/tc_bi_ortho/test_tc_fock.irp.f | 4 ++-- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f index 0aff9980..135f9d17 100644 --- a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f index 6d5c3b21..4abdc25b 100644 --- a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f index 2b88bc5b..ab5ce371 100644 --- a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f @@ -61,12 +61,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4c3c0788..5cbf26d2 100644 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -14,7 +14,7 @@ call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 diff --git a/plugins/local/tc_bi_ortho/tc_som.irp.f b/plugins/local/tc_bi_ortho/tc_som.irp.f index 1d11c81b..6bdcc1f0 100644 --- a/plugins/local/tc_bi_ortho/tc_som.irp.f +++ b/plugins/local/tc_bi_ortho/tc_som.irp.f @@ -49,8 +49,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/plugins/local/tc_bi_ortho/tc_utils.irp.f b/plugins/local/tc_bi_ortho/tc_utils.irp.f index 43a6865e..2aa148a3 100644 --- a/plugins/local/tc_bi_ortho/tc_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_utils.irp.f @@ -25,7 +25,7 @@ subroutine write_tc_energy() E_2e_tmp(i) = 0.d0 E_3e_tmp(i) = 0.d0 do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC_tmp(i) = E_TC_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htot E_1e_tmp(i) = E_1e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hmono E_2e_tmp(i) = E_2e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htwoe @@ -70,7 +70,7 @@ subroutine write_tc_energy() E_3e = 0.d0 do i = 1, N_det do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot E_1e = E_1e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * hmono E_2e = E_2e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htwoe @@ -109,8 +109,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo @@ -132,7 +132,7 @@ subroutine write_tc_gs_var_HF() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) SIGMA_TC = SIGMA_TC + htot * htot enddo diff --git a/plugins/local/tc_bi_ortho/test_normal_order.irp.f b/plugins/local/tc_bi_ortho/test_normal_order.irp.f index 0cf27396..7b4c558f 100644 --- a/plugins/local/tc_bi_ortho/test_normal_order.irp.f +++ b/plugins/local/tc_bi_ortho/test_normal_order.irp.f @@ -54,7 +54,7 @@ subroutine test if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree_tmp *= phase @@ -66,7 +66,7 @@ subroutine test if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree_tmp *= phase @@ -109,7 +109,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -145,7 +145,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f index 369efd15..559c0200 100644 --- a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -88,7 +88,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -124,7 +124,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -171,7 +171,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -208,7 +208,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -250,7 +250,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f index 85f3ed97..b33b2e93 100644 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f @@ -64,7 +64,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -130,7 +130,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij ! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' From 366afb2933baba919db1ad85b7eee965ea56d0c6 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:53:20 +0200 Subject: [PATCH 030/131] compiling after some cleaning --- plugins/local/old_delta_tc_qmc/NEED | 1 + plugins/local/old_delta_tc_qmc/README.rst | 4 + .../compute_deltamu_right.irp.f | 0 .../dressing_vectors_lr.irp.f | 0 .../old_delta_tc_qmc/old_delta_tc_qmc.irp.f | 7 + plugins/local/slater_tc/h_mat_triple.irp.f | 198 ++++++++++++++++++ .../local/slater_tc_no_opt/h_mat_triple.irp.f | 193 ----------------- .../test_tc_bi_ortho.irp.f | 0 plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f | 129 ------------ plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f | 36 ---- .../local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 145 ------------- plugins/local/tc_bi_ortho/test_s2_tc.irp.f | 170 --------------- 12 files changed, 210 insertions(+), 673 deletions(-) create mode 100644 plugins/local/old_delta_tc_qmc/NEED create mode 100644 plugins/local/old_delta_tc_qmc/README.rst rename plugins/local/{tc_bi_ortho => old_delta_tc_qmc}/compute_deltamu_right.irp.f (100%) rename plugins/local/{tc_bi_ortho => old_delta_tc_qmc}/dressing_vectors_lr.irp.f (100%) create mode 100644 plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f create mode 100644 plugins/local/slater_tc/h_mat_triple.irp.f delete mode 100644 plugins/local/slater_tc_no_opt/h_mat_triple.irp.f rename plugins/local/{tc_bi_ortho => slater_tc_no_opt}/test_tc_bi_ortho.irp.f (100%) delete mode 100644 plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f delete mode 100644 plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f delete mode 100644 plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_s2_tc.irp.f diff --git a/plugins/local/old_delta_tc_qmc/NEED b/plugins/local/old_delta_tc_qmc/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/old_delta_tc_qmc/README.rst b/plugins/local/old_delta_tc_qmc/README.rst new file mode 100644 index 00000000..1d56f96c --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/README.rst @@ -0,0 +1,4 @@ +================ +old_delta_tc_qmc +================ + diff --git a/plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f b/plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f rename to plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f rename to plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f diff --git a/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f new file mode 100644 index 00000000..5ff08bd6 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f @@ -0,0 +1,7 @@ +program old_delta_tc_qmc + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/plugins/local/slater_tc/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f new file mode 100644 index 00000000..9cb4b60a --- /dev/null +++ b/plugins/local/slater_tc/h_mat_triple.irp.f @@ -0,0 +1,198 @@ +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(3,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(3,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(3,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(3,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) + else + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) + else + print*,'PB !!' + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + endif + hthree *= phase + htot = hthree + end + diff --git a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f deleted file mode 100644 index e2c8f982..00000000 --- a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f +++ /dev/null @@ -1,193 +0,0 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f rename to plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f diff --git a/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f b/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f deleted file mode 100644 index 8940a4f6..00000000 --- a/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f +++ /dev/null @@ -1,129 +0,0 @@ -program pt2_tc_cisd - - BEGIN_DOC - ! - ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together - ! with the energy. Saves the left-right wave functions at the end. - ! - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - print*, ' nb of states = ', N_states - print*, ' nb of det = ', N_det - call routine_diag() - - call routine -end - -subroutine routine - implicit none - integer :: i,h1,p1,h2,p2,s1,s2,degree - double precision :: h0i,hi0,e00,ei,delta_e - double precision :: norm,e_corr,coef,e_corr_pos,e_corr_neg,e_corr_abs - - integer :: exc(0:2,2,2) - double precision :: phase - double precision :: eh1,ep1,eh2,ep2 - - norm = 0.d0 - e_corr = 0.d0 - e_corr_abs = 0.d0 - e_corr_pos = 0.d0 - e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) - do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) - call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) - call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - eh1 = Fock_matrix_tc_diag_mo_tot(h1) - ep1 = Fock_matrix_tc_diag_mo_tot(p1) - delta_e = eh1 - ep1 - if (degree==2)then - eh2 = Fock_matrix_tc_diag_mo_tot(h2) - ep2 = Fock_matrix_tc_diag_mo_tot(p2) - delta_e += eh2 - ep2 - endif -! delta_e = e00 - ei - coef = hi0/delta_e - norm += coef*coef - e_corr = coef* h0i - if(e_corr.lt.0.d0)then - e_corr_neg += e_corr - elseif(e_corr.gt.0.d0)then - e_corr_pos += e_corr - endif - e_corr_abs += dabs(e_corr) - enddo - print*,'e_corr_abs = ',e_corr_abs - print*,'e_corr_pos = ',e_corr_pos - print*,'e_corr_neg = ',e_corr_neg - print*,'norm = ',dsqrt(norm) - -end - -subroutine routine_diag() - - implicit none - integer :: i, j, k - double precision :: dE - - ! provide eigval_right_tc_bi_orth - ! provide overlap_bi_ortho - ! provide htilde_matrix_elmt_bi_ortho - - if(N_states .eq. 1) then - - print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs - print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs - print*,'Left/right eigenvectors' - do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) - enddo - - else - - print*,'eigval_right_tc_bi_orth : ' - do i = 1, N_states - print*, i, eigval_right_tc_bi_orth(i) - enddo - - print*,'' - print*,'******************************************************' - print*,'TC Excitation energies (au) (eV)' - do i = 2, N_states - dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) - print*, i, dE, dE/0.0367502d0 - enddo - print*,'' - - endif - -end - - - diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f deleted file mode 100644 index d4c8c55d..00000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f +++ /dev/null @@ -1,36 +0,0 @@ - -! --- - -program tc_cisd_sc2 - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call test - -end - -! --- - -subroutine test() - implicit none -! double precision, allocatable :: dressing_dets(:),e_corr_dets(:) -! allocate(dressing_dets(N_det),e_corr_dets(N_det)) -! e_corr_dets = 0.d0 -! call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - provide eigval_tc_cisd_sc2_bi_ortho -end diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f deleted file mode 100644 index 5cbf26d2..00000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ /dev/null @@ -1,145 +0,0 @@ - BEGIN_PROVIDER [ double precision, reigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)] - implicit none - integer :: it,n_real,degree,i,istate - double precision :: e_before, e_current,thr, hmono,htwoe,hthree,accu - double precision, allocatable :: e_corr_dets(:),h0j(:), h_sc2(:,:), dressing_dets(:) - double precision, allocatable :: leigvec_tc_bi_orth_tmp(:,:),reigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) - allocate(leigvec_tc_bi_orth_tmp(N_det,N_det),reigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - allocate(e_corr_dets(N_det),h0j(N_det),h_sc2(N_det,N_det),dressing_dets(N_det)) - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) - dressing_dets = 0.d0 - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) - endif - enddo - reigvec_tc_bi_orth_tmp = 0.d0 - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - do i = 1, N_det - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo - E_before = eigval_tmp(1) - print*,'Starting from ',E_before - - e_current = 10.d0 - thr = 1.d-5 - it = 0 - dressing_dets = 0.d0 - double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav_slow - external htcdag_bi_ortho_calc_tdav_slow - logical :: converged - do while (dabs(E_before-E_current).gt.thr) - it += 1 - E_before = E_current -! h_sc2 = htilde_matrix_elmt_bi_ortho - call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - do i = 1, N_det -! print*,'dressing_dets(i) = ',dressing_dets(i) - h_sc2(i,i) += dressing_dets(i) - enddo - print*,'********************' - print*,'iteration ',it -! call non_hrmt_real_diag(N_det,h_sc2,& -! leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& -! n_real,eigval_right_tmp) -! print*,'eigval_right_tmp(1)',eigval_right_tmp(1) - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - print*,'outside Davidson' - print*,'eigval_tmp(1) = ',eigval_tmp(1) - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1) - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo -! E_current = eigval_right_tmp(1) - E_current = eigval_tmp(1) - print*,'it, E(SC)^2 = ',it,E_current - enddo - eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_right_tmp(1:N_states) - reigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - leigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = leigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - -END_PROVIDER - -subroutine get_cisd_sc2_dressing(dets,e_corr_dets,ndet,dressing_dets) - implicit none - use bitmasks - integer, intent(in) :: ndet - integer(bit_kind), intent(in) :: dets(N_int,2,ndet) - double precision, intent(in) :: e_corr_dets(ndet) - double precision, intent(out) :: dressing_dets(ndet) - integer, allocatable :: degree(:),hole(:,:),part(:,:),spin(:,:) - integer(bit_kind), allocatable :: hole_part(:,:,:) - integer :: i,j,k, exc(0:2,2,2),h1,p1,h2,p2,s1,s2 - integer(bit_kind) :: xorvec(2,N_int) - - double precision :: phase - dressing_dets = 0.d0 - allocate(degree(ndet),hole(2,ndet),part(2,ndet), spin(2,ndet),hole_part(N_int,2,ndet)) - do i = 2, ndet - call get_excitation_degree(HF_bitmask,dets(1,1,i),degree(i),N_int) - do j = 1, N_int - hole_part(j,1,i) = xor( HF_bitmask(j,1), dets(j,1,i)) - hole_part(j,2,i) = xor( HF_bitmask(j,2), dets(j,2,i)) - enddo - if(degree(i) == 1)then - call get_single_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - else if(degree(i) == 2)then - call get_double_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - endif - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - hole(1,i) = h1 - hole(2,i) = h2 - part(1,i) = p1 - part(2,i) = p2 - spin(1,i) = s1 - spin(2,i) = s2 - enddo - - integer :: same - if(elec_alpha_num+elec_beta_num<3)return - do i = 2, ndet - do j = i+1, ndet - same = 0 - if(degree(i) == degree(j) .and. degree(i)==1)cycle - do k = 1, N_int - xorvec(k,1) = iand(hole_part(k,1,i),hole_part(k,1,j)) - xorvec(k,2) = iand(hole_part(k,2,i),hole_part(k,2,j)) - same += popcnt(xorvec(k,1)) + popcnt(xorvec(k,2)) - enddo -! print*,'i,j',i,j -! call debug_det(dets(1,1,i),N_int) -! call debug_det(hole_part(1,1,i),N_int) -! call debug_det(dets(1,1,j),N_int) -! call debug_det(hole_part(1,1,j),N_int) -! print*,'same = ',same - if(same.eq.0)then - dressing_dets(i) += e_corr_dets(j) - dressing_dets(j) += e_corr_dets(i) - endif - enddo - enddo - -end diff --git a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f b/plugins/local/tc_bi_ortho/test_s2_tc.irp.f deleted file mode 100644 index 7c70b119..00000000 --- a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f +++ /dev/null @@ -1,170 +0,0 @@ - -! --- - -program test_tc - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call routine_h_triple_left - call routine_h_triple_right -! call routine_test_s2_davidson - -end - -subroutine routine_h_triple_right - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Right ' - do i = 1, sze - u_0(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - -subroutine routine_h_triple_left - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking the Left ' - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - - -subroutine routine_test_s2_davidson - implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) - integer :: i,istate - logical :: converged - external H_tc_s2_dagger_u_0_opt - external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - enddo - ! Preparing the left-eigenvector - print*,'Computing the left-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - integer :: n_it_max - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - integer :: sze,N_st - logical :: do_right - sze = N_det - N_st = 1 - do_right = .False. - allocate(s_0_new(N_det,1),v_0_new(N_det,1)) - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - double precision :: accu_e_0, accu_s_0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - - ! Preparing the right-eigenvector - print*,'Computing the right-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) - sze = N_det - N_st = 1 - do_right = .True. - v_0_new = 0.d0 - s_0_new = 0.d0 - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - -end From 2a8b9e544b8c9f47ce55dd8f0c4e7df5b0a67ea1 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 7 May 2024 01:56:14 +0200 Subject: [PATCH 031/131] working on aos debug --- plugins/local/non_h_ints_mu/deb_aos.irp.f | 16 +- src/ao_basis/aos_in_r.irp.f | 508 ++++++++++++---------- 2 files changed, 285 insertions(+), 239 deletions(-) diff --git a/plugins/local/non_h_ints_mu/deb_aos.irp.f b/plugins/local/non_h_ints_mu/deb_aos.irp.f index c9bc9c9a..a84e1b91 100644 --- a/plugins/local/non_h_ints_mu/deb_aos.irp.f +++ b/plugins/local/non_h_ints_mu/deb_aos.irp.f @@ -31,12 +31,14 @@ subroutine print_aos() integer :: i, ipoint double precision :: r(3) double precision :: ao_val, ao_der(3), ao_lap + double precision :: mo_val, mo_der(3), mo_lap PROVIDE final_grid_points aos_in_r_array aos_grad_in_r_array aos_lapl_in_r_array + write(1000, *) n_points_final_grid do ipoint = 1, n_points_final_grid r(:) = final_grid_points(:,ipoint) - print*, r + write(1000, '(3(f15.7, 3X))') r enddo do ipoint = 1, n_points_final_grid @@ -45,7 +47,17 @@ subroutine print_aos() ao_val = aos_in_r_array (i,ipoint) ao_der(:) = aos_grad_in_r_array(i,ipoint,:) ao_lap = aos_lapl_in_r_array(1,i,ipoint) + aos_lapl_in_r_array(2,i,ipoint) + aos_lapl_in_r_array(3,i,ipoint) - write(*, '(5(f15.7, 3X))') ao_val, ao_der, ao_lap + write(1010, '(5(f15.7, 3X))') ao_val, ao_der, ao_lap + enddo + enddo + + do ipoint = 1, n_points_final_grid + r(:) = final_grid_points(:,ipoint) + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + write(2010, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap enddo enddo diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 1b1595a3..053c86a2 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -1,67 +1,76 @@ -double precision function ao_value(i,r) - implicit none - BEGIN_DOC -! Returns the value of the i-th ao at point $\textbf{r}$ - END_DOC - double precision, intent(in) :: r(3) - integer, intent(in) :: i - integer :: m,num_ao - double precision :: center_ao(3) - double precision :: beta - integer :: power_ao(3) - double precision :: accu,dx,dy,dz,r2 - num_ao = ao_nucl(i) - power_ao(1:3)= ao_power(i,1:3) - center_ao(1:3) = nucl_coord(num_ao,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - dx = dx**power_ao(1) - dy = dy**power_ao(2) - dz = dz**power_ao(3) +! --- - accu = 0.d0 - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) - accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) - enddo - ao_value = accu * dx * dy * dz +double precision function ao_value(i, r) + + BEGIN_DOC + ! Returns the value of the i-th ao at point $\textbf{r}$ + END_DOC + + implicit none + integer, intent(in) :: i + double precision, intent(in) :: r(3) + + integer :: m, num_ao + integer :: power_ao(3) + double precision :: center_ao(3) + double precision :: beta + double precision :: accu, dx, dy, dz, r2 + + num_ao = ao_nucl(i) + power_ao(1:3) = ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) + + accu = 0.d0 + do m = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) + enddo + ao_value = accu * dx * dy * dz end -double precision function primitive_value(i,j,r) - implicit none - BEGIN_DOC -! Returns the value of the j-th primitive of the i-th |AO| at point $\textbf{r} -! **without the coefficient** - END_DOC - double precision, intent(in) :: r(3) - integer, intent(in) :: i,j +double precision function primitive_value(i, j, r) - integer :: m,num_ao - double precision :: center_ao(3) - double precision :: beta - integer :: power_ao(3) - double precision :: accu,dx,dy,dz,r2 - num_ao = ao_nucl(i) - power_ao(1:3)= ao_power(i,1:3) - center_ao(1:3) = nucl_coord(num_ao,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - dx = dx**power_ao(1) - dy = dy**power_ao(2) - dz = dz**power_ao(3) + BEGIN_DOC + ! Returns the value of the j-th primitive of the i-th |AO| at point $\textbf{r} + ! **without the coefficient** + END_DOC - accu = 0.d0 - m=j - beta = ao_expo_ordered_transp(m,i) - accu += dexp(-beta*r2) - primitive_value = accu * dx * dy * dz + implicit none + integer, intent(in) :: i, j + double precision, intent(in) :: r(3) + + integer :: m, num_ao + integer :: power_ao(3) + double precision :: center_ao(3) + double precision :: beta + double precision :: accu, dx, dy, dz, r2 + + num_ao = ao_nucl(i) + power_ao(1:3)= ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) + + accu = 0.d0 + m = j + beta = ao_expo_ordered_transp(m,i) + accu += dexp(-beta*r2) + primitive_value = accu * dx * dy * dz end @@ -104,9 +113,9 @@ subroutine give_all_aos_at_r(r, tmp_array) dz2 = dz**p_ao(3) tmp_array(k) = 0.d0 - do l = 1,ao_prim_num(k) + do l = 1, ao_prim_num(k) beta = ao_expo_ordered_transp_per_nucl(l,j,i) - if(dabs(beta*r2).gt.40.d0) cycle + if(beta*r2.gt.50.d0) cycle tmp_array(k) += ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) enddo @@ -120,207 +129,232 @@ end ! --- -subroutine give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) - implicit none - BEGIN_DOC -! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z -! -! output : -! -! * aos_array(i) = ao(i) evaluated at ro -! * aos_grad_array(1,i) = gradient X of the ao(i) evaluated at $\textbf{r}$ -! - END_DOC - double precision, intent(in) :: r(3) - double precision, intent(out) :: aos_array(ao_num) - double precision, intent(out) :: aos_grad_array(3,ao_num) +subroutine give_all_aos_and_grad_at_r(r, aos_array, aos_grad_array) - integer :: power_ao(3) - integer :: i,j,k,l,m - double precision :: dx,dy,dz,r2 - double precision :: dx2,dy2,dz2 - double precision :: dx1,dy1,dz1 - double precision :: center_ao(3) - double precision :: beta,accu_1,accu_2,contrib - do i = 1, nucl_num - center_ao(1:3) = nucl_coord(i,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - do j = 1,Nucl_N_Aos(i) - k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format - aos_array(k) = 0.d0 - aos_grad_array(1,k) = 0.d0 - aos_grad_array(2,k) = 0.d0 - aos_grad_array(3,k) = 0.d0 - power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) - dx2 = dx**power_ao(1) - dy2 = dy**power_ao(2) - dz2 = dz**power_ao(3) - if(power_ao(1) .ne. 0)then - dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) - else - dx1 = 0.d0 - endif - if(power_ao(2) .ne. 0)then - dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) - else - dy1 = 0.d0 - endif - if(power_ao(3) .ne. 0)then - dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) - else - dz1 = 0.d0 - endif - accu_1 = 0.d0 - accu_2 = 0.d0 - do l = 1,ao_prim_num(k) - beta = ao_expo_ordered_transp_per_nucl(l,j,i) - contrib = 0.d0 - if(beta*r2.gt.50.d0)cycle - contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) - accu_1 += contrib - accu_2 += contrib * beta - enddo - aos_array(k) = accu_1 * dx2 * dy2 * dz2 - aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2- 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 - aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2- 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 - aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1- 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + BEGIN_DOC + ! + ! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z + ! + ! output : + ! + ! * aos_array(i) = ao(i) evaluated at ro + ! * aos_grad_array(1,i) = gradient X of the ao(i) evaluated at $\textbf{r}$ + ! + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: aos_array(ao_num) + double precision, intent(out) :: aos_grad_array(3,ao_num) + + integer :: power_ao(3) + integer :: i, j, k, l, m + double precision :: dx, dy, dz, r2 + double precision :: dx1, dy1, dz1 + double precision :: dx2, dy2, dz2 + double precision :: center_ao(3) + double precision :: beta, accu_1, accu_2, contrib + + do i = 1, nucl_num + + center_ao(1:3) = nucl_coord(i,1:3) + + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + + do j = 1, Nucl_N_Aos(i) + + k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format + + aos_array(k) = 0.d0 + aos_grad_array(1,k) = 0.d0 + aos_grad_array(2,k) = 0.d0 + aos_grad_array(3,k) = 0.d0 + + power_ao(1:3) = ao_power_ordered_transp_per_nucl(1:3,j,i) + dx2 = dx**power_ao(1) + dy2 = dy**power_ao(2) + dz2 = dz**power_ao(3) + + dx1 = 0.d0 + if(power_ao(1) .ne. 0) then + dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) + endif + + dy1 = 0.d0 + if(power_ao(2) .ne. 0) then + dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) + endif + + dz1 = 0.d0 + if(power_ao(3) .ne. 0) then + dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) + endif + + accu_1 = 0.d0 + accu_2 = 0.d0 + do l = 1, ao_prim_num(k) + beta = ao_expo_ordered_transp_per_nucl(l,j,i) + if(beta*r2.gt.50.d0) cycle + contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) + accu_1 += contrib + accu_2 += contrib * beta + enddo + + aos_array(k) = accu_1 * dx2 * dy2 * dz2 + aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2 - 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 + aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2 - 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 + aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1 - 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + enddo enddo - enddo + end +! --- -subroutine give_all_aos_and_grad_and_lapl_at_r(r,aos_array,aos_grad_array,aos_lapl_array) - implicit none - BEGIN_DOC -! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z -! -! output : -! -! * aos_array(i) = ao(i) evaluated at $\textbf{r}$ -! * aos_grad_array(1,i) = $\nabla_x$ of the ao(i) evaluated at $\textbf{r}$ - END_DOC - double precision, intent(in) :: r(3) - double precision, intent(out) :: aos_array(ao_num) - double precision, intent(out) :: aos_grad_array(3,ao_num) - double precision, intent(out) :: aos_lapl_array(3,ao_num) +subroutine give_all_aos_and_grad_and_lapl_at_r(r, aos_array, aos_grad_array, aos_lapl_array) - integer :: power_ao(3) - integer :: i,j,k,l,m - double precision :: dx,dy,dz,r2 - double precision :: dx2,dy2,dz2 - double precision :: dx1,dy1,dz1 - double precision :: dx3,dy3,dz3 - double precision :: dx4,dy4,dz4 - double precision :: dx5,dy5,dz5 - double precision :: center_ao(3) - double precision :: beta,accu_1,accu_2,accu_3,contrib - do i = 1, nucl_num - center_ao(1:3) = nucl_coord(i,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - do j = 1,Nucl_N_Aos(i) - k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format - aos_array(k) = 0.d0 - aos_grad_array(1,k) = 0.d0 - aos_grad_array(2,k) = 0.d0 - aos_grad_array(3,k) = 0.d0 + BEGIN_DOC + ! + ! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z + ! + ! output : + ! + ! * aos_array(i) = ao(i) evaluated at $\textbf{r}$ + ! * aos_grad_array(1,i) = $\nabla_x$ of the ao(i) evaluated at $\textbf{r}$ + ! + END_DOC - aos_lapl_array(1,k) = 0.d0 - aos_lapl_array(2,k) = 0.d0 - aos_lapl_array(3,k) = 0.d0 + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: aos_array(ao_num) + double precision, intent(out) :: aos_grad_array(3,ao_num) + double precision, intent(out) :: aos_lapl_array(3,ao_num) - power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) - dx2 = dx**power_ao(1) - dy2 = dy**power_ao(2) - dz2 = dz**power_ao(3) - if(power_ao(1) .ne. 0)then - dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) - else - dx1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(1) .ge. 2)then - dx3 = dble(power_ao(1)) * dble((power_ao(1)-1)) * dx**(power_ao(1)-2) - else - dx3 = 0.d0 - endif - if(power_ao(1) .ge. 1)then - dx4 = dble((2 * power_ao(1) + 1)) * dx**(power_ao(1)) - else - dx4 = dble((power_ao(1) + 1)) * dx**(power_ao(1)) - endif + integer :: power_ao(3) + integer :: i, j, k, l, m + double precision :: dx, dy, dz, r2 + double precision :: dx1, dy1, dz1 + double precision :: dx2, dy2, dz2 + double precision :: dx3, dy3, dz3 + double precision :: dx4, dy4, dz4 + double precision :: dx5, dy5, dz5 + double precision :: center_ao(3) + double precision :: beta, accu_1, accu_2, accu_3, contrib - dx5 = dx**(power_ao(1)+2) + do i = 1, nucl_num - if(power_ao(2) .ne. 0)then - dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) - else - dy1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(2) .ge. 2)then - dy3 = dble(power_ao(2)) * dble((power_ao(2)-1)) * dy**(power_ao(2)-2) - else - dy3 = 0.d0 - endif + center_ao(1:3) = nucl_coord(i,1:3) - if(power_ao(2) .ge. 1)then - dy4 = dble((2 * power_ao(2) + 1)) * dy**(power_ao(2)) - else - dy4 = dble((power_ao(2) + 1)) * dy**(power_ao(2)) - endif + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + + do j = 1, Nucl_N_Aos(i) - dy5 = dy**(power_ao(2)+2) + k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format + aos_array(k) = 0.d0 + aos_grad_array(1,k) = 0.d0 + aos_grad_array(2,k) = 0.d0 + aos_grad_array(3,k) = 0.d0 + aos_lapl_array(1,k) = 0.d0 + aos_lapl_array(2,k) = 0.d0 + aos_lapl_array(3,k) = 0.d0 + + power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) + dx2 = dx**power_ao(1) + dy2 = dy**power_ao(2) + dz2 = dz**power_ao(3) - if(power_ao(3) .ne. 0)then - dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) - else - dz1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(3) .ge. 2)then - dz3 = dble(power_ao(3)) * dble((power_ao(3)-1)) * dz**(power_ao(3)-2) - else - dz3 = 0.d0 - endif + ! --- - if(power_ao(3) .ge. 1)then - dz4 = dble((2 * power_ao(3) + 1)) * dz**(power_ao(3)) - else - dz4 = dble((power_ao(3) + 1)) * dz**(power_ao(3)) - endif + dx1 = 0.d0 + if(power_ao(1) .ne. 0) then + dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) + endif - dz5 = dz**(power_ao(3)+2) + dx3 = 0.d0 + if(power_ao(1) .ge. 2) then + dx3 = dble(power_ao(1)) * dble((power_ao(1)-1)) * dx**(power_ao(1)-2) + endif + if(power_ao(1) .ge. 1) then + dx4 = dble((2 * power_ao(1) + 1)) * dx**(power_ao(1)) + else + dx4 = dble((power_ao(1) + 1)) * dx**(power_ao(1)) + endif + + dx5 = dx**(power_ao(1)+2) + + ! --- + + dy1 = 0.d0 + if(power_ao(2) .ne. 0) then + dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) + endif - accu_1 = 0.d0 - accu_2 = 0.d0 - accu_3 = 0.d0 - do l = 1,ao_prim_num(k) - beta = ao_expo_ordered_transp_per_nucl(l,j,i) - contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) - accu_1 += contrib - accu_2 += contrib * beta - accu_3 += contrib * beta**2 - enddo - aos_array(k) = accu_1 * dx2 * dy2 * dz2 + dy3 = 0.d0 + if(power_ao(2) .ge. 2) then + dy3 = dble(power_ao(2)) * dble((power_ao(2)-1)) * dy**(power_ao(2)-2) + endif + + if(power_ao(2) .ge. 1) then + dy4 = dble((2 * power_ao(2) + 1)) * dy**(power_ao(2)) + else + dy4 = dble((power_ao(2) + 1)) * dy**(power_ao(2)) + endif + + dy5 = dy**(power_ao(2)+2) - aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2- 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 - aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2- 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 - aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1- 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + ! --- + + dz1 = 0.d0 + if(power_ao(3) .ne. 0) then + dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) + endif - aos_lapl_array(1,k) = accu_1 * dx3 * dy2 * dz2- 2.d0 * dx4 * dy2 * dz2* accu_2 +4.d0 * dx5 *dy2 * dz2* accu_3 - aos_lapl_array(2,k) = accu_1 * dx2 * dy3 * dz2- 2.d0 * dx2 * dy4 * dz2* accu_2 +4.d0 * dx2 *dy5 * dz2* accu_3 - aos_lapl_array(3,k) = accu_1 * dx2 * dy2 * dz3- 2.d0 * dx2 * dy2 * dz4* accu_2 +4.d0 * dx2 *dy2 * dz5* accu_3 + dz3 = 0.d0 + if(power_ao(3) .ge. 2) then + dz3 = dble(power_ao(3)) * dble((power_ao(3)-1)) * dz**(power_ao(3)-2) + endif + + if(power_ao(3) .ge. 1) then + dz4 = dble((2 * power_ao(3) + 1)) * dz**(power_ao(3)) + else + dz4 = dble((power_ao(3) + 1)) * dz**(power_ao(3)) + endif + + dz5 = dz**(power_ao(3)+2) + + ! --- + + accu_1 = 0.d0 + accu_2 = 0.d0 + accu_3 = 0.d0 + do l = 1,ao_prim_num(k) + beta = ao_expo_ordered_transp_per_nucl(l,j,i) + if(beta*r2.gt.50.d0) cycle + contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) + accu_1 += contrib + accu_2 += contrib * beta + accu_3 += contrib * beta**2 + enddo + aos_array(k) = accu_1 * dx2 * dy2 * dz2 + aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2 - 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 + aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2 - 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 + aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1 - 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + aos_lapl_array(1,k) = accu_1 * dx3 * dy2 * dz2 - 2.d0 * dx4 * dy2 * dz2 * accu_2 + 4.d0 * dx5 * dy2 * dz2 * accu_3 + aos_lapl_array(2,k) = accu_1 * dx2 * dy3 * dz2 - 2.d0 * dx2 * dy4 * dz2 * accu_2 + 4.d0 * dx2 * dy5 * dz2 * accu_3 + aos_lapl_array(3,k) = accu_1 * dx2 * dy2 * dz3 - 2.d0 * dx2 * dy2 * dz4 * accu_2 + 4.d0 * dx2 * dy2 * dz5 * accu_3 + enddo enddo - enddo + end +! --- From 17ae4d8fe2f103bac46205380ae0e6a33736de71 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 18:27:09 +0200 Subject: [PATCH 032/131] added tc_progs --- .../local/cipsi_tc_bi_ortho/selection.irp.f | 4 +- ..._bi_ortho.irp.f => diagonalize_tc_h.irp.f} | 0 plugins/local/tc_bi_ortho/test_natorb.irp.f | 64 ------- .../local/tc_bi_ortho/test_normal_order.irp.f | 173 ------------------ plugins/local/tc_bi_ortho/test_tc_fock.irp.f | 171 ----------------- plugins/local/tc_progs/NEED | 1 + .../print_he_tc_energy.irp.f | 0 .../print_tc_dump.irp.f | 0 .../print_tc_energy.irp.f | 0 .../print_tc_spin_dens.irp.f | 0 .../print_tc_var.irp.f | 0 .../print_tc_wf.irp.f | 0 .../save_bitcpsileft_for_qmcchem.irp.f | 0 .../save_tc_bi_ortho_nat.irp.f | 0 .../select_dets_bi_ortho.irp.f | 0 .../tc_bi_ortho_prop.irp.f | 0 .../{tc_bi_ortho => tc_progs}/tc_som.irp.f | 0 .../test_tc_two_rdm.irp.f | 0 18 files changed, 3 insertions(+), 410 deletions(-) rename plugins/local/tc_bi_ortho/{tc_bi_ortho.irp.f => diagonalize_tc_h.irp.f} (100%) delete mode 100644 plugins/local/tc_bi_ortho/test_natorb.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_normal_order.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_tc_fock.irp.f create mode 100644 plugins/local/tc_progs/NEED rename plugins/local/{tc_bi_ortho => tc_progs}/print_he_tc_energy.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_dump.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_energy.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_spin_dens.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_var.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_wf.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/save_bitcpsileft_for_qmcchem.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/save_tc_bi_ortho_nat.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/select_dets_bi_ortho.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/tc_bi_ortho_prop.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/tc_som.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/test_tc_two_rdm.irp.f (100%) diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 12163e06..0b4345d5 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -892,8 +892,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_opt_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/plugins/local/tc_bi_ortho/tc_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_bi_ortho.irp.f rename to plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f diff --git a/plugins/local/tc_bi_ortho/test_natorb.irp.f b/plugins/local/tc_bi_ortho/test_natorb.irp.f deleted file mode 100644 index 5b8801f7..00000000 --- a/plugins/local/tc_bi_ortho/test_natorb.irp.f +++ /dev/null @@ -1,64 +0,0 @@ - -! --- - -program test_natorb - - BEGIN_DOC - ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call routine() - ! call test() - -end - -! --- - -subroutine routine() - - implicit none - double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:) - allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num)) - double precision, allocatable :: eigval_ref(:),leigvec_ref(:,:),reigvec_ref(:,:) - allocate(eigval_ref(mo_num),leigvec_ref(mo_num,mo_num),reigvec_ref(mo_num,mo_num)) - - double precision :: thr_deg - integer :: i,n_real,j - print*,'fock_matrix' - do i = 1, mo_num - fock_diag(i) = Fock_matrix_mo(i,i) - print*,i,fock_diag(i) - enddo - thr_deg = 1.d-6 - mat_ref = -one_e_dm_mo - print*,'diagonalization by block' - call diag_mat_per_fock_degen(fock_diag,mat_ref,mo_num,thr_deg,leigvec,reigvec,eigval) - call non_hrmt_bieig( mo_num, mat_ref& - , leigvec_ref, reigvec_ref& - , n_real, eigval_ref) - print*,'TEST ***********************************' - double precision :: accu_l, accu_r - do i = 1, mo_num - accu_l = 0.d0 - accu_r = 0.d0 - do j = 1, mo_num - accu_r += reigvec_ref(j,i) * reigvec(j,i) - accu_l += leigvec_ref(j,i) * leigvec(j,i) - enddo - print*,i - write(*,'(I3,X,100(F16.10,X))')i,eigval(i),eigval_ref(i),accu_l,accu_r - enddo -end diff --git a/plugins/local/tc_bi_ortho/test_normal_order.irp.f b/plugins/local/tc_bi_ortho/test_normal_order.irp.f deleted file mode 100644 index 7b4c558f..00000000 --- a/plugins/local/tc_bi_ortho/test_normal_order.irp.f +++ /dev/null @@ -1,173 +0,0 @@ - -! --- - -program test_normal_order - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call test() - -end - -! --- - -subroutine test - implicit none - use bitmasks ! you need to include the bitmasks_module.f90 features - integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) - integer :: exc(0:2,2,2) - integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp - integer, allocatable :: occ(:,:) - allocate( occ(N_int*bit_kind_size,2) ) - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - allocate(det_i(N_int,2)) - s1 = 1 - s2 = 2 - accu = 0.d0 - do h1 = 1, elec_beta_num - do p1 = elec_alpha_num+1, mo_num - do h2 = 1, elec_beta_num - do p2 = elec_beta_num+1, mo_num - hthree = 0.d0 - - det_i = ref_bitmask - s1 = 1 - s2 = 2 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - det_i = ref_bitmask - s1 = 2 - s2 = 1 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - - -! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then -! print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) -! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - accu += dabs(hthree-normal) - enddo - enddo - enddo - enddo -print*,'accu opposite spin = ',accu -stop - -! p2=6 -! p1=5 -! h2=2 -! h1=1 - -s1 = 1 -s2 = 1 -accu = 0.d0 -do h1 = 1, elec_alpha_num - do p1 = elec_alpha_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_alpha_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - integer :: hh1, pp1, hh2, pp2, ss1, ss2 - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase - normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) -! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin alpha = ',accu - - -s1 = 2 -s2 = 2 -accu = 0.d0 -do h1 = 1, elec_beta_num - do p1 = elec_beta_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_beta_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin beta = ',accu - - -end - - diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f deleted file mode 100644 index b33b2e93..00000000 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ /dev/null @@ -1,171 +0,0 @@ - -! --- - -program test_tc_fock - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - !call routine_1 - !call routine_2 -! call routine_3() - - call routine_tot - -end - -! --- - -subroutine routine_3() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1 - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - - err_tot = 0.d0 - - do s1 = 1, 2 - - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - - do i = 1, elec_num_tab(s1) - do a = elec_num_tab(s1)+1, mo_num ! virtual - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - print*, ' excited det' - call debug_det(det_i, N_int) - - call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - if(dabs(hthree).lt.1.d-10)cycle - ref = hthree - if(s1 == 1)then - new = fock_a_tot_3e_bi_orth(a,i) - else if(s1 == 2)then - new = fock_b_tot_3e_bi_orth(a,i) - endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo - enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 - -! --- -subroutine routine_tot() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1,other_spin(2) - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - other_spin(1) = 2 - other_spin(2) = 1 - - err_tot = 0.d0 - -! do s1 = 1, 2 - s1 = 2 - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - -! do i = 1, elec_num_tab(s1) -! do a = elec_num_tab(s1)+1, mo_num ! virtual - do i = 1, elec_beta_num - do a = elec_beta_num+1, mo_num! virtual - print*,i,a - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - - call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - print*,htilde_ij -! if(dabs(htilde_ij).lt.1.d-10)cycle - print*, ' excited det' - call debug_det(det_i, N_int) - - if(s1 == 1)then - new = Fock_matrix_tc_mo_alpha(a,i) - else - new = Fock_matrix_tc_mo_beta(a,i) - endif - ref = htilde_ij -! if(s1 == 1)then -! new = fock_a_tot_3e_bi_orth(a,i) -! else if(s1 == 2)then -! new = fock_b_tot_3e_bi_orth(a,i) -! endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'---------' - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - print*,hmono, htwoe, hthree - print*,'---------' - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo -! enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 diff --git a/plugins/local/tc_progs/NEED b/plugins/local/tc_progs/NEED new file mode 100644 index 00000000..9deb3db4 --- /dev/null +++ b/plugins/local/tc_progs/NEED @@ -0,0 +1 @@ +tc_bi_ortho diff --git a/plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f b/plugins/local/tc_progs/print_he_tc_energy.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f rename to plugins/local/tc_progs/print_he_tc_energy.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_dump.irp.f b/plugins/local/tc_progs/print_tc_dump.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_dump.irp.f rename to plugins/local/tc_progs/print_tc_dump.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f b/plugins/local/tc_progs/print_tc_energy.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_energy.irp.f rename to plugins/local/tc_progs/print_tc_energy.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f b/plugins/local/tc_progs/print_tc_spin_dens.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f rename to plugins/local/tc_progs/print_tc_spin_dens.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_var.irp.f b/plugins/local/tc_progs/print_tc_var.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_var.irp.f rename to plugins/local/tc_progs/print_tc_var.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_progs/print_tc_wf.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_wf.irp.f rename to plugins/local/tc_progs/print_tc_wf.irp.f diff --git a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f rename to plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f diff --git a/plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f rename to plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f diff --git a/plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f b/plugins/local/tc_progs/select_dets_bi_ortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f rename to plugins/local/tc_progs/select_dets_bi_ortho.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/plugins/local/tc_progs/tc_bi_ortho_prop.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f rename to plugins/local/tc_progs/tc_bi_ortho_prop.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_som.irp.f b/plugins/local/tc_progs/tc_som.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_som.irp.f rename to plugins/local/tc_progs/tc_som.irp.f diff --git a/plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f b/plugins/local/tc_progs/test_tc_two_rdm.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f rename to plugins/local/tc_progs/test_tc_two_rdm.irp.f From b7787f5e6dce198bee06eb92f69b9904a7448bea Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 19:43:05 +0200 Subject: [PATCH 033/131] trying to speed up the PT2 in TC by transposing the array of tc integrals --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 8 +- .../cipsi_tc_bi_ortho/get_d0_transp.irp.f | 140 +++++++++++ .../local/cipsi_tc_bi_ortho/get_d2_good.irp.f | 3 - .../cipsi_tc_bi_ortho/get_d2_transp.irp.f | 235 ++++++++++++++++++ plugins/local/cipsi_tc_bi_ortho/pt2.irp.f | 1 + .../local/cipsi_tc_bi_ortho/selection.irp.f | 94 +------ .../cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 3 + plugins/local/fci_tc_bi/pt2_tc.irp.f | 2 + .../local/tc_bi_ortho/e_corr_bi_ortho.irp.f | 1 - plugins/local/tc_keywords/EZFIO.cfg | 11 +- 10 files changed, 404 insertions(+), 94 deletions(-) create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 1e127fac..71269fdc 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -259,15 +259,21 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, END_DOC integer :: i,j,k,l + print*,'Providing mo_bi_ortho_tc_two_e_transp' + double precision :: t0,t1 + call wall_time(t0) do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num do l = 1, mo_num - mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e_transp(k,l,i,j) + mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e(k,l,i,j) enddo enddo enddo enddo + call wall_time(t1) + + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_transp (min)', (t1-t0)/60.d0 END_PROVIDER ! --- diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f new file mode 100644 index 00000000..56238e13 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -0,0 +1,140 @@ +subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be okay for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm + double precision :: phase + double precision :: hij,hji + double precision, external :: get_phase_bi + logical :: ok + + integer, parameter :: bant=1 + double precision, allocatable :: hij_cache1(:), hij_cache2(:) + allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + double precision, allocatable :: hji_cache1(:), hji_cache2(:) + allocate (hji_cache1(mo_num),hji_cache2(mo_num)) +! print*,'in get_d0_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle +! call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,p1,h2,h1) + enddo + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache1(p2) * phase + end if + if (hij == (0.d0,0.d0)) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT + enddo + end do + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hji = hji_cache1(p2) * phase + end if + if (hji == (0.d0,0.d0)) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle +! call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1) + hij_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p2,p1) + hji_cache2(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p1,p2) + enddo + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + if (hij == 0.d0) cycle + else +! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) +! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) + hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) + if (hij == 0.d0) cycle + hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + enddo + end do + + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) + if (hji == 0.d0) cycle + else +! hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) - mo_bi_ortho_tc_two_e( p2, p1, puti, putj)) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2 ) - mo_bi_ortho_tc_two_e_transp( puti, putj, p2, p1)) + if (hji == 0.d0) cycle + hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end do + end do + end if + + deallocate(hij_cache1,hij_cache2) +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f index d01ed433..86922ae9 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -25,9 +25,6 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, integer :: bant bant = 1 -! print*, 'in get_d2_new' -! call debug_det(gen,N_int) -! print*,'coefs',coefs(1,:) tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f new file mode 100644 index 00000000..b2a7ea31 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f @@ -0,0 +1,235 @@ + +subroutine get_d2_new_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be correct for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: phase + double precision :: hij,hji + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles + + ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b) + if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles + if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles + mi = mod(ma, 2) + 1 + + if(sp == 3) then ! if one alpha and one beta xhole + !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator) + if(ma == 2) bant = 2 ! if more beta particles than alpha particles + + if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + h1 = h(1, ma) + h2 = h(2, ma) + + !! + do i = 1, 3 ! loop over all 3 combinations of 2 particles with spin ma + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + + ! |G> = |psi_{gen,i}> + ! |G'> = a_{x1} a_{x2} |G> + ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'> + ! |alpha> = t_{x1,x2}^{puti,putj} |G> + ! hij = + ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}> + !todo: = ( - ) * phase + ! += dconjg(c_i) * + ! = ( - ) * phase + ! += * c_i + +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e_transp( h1, h2, p2, p1) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp( p1, p2, h2, h1) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e( p1, p2, h2, h1) + if (hij == 0.d0.or.hji==0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + + if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end do + else ! if 2 alpha and 2 beta particles + h1 = h(1,1) + h2 = h(1,2) + !! + do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + ! hij = +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 ) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2 ) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2 ) + hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + endif + end do + end do + end if + + else ! if holes are (a,a) or (b,b) + if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b) + h1 = h(1, ma) + h2 = h(2, ma) + !! + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e_transp(p1, p2, h2,h1 ) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p1, p2, h2,h1 ) + if (hij == 0.d0.or.hji == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji + enddo + end do + end do + else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1) + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + !! + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2) + hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2 ) + if (hij == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + if (puti < putj) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + endif + end do + else ! tip == 4 (a,a,b,b) + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + !! +! hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = (mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp(p2,p1,h1, h2)) + hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2)) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji* phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end if + end if + end if +end diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index 833cc0ea..ada19c6b 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -67,6 +67,7 @@ subroutine tc_pt2 call pt2_alloc(pt2_data_err, N_states) call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) end diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 0b4345d5..0f785ba2 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -636,10 +636,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do -! print*,'in selection ' do i = 1, N_sel -! call debug_det(det(1,1,i),N_int) -! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif @@ -691,11 +688,19 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif elseif(nt == 3) then call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else + if(transpose_two_e_int)then + call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif endif elseif(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -887,79 +892,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states delta_E = E0(istate) - Hii + E_shift - double precision :: alpha_h_psi_tmp, psi_h_alpha_tmp, error - if(debug_tc_pt2 == 1)then !! Using the old version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors - call htilde_mu_mat_opt_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) - if(degree == 0)then - print*,'problem !!!' - print*,'a determinant is already in the wave function !!' - print*,'it corresponds to the selector number ',iii - call debug_det(det,N_int) - stop - endif -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function - enddo - else if(debug_tc_pt2 == 2)then !! debugging the new version -! psi_h_alpha_tmp = 0.d0 -! alpha_h_psi_tmp = 0.d0 -! do iii = 1, N_det_selectors ! old version -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) -! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function -! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function -! enddo - psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version - alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function - alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function - enddo - if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then - error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) - if(error.gt.1.d-2)then - call debug_det(det, N_int) - print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E - print*,psi_h_alpha , alpha_h_psi - print*,psi_h_alpha_tmp , alpha_h_psi_tmp - print*,'selectors ' - do iii = 1, N_det_selectors ! old version - print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - print*,i_h_alpha,alpha_h_i - call debug_det(psi_selectors(1,1,iii),N_int) - enddo -! print*,'psi_det ' -! do iii = 1, N_det! old version -! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) -! call debug_det(psi_det(1,1,iii),N_int) -! enddo - stop - endif - endif - else - psi_h_alpha = mat_l(istate, p1, p2) - alpha_h_psi = mat_r(istate, p1, p2) - endif + psi_h_alpha = mat_l(istate, p1, p2) + alpha_h_psi = mat_r(istate, p1, p2) val = 4.d0 * psi_h_alpha * alpha_h_psi tmp = dsqrt(delta_E * delta_E + val) -! if (delta_E < 0.d0) then -! tmp = -tmp -! endif e_pert(istate) = 0.25 * val / delta_E -! e_pert(istate) = 0.5d0 * (tmp - delta_E) if(dsqrt(tmp).gt.1.d-4.and.dabs(psi_h_alpha).gt.1.d-4)then coef(istate) = e_pert(istate) / psi_h_alpha else @@ -976,15 +913,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 endif -! if(selection_tc == 1 )then -! if(e_pert(istate).lt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! else if(selection_tc == -1)then -! if(e_pert(istate).gt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! endif enddo diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 99a8de7e..bb5a89a1 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -88,6 +88,9 @@ subroutine run_stochastic_cipsi call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop diff --git a/plugins/local/fci_tc_bi/pt2_tc.irp.f b/plugins/local/fci_tc_bi/pt2_tc.irp.f index 390042bf..3c07e367 100644 --- a/plugins/local/fci_tc_bi/pt2_tc.irp.f +++ b/plugins/local/fci_tc_bi/pt2_tc.irp.f @@ -13,6 +13,8 @@ program tc_pt2_prog pruning = -1.d0 touch pruning + read_wf = .True. + touch read_wf ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error diff --git a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f index 4abdc25b..5a3971c5 100644 --- a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -27,7 +27,6 @@ if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij else -! print*,'coef_pt1, e_pt2',coef_pt1,coef_pt1 * htilde_ij e_pt2_tc_bi_orth_double += coef_pt1 * htilde_ij endif endif diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 1e89eaa4..39968ec8 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -184,12 +184,6 @@ doc: Read/Write normal_two_body_bi_orth from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None -[debug_tc_pt2] -type: integer -doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body -interface: ezfio,provider,ocaml -default: -1 - [only_spin_tc_right] type: logical doc: If |true|, only the right part of WF is used to compute spin dens @@ -268,3 +262,8 @@ doc: Thresholds on the Imag part of TC energy interface: ezfio,provider,ocaml default: 1.e-7 +[transpose_two_e_int] +type: logical +doc: If |true|, you duplicate the two-electron TC integrals with the transpose matrix. Acceleates the PT2. +interface: ezfio,provider,ocaml +default: False From 18fd70f1b88ee4a412a351a92a98f4b1ef1ee3d0 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:18:24 +0200 Subject: [PATCH 034/131] added get_d1_transp.irp.f --- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 350 ++++++++++++++++++ .../local/cipsi_tc_bi_ortho/selection.irp.f | 6 +- 2 files changed, 355 insertions(+), 1 deletion(-) create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f new file mode 100644 index 00000000..3c6cbf60 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -0,0 +1,350 @@ +subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices should be okay for complex? + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, external :: get_phase_bi + double precision, external :: mo_two_e_integral_complex + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + double precision :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num),phase + double precision, allocatable :: hji_cache(:,:) + double precision :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num) +! PROVIDE mo_integrals_map N_int +! print*,'in get_d1_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + allocate (lbanned(mo_num, 2)) + allocate (hij_cache(mo_num,2)) + allocate (hji_cache(mo_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + enddo + !! + tmp_rowij = 0.d0 + tmp_rowji = 0.d0 + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + + if(ma == 1) then + mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num) + mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l) + mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l) + enddo + enddo + end if + + end if + + !MOVE MI + pfix = p(1,mi) + tmp_rowij = 0.d0 + tmp_rowij2 = 0.d0 + tmp_rowji = 0.d0 + tmp_rowji2 = 0.d0 +! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p2) + enddo + putj = p1 + !! + do puti=1,mo_num !HOT + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if +! + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + + if(mi == 1) then + mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:) + mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:) + mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:) + mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l) + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l) + enddo + enddo + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + enddo + !! + tmp_rowij = 0.d0 + tmp_rowji = 0.d0 + do putj=1,hfix-1 + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + do putj=hfix+1,mo_num + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + + mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1) + mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1) + do l=puti,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l) + mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l) + enddo + enddo + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_rowij = 0.d0 + tmp_rowij2 = 0.d0 + tmp_rowji = 0.d0 + tmp_rowji2 = 0.d0 +! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + enddo + putj = p2 + !! + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1) + mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1) + do l=p2,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l) + enddo + enddo + mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1) + mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1) + do l=p1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l) + enddo + enddo + end if + end if + deallocate(lbanned,hij_cache, hji_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + ! gen is a selector; mask is ionized generator; det is alpha + ! hij is contribution to +! call i_h_j_complex(gen, det, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + ! take conjugate to get contribution to instead of +! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji + enddo + end do + end do +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 0f785ba2..17d34f43 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -694,7 +694,11 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif elseif(nt == 3) then - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif else if(transpose_two_e_int)then call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) From 687259c25feb8ec568b31b89b760d2e08d07ad3a Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:32:48 +0200 Subject: [PATCH 035/131] working on the matrix elements both --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 20 +++ plugins/local/slater_tc/slater_tc_opt.irp.f | 42 ++++++ .../slater_tc/slater_tc_opt_double.irp.f | 60 ++++++++ .../slater_tc/slater_tc_opt_single.irp.f | 142 ++++++++++++++++++ 4 files changed, 264 insertions(+) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 71269fdc..e27fdb7f 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -332,3 +332,23 @@ END_PROVIDER ! --- + BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals_transp , (mo_num,mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals_transp, (mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! tc_2e_3idx_coulomb_integrals_transp (j,k,i) = + ! tc_2e_3idx_exchange_integrals_transp(j,k,i) = + END_DOC + implicit none + integer :: i, j, k + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + tc_2e_3idx_coulomb_integrals_transp(j, k,i) = mo_bi_ortho_tc_two_e_transp(j ,k ,j ,i ) + tc_2e_3idx_exchange_integrals_transp(j,k,i) = mo_bi_ortho_tc_two_e_transp(k ,j ,j ,i ) + enddo + enddo + enddo + +END_PROVIDER diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 59efc943..9ed2b389 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -181,3 +181,45 @@ end ! --- +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) + + BEGIN_DOC + ! + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + !! + ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS + !! WARNING !! + ! + ! Non hermitian !! + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: htot + integer :: degree + + htot = 0.d0 + + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.gt.2) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, htot) + endif + + if(degree==0) then + htot += nuclear_repulsion + endif + +end + +! --- + diff --git a/plugins/local/slater_tc/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f index 4067473c..181ae11d 100644 --- a/plugins/local/slater_tc/slater_tc_opt_double.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_double.irp.f @@ -505,3 +505,63 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) end +subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono, htwoe_ji, htwoe_ij + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int,phase + + + call get_excitation_degree(key_i, key_j, degree, Nint) + + hmono = 0.d0 + htwoe_ji = 0.d0 + htwoe_ij = 0.d0 + hji = 0.d0 + hij = 0.d0 + + if(degree.ne.2)then + return + endif + integer :: degree_i,degree_j + call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int) + call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int) + call get_double_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) + + if(s1.ne.s2)then + ! opposite spin two-body + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + else + ! same spin two-body + ! direct terms + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + ! exchange terms + htwoe_ji -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + htwoe_ij -= mo_bi_ortho_tc_two_e_transp(p1,p2,h2,h1) + endif + htwoe_ji *= phase + hji = htwoe_ji + htwoe_ij *= phase + hij = htwoe_ij + +end diff --git a/plugins/local/slater_tc/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f index e57cb05c..3f4e17e2 100644 --- a/plugins/local/slater_tc/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -618,3 +618,145 @@ subroutine get_single_excitation_from_fock_tc_no_3e(Nint, key_i, key_j, h, p, sp end + +subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + + double precision :: hmono, htwoe + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int, phase + double precision :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13 + integer :: other_spin(2) + integer(bit_kind) :: key_j_core(Nint,2), key_i_core(Nint,2) + + other_spin(1) = 2 + other_spin(2) = 1 + + hmono = 0.d0 + htwoe = 0.d0 + hji = 0.d0 + hji = 0.d0 + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.ne.1)then + return + endif + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + call get_single_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hmono, htwoe, hji,hij) + +end + +! --- + +subroutine get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h, p, spin, phase, hji,hij) + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer, intent(in) :: h, p, spin + double precision, intent(in) :: phase + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono_ji,htwoe_ji + double precision :: hmono_ij,htwoe_ij + + integer(bit_kind) :: differences(Nint,2) + integer(bit_kind) :: hole(Nint,2) + integer(bit_kind) :: partcl(Nint,2) + integer :: occ_hole(Nint*bit_kind_size,2) + integer :: occ_partcl(Nint*bit_kind_size,2) + integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) + integer :: i0,i + double precision :: buffer_c_ji(mo_num), buffer_x_ji(mo_num) + double precision :: buffer_c_ij(mo_num), buffer_x_ij(mo_num) + + do i = 1, mo_num + buffer_c_ji(i) = tc_2e_3idx_coulomb_integrals(i,p,h) + buffer_x_ji(i) = tc_2e_3idx_exchange_integrals(i,p,h) + buffer_c_ij(i) = tc_2e_3idx_coulomb_integrals_transp(i,p,h) + buffer_x_ij(i) = tc_2e_3idx_exchange_integrals_transp(i,p,h) + enddo + + do i = 1, Nint + differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1)) + differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2)) + hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1)) + hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2)) + partcl(i,1) = iand(differences(i,1),key_i(i,1)) + partcl(i,2) = iand(differences(i,2),key_i(i,2)) + enddo + + call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, Nint) + call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, Nint) + hmono_ji = mo_bi_ortho_tc_one_e(p,h) + htwoe_ji = fock_op_2_e_tc_closed_shell(p,h) + hmono_ij = mo_bi_ortho_tc_one_e(h,p) + htwoe_ij = fock_op_2_e_tc_closed_shell(h,p) + + ! holes :: direct terms + do i0 = 1, n_occ_ab_hole(1) + i = occ_hole(i0,1) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_hole(2) + i = occ_hole(i0,2) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + + ! holes :: exchange terms + do i0 = 1, n_occ_ab_hole(spin) + i = occ_hole(i0,spin) + htwoe_ji += buffer_x_ji(i) + htwoe_ij += buffer_x_ij(i) + enddo + + ! particles :: direct terms + do i0 = 1, n_occ_ab_partcl(1) + i = occ_partcl(i0,1) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_partcl(2) + i = occ_partcl(i0,2) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + + ! particles :: exchange terms + do i0 = 1, n_occ_ab_partcl(spin) + i = occ_partcl(i0,spin) + htwoe_ji -= buffer_x_ji(i) + htwoe_ij -= buffer_x_ij(i) + enddo + htwoe_ji = htwoe_ji * phase + hmono_ji = hmono_ji * phase + hji = htwoe_ji + hmono_ji + + htwoe_ij = htwoe_ij * phase + hmono_ij = hmono_ij * phase + hij = htwoe_ij + hmono_ij + +end + From 42fdb3c4350c0452a7169614ff9dba4e0e381f62 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:52:10 +0200 Subject: [PATCH 036/131] it works with new routines for pt2 tc --- .../cipsi_tc_bi_ortho/get_d0_transp.irp.f | 54 ++++--------------- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 4 +- plugins/local/slater_tc/slater_tc_opt.irp.f | 17 +++--- .../slater_tc/slater_tc_opt_single.irp.f | 4 +- 4 files changed, 25 insertions(+), 54 deletions(-) diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f index 56238e13..f149e7c6 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -45,33 +45,16 @@ subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this ! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) hij = hij_cache1(p2) * phase + hji = hji_cache1(p2) * phase end if - if (hij == (0.d0,0.d0)) cycle + if (hij == 0.d0.or.hji == 0.d0) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT - enddo - end do - !!!!!!!!!! - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this -! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hji = hji_cache1(p2) * phase - end if - if (hji == (0.d0,0.d0)) cycle - !DIR$ LOOP COUNT AVG(4) - do k=1,N_states mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do @@ -98,40 +81,25 @@ subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this ! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) - if (hij == 0.d0) cycle + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + if (hij == 0.d0.or.hji == 0.d0) cycle else ! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) ! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) - if (hij == 0.d0) cycle - hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e_transp(puti, putj, p2, p1)) + if (hij == 0.d0.or.hji == 0.d0) cycle + phase = get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hij = (hij) * phase + hji = (hji) * phase end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij - enddo - end do - - !!!!!!!!!! - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) - if (hji == 0.d0) cycle - else -! hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) - mo_bi_ortho_tc_two_e( p2, p1, puti, putj)) - hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2 ) - mo_bi_ortho_tc_two_e_transp( puti, putj, p2, p1)) - if (hji == 0.d0) cycle - hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - !DIR$ LOOP COUNT AVG(4) - do k=1,N_states mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do + end do end if diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f index 3c6cbf60..84a1ce24 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -335,8 +335,8 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, ! gen is a selector; mask is ionized generator; det is alpha ! hij is contribution to ! call i_h_j_complex(gen, det, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det, gen, N_int, hij,hji) +! call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) !DIR$ LOOP COUNT AVG(4) do k=1,N_states ! take conjugate to get contribution to instead of diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 9ed2b389..5651a299 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -181,7 +181,7 @@ end ! --- -subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, hji,hij) BEGIN_DOC ! @@ -199,24 +199,27 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) implicit none integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) - double precision, intent(out) :: htot + double precision, intent(out) :: hji,hij integer :: degree - htot = 0.d0 + hji = 0.d0 + hij = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot) + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,hji) + hij = hji else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , htot) + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , hji,hij) else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, htot) + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) endif if(degree==0) then - htot += nuclear_repulsion + hji += nuclear_repulsion + hij += nuclear_repulsion endif end diff --git a/plugins/local/slater_tc/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f index 3f4e17e2..47bcbe34 100644 --- a/plugins/local/slater_tc/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -652,7 +652,7 @@ subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hj hmono = 0.d0 htwoe = 0.d0 hji = 0.d0 - hji = 0.d0 + hij = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.ne.1)then return @@ -661,7 +661,7 @@ subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hj call get_single_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) - call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hmono, htwoe, hji,hij) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hji,hij) end From a38bf00975365cc755fc7c8c24e9e74c02cd2a00 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 8 May 2024 17:26:48 +0200 Subject: [PATCH 037/131] updated default keywords in tc_keywords and ao_twoe_e_ints --- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 34 ++++++++++++------- plugins/local/cipsi_tc_bi_ortho/pt2.irp.f | 3 ++ .../local/cipsi_tc_bi_ortho/selection.irp.f | 6 ++-- .../cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 +- .../local/tc_bi_ortho/diagonalize_tc_h.irp.f | 34 +++++++++---------- plugins/local/tc_keywords/EZFIO.cfg | 4 +-- src/ao_two_e_ints/EZFIO.cfg | 6 ++-- 7 files changed, 49 insertions(+), 40 deletions(-) diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f index 84a1ce24..a3d7b076 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -16,7 +16,7 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, logical :: ok logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, istate integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm integer, parameter :: turn2(2) = (/2,1/) @@ -65,10 +65,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo enddo !! - tmp_rowij = 0.d0 - tmp_rowji = 0.d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle @@ -119,13 +121,15 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, !MOVE MI pfix = p(1,mi) - tmp_rowij = 0.d0 - tmp_rowij2 = 0.d0 - tmp_rowji = 0.d0 - tmp_rowji2 = 0.d0 ! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) ! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) do mm = 1, mo_num + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) @@ -200,10 +204,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1, N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo enddo !! - tmp_rowij = 0.d0 - tmp_rowji = 0.d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle @@ -246,10 +252,6 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, pfix = p(1,mi) p1 = p(1,ma) p2 = p(2,ma) - tmp_rowij = 0.d0 - tmp_rowij2 = 0.d0 - tmp_rowji = 0.d0 - tmp_rowji2 = 0.d0 ! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) ! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) do mm = 1, mo_num @@ -257,6 +259,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + do istate = 1,N_states + tmp_rowij (istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji (istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo enddo putj = p2 !! diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index ada19c6b..22381991 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -65,6 +65,9 @@ subroutine tc_pt2 call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 17d34f43..72ccf9c4 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -691,19 +691,19 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(transpose_two_e_int)then call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d2_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif elseif(nt == 3) then if(transpose_two_e_int)then call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d1_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif else if(transpose_two_e_int)then call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif endif elseif(nt == 4) then diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index bb5a89a1..e363830d 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -89,7 +89,7 @@ subroutine run_stochastic_cipsi call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) if(transpose_two_e_int)then - provide mo_bi_ortho_tc_two_e_transp + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop diff --git a/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f index 398e96db..03388898 100644 --- a/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f +++ b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f @@ -35,8 +35,8 @@ program tc_bi_ortho print*, ' nb of det = ', N_det call routine_diag() - call write_tc_energy() - call save_tc_bi_ortho_wavefunction() +! call write_tc_energy() +! call save_tc_bi_ortho_wavefunction() end @@ -76,28 +76,26 @@ subroutine routine_diag() PROVIDE noL_2e endif - PROVIDE htilde_matrix_elmt_bi_ortho - return if(N_states .eq. 1) then print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs - print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs +! print*,'e_tc_left_right = ',e_tc_left_right +! print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 +! print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth +! print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single +! print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double +! print*,'***' +! print*,'e_corr_bi_orth = ',e_corr_bi_orth +! print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj +! print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs +! print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth +! print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth +! print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs +! print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs print*,'Left/right eigenvectors' do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + write(*,'(I6,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) enddo else diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 39968ec8..d764224a 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: False [three_e_3_idx_term] type: logical @@ -50,7 +50,7 @@ default: False type: logical doc: If |true|, standard normal-ordering for L (to be used with three_body_h_tc |false|) interface: ezfio,provider,ocaml -default: False +default: True [core_tc_op] type: logical diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index ff932b0c..c2e083a3 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -25,16 +25,16 @@ default: 1.e-12 [do_direct_integrals] type: logical -doc: Compute integrals on the fly (very slow, only for debugging) +doc: Compute integrals on the fly (Useful only for Cholesky decomposition) interface: ezfio,provider,ocaml -default: False +default: True ezfio_name: direct [do_ao_cholesky] type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: False +default: True [io_ao_two_e_integrals_erf] type: Disk_access From 2af293fd291481896cd5114df6f653ca0f04f797 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 8 May 2024 17:38:54 +0200 Subject: [PATCH 038/131] minor modifs in BH jastrows --- .../local/non_h_ints_mu/jast_deriv_utils_vect.irp.f | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index db06e835..09bb6528 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -340,8 +340,8 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) endif tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) - tmp1 = tmp1 * g12_power(opA) - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp !tmp1 = 0.d0 !if(mpA .gt. 0) then @@ -356,9 +356,12 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) ! tmp2 = tmp2 + dble(opA) * g12**dble(opA-1) * (f1A**dble(mpA) * f2A**dble(npA) + f1A**dble(npA) * f2A**dble(mpA)) !endif - gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) - grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) - gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) +! gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) +! grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) +! gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) enddo ! p enddo ! i_nucl enddo ! jpoint From 812e75982b96959485b8f7ccd333ef3a1b1b570b Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 10 May 2024 17:23:51 +0200 Subject: [PATCH 039/131] minor modifs in plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f --- .../non_h_ints_mu/jast_deriv_utils_vect.irp.f | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index 09bb6528..2c41b535 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -867,19 +867,20 @@ subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - tmp1 = 1.d0 / (1.d0 + alpha * dist) - fct = alpha * dist * tmp1 - - if(dist .lt. 1d-10) then - grad1_fct(1) = 0.d0 - grad1_fct(2) = 0.d0 - grad1_fct(3) = 0.d0 - else + if(dist .ge. 1d-10) then + tmp1 = 1.d0 / (1.d0 + alpha * dist) + + fct = alpha * dist * tmp1 tmp2 = alpha * tmp1 * tmp1 / dist grad1_fct(1) = tmp2 * (r1(1) - r2(1)) grad1_fct(2) = tmp2 * (r1(2) - r2(2)) grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 endif return From 6e2f28b97fbf3c961c9461689a8db9adf81bd6f8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 11 May 2024 10:27:03 +0200 Subject: [PATCH 040/131] COLLAPSE(4) -> COLLAPSE(3) --- plugins/local/non_h_ints_mu/deb_aos.irp.f | 6 +++--- plugins/local/non_h_ints_mu/total_tc_int.irp.f | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/plugins/local/non_h_ints_mu/deb_aos.irp.f b/plugins/local/non_h_ints_mu/deb_aos.irp.f index 4012f47c..70604f54 100644 --- a/plugins/local/non_h_ints_mu/deb_aos.irp.f +++ b/plugins/local/non_h_ints_mu/deb_aos.irp.f @@ -31,6 +31,9 @@ subroutine print_aos() integer :: i, ipoint double precision :: r(3) double precision :: ao_val, ao_der(3), ao_lap + double precision :: accu_vgl(5) + double precision :: accu_vgl_nrm(5) + double precision :: mo_val, mo_der(3), mo_lap PROVIDE final_grid_points aos_in_r_array aos_grad_in_r_array aos_lapl_in_r_array @@ -40,9 +43,6 @@ subroutine print_aos() write(1000, '(3(f15.7, 3X))') r enddo -double precision :: accu_vgl(5) -double precision :: accu_vgl_nrm(5) - do ipoint = 1, n_points_final_grid do i = 1, ao_num ao_val = aos_in_r_array (i,ipoint) diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index a1bbd6e0..656f5f16 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -78,7 +78,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & !$OMP aos_in_r_array_transp, final_weight_at_r_vector, int2_grad1_u12_square_ao) - !$OMP DO COLLAPSE(4) + !$OMP DO COLLAPSE(3) do i = 1, ao_num do k = 1, ao_num do l = 1, ao_num @@ -188,7 +188,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & !$OMP aos_in_r_array_transp, final_weight_at_r_vector, & !$OMP int2_grad1_u12_ao, aos_grad_in_r_array_transp_bis) - !$OMP DO COLLAPSE(4) + !$OMP DO COLLAPSE(3) do i = 1, ao_num do k = 1, ao_num do l = 1, ao_num @@ -270,7 +270,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & !$OMP SHARED(ao_num, ao_two_e_tc_tot) - !$OMP DO COLLAPSE(4) + !$OMP DO COLLAPSE(3) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -293,7 +293,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & !$OMP PRIVATE(i, j, k, l) - !$OMP DO COLLAPSE(4) + !$OMP DO COLLAPSE(3) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -306,7 +306,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n enddo !$OMP END DO !$OMP END PARALLEL - !call clear_ao_map() FREE ao_integrals_map endif From 8eea5d7f7f142103998d8bfa1b3bcc630935f69b Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 15 May 2024 15:41:35 +0200 Subject: [PATCH 041/131] fixed a bug in cholesk_ao_transp --- .../tuto_plugins/tuto_I/test_cholesky.irp.f | 53 +++++++++++++++++++ src/ao_two_e_ints/cholesky.irp.f | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f diff --git a/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f new file mode 100644 index 00000000..d09d100a --- /dev/null +++ b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f @@ -0,0 +1,53 @@ +program my_program_to_print_stuffs + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + integer :: i,j,k,l,m + double precision :: integral, accu, accu_tot, integral_cholesky + double precision :: get_ao_two_e_integral, get_two_e_integral ! declaration of the functions + print*,'AO integrals, physicist notations : ' + accu_tot = 0.D0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integral_cholesky = 0.D0 + do m = 1, cholesky_ao_num + integral_cholesky += cholesky_ao_transp(m,i,k) * cholesky_ao_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo + print*,'accu_tot',accu_tot + + print*,'MO integrals, physicist notations : ' + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + integral = get_two_e_integral(i, j, k, l, mo_integrals_map) + accu = 0.D0 + integral_cholesky = 0.D0 + do m = 1, cholesky_mo_num + integral_cholesky += cholesky_mo_transp(m,i,k) * cholesky_mo_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo +end diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 33304026..5fbd166c 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, integer :: i,j,k do j=1,ao_num do i=1,ao_num - do k=1,ao_num + do k=1,cholesky_ao_num cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) enddo enddo From c6a61639445229eca3ecb2e32556ddef646064d6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 16 May 2024 17:57:00 +0200 Subject: [PATCH 042/131] added f_hf with cholesky by default --- src/dft_utils_in_r/mo_in_r.irp.f | 2 +- src/mu_of_r/f_cholesky.irp.f | 221 +++++++++++++++++++++++++++ src/mu_of_r/mu_of_r_conditions.irp.f | 46 +++++- 3 files changed, 264 insertions(+), 5 deletions(-) create mode 100644 src/mu_of_r/f_cholesky.irp.f diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index 192cb25a..ad931402 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -48,7 +48,7 @@ integer :: i,j do i = 1, n_points_final_grid do j = 1, mo_num - mos_in_r_array_transp(i,j) = mos_in_r_array(j,i) + mos_in_r_array_transp(i,j) = mos_in_r_array_omp(j,i) enddo enddo END_PROVIDER diff --git a/src/mu_of_r/f_cholesky.irp.f b/src/mu_of_r/f_cholesky.irp.f new file mode 100644 index 00000000..1ad4ce36 --- /dev/null +++ b/src/mu_of_r/f_cholesky.irp.f @@ -0,0 +1,221 @@ +BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_orb_r1(1,itmp) = i + list_couple_orb_r1(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_orb_r2(1,itmp) = i + list_couple_orb_r2(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, n_couple_orb_r1] + implicit none + BEGIN_DOC + ! number of couples of alpha occupied times any basis orbital + END_DOC + n_couple_orb_r1 = n_occ_val_orb_for_hf(1) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [integer, n_couple_orb_r2] + implicit none + BEGIN_DOC + ! number of couples of beta occupied times any basis orbital + END_DOC + n_couple_orb_r2 = n_occ_val_orb_for_hf(2) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r1 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r1(:,:),mo_chol_r1(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r1,mo_b_r1 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r1(n_couple_orb_r1,n_points_final_grid)) + allocate(mo_chol_r1(cholesky_mo_num,n_couple_orb_r1)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r1(itmp,ipoint) = mo_i_r1 * mo_b_r1 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r1(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) + allocate(test(cholesky_mo_num,n_points_final_grid)) + test = 0.d0 + do ipoint = 1, n_points_final_grid + do itmp = 1, n_couple_orb_r1 + i = list_couple_orb_r1(1,itmp) + m = list_couple_orb_r1(2,itmp) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + do mm = 1, cholesky_mo_num + test(mm,ipoint) += mo_i_r1 * mo_b_r1 * mo_chol_r1(mm,itmp) + enddo + enddo + enddo + double precision :: accu + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do mm = 1, cholesky_mo_num + accu += dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint) ) + if(dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then + print*,'problem ! ',dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)) & + , mos_times_cholesky_r1(mm,ipoint) , test(mm,ipoint) + endif + enddo + enddo + print*,'accu = ',accu + + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r2 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r2(:,:),mo_chol_r2(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r2,mo_b_r2 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r2(n_couple_orb_r2,n_points_final_grid)) + allocate(mo_chol_r2(cholesky_mo_num,n_couple_orb_r2)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r2(itmp,ipoint) = mo_i_r2 * mo_b_r2 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r2(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) + allocate(test(cholesky_mo_num,n_points_final_grid)) + test = 0.d0 + do ipoint = 1, n_points_final_grid + do itmp = 1, n_couple_orb_r2 + i = list_couple_orb_r2(1,itmp) + m = list_couple_orb_r2(2,itmp) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + do mm = 1, cholesky_mo_num + test(mm,ipoint) += mo_i_r2 * mo_b_r2 * mo_chol_r2(mm,itmp) + enddo + enddo + enddo + double precision :: accu + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do mm = 1, cholesky_mo_num + accu += dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint) ) + if(dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then + print*,'problem ! ',dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)) & + , mos_times_cholesky_r2(mm,ipoint) , test(mm,ipoint) + endif + enddo + enddo + print*,'accu = ',accu + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 2.D0 * u_dot_v(mos_times_cholesky_r2(1,ipoint),mos_times_cholesky_r1(1,ipoint),cholesky_mo_num) + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] + implicit none + integer :: ipoint,i,ii + double precision :: dm_a, dm_b + do ipoint = 1, n_points_final_grid + dm_a = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + dm_a += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + dm_b = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + dm_b += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b + enddo +END_PROVIDER + diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 6b49b9df..5b4d4b83 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -61,7 +61,7 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + PROVIDE f_hf_cholesky on_top_hf_grid print*,'providing mu_of_r_hf ...' call wall_time(wall0) sqpi = dsqrt(dacos(-1.d0)) @@ -69,10 +69,10 @@ !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & - !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_hf_cholesky,on_top_hf_grid,sqpi) do ipoint = 1, n_points_final_grid - f_hf = f_psi_hf_ab(ipoint) - on_top = on_top_hf_mu_r(ipoint) + f_hf = f_hf_cholesky(ipoint) + on_top = on_top_hf_grid(ipoint) if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then w_hf = 1.d+10 else @@ -85,6 +85,44 @@ print*,'Time to provide mu_of_r_hf = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + print*,'providing mu_of_r_hf_old ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + provide f_psi_hf_ab + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_old,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_psi_hf_ab(ipoint) + on_top = on_top_hf_mu_r(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_old(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_old = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_psi_cas, (n_points_final_grid,N_states) ] implicit none BEGIN_DOC From ce042fbd787a21a600830596fa3caa5f7aa2cdb1 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:01:28 +0200 Subject: [PATCH 043/131] basis set correction with cholesky works for hf --- .../local/basis_correction/51.basis_c.bats | 8 -- .../{01.convert.bats => convert_bats_old} | 0 src/hartree_fock/10.hf.bats | 13 -- src/mu_of_r/basis_def.irp.f | 45 +++++++ .../{f_cholesky.irp.f => f_hf_cholesky.irp.f} | 121 +++++++++--------- 5 files changed, 104 insertions(+), 83 deletions(-) rename src/ezfio_files/{01.convert.bats => convert_bats_old} (100%) rename src/mu_of_r/{f_cholesky.irp.f => f_hf_cholesky.irp.f} (67%) diff --git a/plugins/local/basis_correction/51.basis_c.bats b/plugins/local/basis_correction/51.basis_c.bats index 914b482b..1e20bae3 100644 --- a/plugins/local/basis_correction/51.basis_c.bats +++ b/plugins/local/basis_correction/51.basis_c.bats @@ -37,14 +37,6 @@ function run_sd() { eq $energy1 $1 $thresh } -@test "O2 CAS" { - qp set_file o2_cas.gms.ezfio - qp set_mo_class -c "[1-2]" -a "[3-10]" -d "[11-46]" - run -149.72435425 3.e-4 10000 - qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" - run_md -0.1160222327 1.e-6 -} - @test "LiF RHF" { qp set_file lif.ezfio diff --git a/src/ezfio_files/01.convert.bats b/src/ezfio_files/convert_bats_old similarity index 100% rename from src/ezfio_files/01.convert.bats rename to src/ezfio_files/convert_bats_old diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index b496a089..214dfa86 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -115,9 +115,6 @@ rm -rf $EZFIO run hco.ezfio -113.1841002944744 } -@test "HBO" { # 0.805600 1.4543s - run hbo.ezfio -100.018582259096 -} @test "H2S" { # 1.655600 4.21402s run h2s.ezfio -398.6944130421982 @@ -127,9 +124,6 @@ rm -rf $EZFIO run h3coh.ezfio -114.9865030596373 } -@test "H2O" { # 1.811100 1.84387s - run h2o.ezfio -0.760270218692179E+02 -} @test "H2O2" { # 2.217000 8.50267s run h2o2.ezfio -150.7806608469964 @@ -187,13 +181,6 @@ rm -rf $EZFIO run oh.ezfio -75.42025413469165 } -@test "[Cu(NH3)4]2+" { # 59.610100 4.18766m - [[ -n $TRAVIS ]] && skip - qp set_file cu_nh3_4_2plus.ezfio - qp set scf_utils thresh_scf 1.e-10 - run cu_nh3_4_2plus.ezfio -1862.97590358903 -} - @test "SO2" { # 71.894900 3.22567m [[ -n $TRAVIS ]] && skip run so2.ezfio -41.55800401346361 diff --git a/src/mu_of_r/basis_def.irp.f b/src/mu_of_r/basis_def.irp.f index fff9f581..e433f4d8 100644 --- a/src/mu_of_r/basis_def.irp.f +++ b/src/mu_of_r/basis_def.irp.f @@ -114,3 +114,48 @@ BEGIN_PROVIDER [double precision, basis_mos_in_r_array, (n_basis_orb,n_points_fi enddo enddo END_PROVIDER + +! BEGIN_PROVIDER [integer, n_docc_val_orb_for_cas] +!&BEGIN_PROVIDER [integer, n_max_docc_val_orb_for_cas] +! implicit none +! BEGIN_DOC +! ! Number of DOUBLY OCCUPIED VALENCE ORBITALS for the CAS wave function +! ! +! ! This determines the size of the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! integer :: i +! n_docc_val_orb_for_cas = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! n_docc_val_orb_for_cas +=1 +! endif +! enddo +! n_max_docc_val_orb_for_cas = maxval(n_docc_val_orb_for_cas) +! +!END_PROVIDER +! +!BEGIN_PROVIDER [integer, list_doc_valence_orb_for_cas, (n_max_docc_val_orb_for_cas)] +! implicit none +! BEGIN_DOC +! ! List of OCCUPIED valence orbitals for each spin to build the f_{HF}(r_1,r_2) function +! ! +! ! This corresponds to ALL OCCUPIED orbitals in the HF wave function, except those defined as "core" +! ! +! ! This determines the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! j = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! j +=1 +! list_doc_valence_orb_for_cas(j) = i +! endif +! enddo +! +!END_PROVIDER + diff --git a/src/mu_of_r/f_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f similarity index 67% rename from src/mu_of_r/f_cholesky.irp.f rename to src/mu_of_r/f_hf_cholesky.irp.f index 1ad4ce36..84097f09 100644 --- a/src/mu_of_r/f_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] +BEGIN_PROVIDER [integer, list_couple_hf_orb_r1, (2,n_couple_orb_r1)] implicit none integer :: ii,i,mm,m,itmp itmp = 0 @@ -7,14 +7,14 @@ BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) itmp += 1 - list_couple_orb_r1(1,itmp) = i - list_couple_orb_r1(2,itmp) = m + list_couple_hf_orb_r1(1,itmp) = i + list_couple_hf_orb_r1(2,itmp) = m enddo enddo END_PROVIDER -BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] +BEGIN_PROVIDER [integer, list_couple_hf_orb_r2, (2,n_couple_orb_r2)] implicit none integer :: ii,i,mm,m,itmp itmp = 0 @@ -23,8 +23,8 @@ BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) itmp += 1 - list_couple_orb_r2(1,itmp) = i - list_couple_orb_r2(2,itmp) = m + list_couple_hf_orb_r2(1,itmp) = i + list_couple_hf_orb_r2(2,itmp) = m enddo enddo END_PROVIDER @@ -87,31 +87,6 @@ BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_poi enddo call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) - allocate(test(cholesky_mo_num,n_points_final_grid)) - test = 0.d0 - do ipoint = 1, n_points_final_grid - do itmp = 1, n_couple_orb_r1 - i = list_couple_orb_r1(1,itmp) - m = list_couple_orb_r1(2,itmp) - mo_i_r1 = mos_in_r_array_omp(i,ipoint) - mo_b_r1 = mos_in_r_array_omp(m,ipoint) - do mm = 1, cholesky_mo_num - test(mm,ipoint) += mo_i_r1 * mo_b_r1 * mo_chol_r1(mm,itmp) - enddo - enddo - enddo - double precision :: accu - accu = 0.d0 - do ipoint = 1, n_points_final_grid - do mm = 1, cholesky_mo_num - accu += dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint) ) - if(dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then - print*,'problem ! ',dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)) & - , mos_times_cholesky_r1(mm,ipoint) , test(mm,ipoint) - endif - enddo - enddo - print*,'accu = ',accu END_PROVIDER @@ -157,53 +132,72 @@ BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_poi enddo call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) - allocate(test(cholesky_mo_num,n_points_final_grid)) - test = 0.d0 - do ipoint = 1, n_points_final_grid - do itmp = 1, n_couple_orb_r2 - i = list_couple_orb_r2(1,itmp) - m = list_couple_orb_r2(2,itmp) - mo_i_r2 = mos_in_r_array_omp(i,ipoint) - mo_b_r2 = mos_in_r_array_omp(m,ipoint) - do mm = 1, cholesky_mo_num - test(mm,ipoint) += mo_i_r2 * mo_b_r2 * mo_chol_r2(mm,itmp) - enddo - enddo - enddo - double precision :: accu - accu = 0.d0 - do ipoint = 1, n_points_final_grid - do mm = 1, cholesky_mo_num - accu += dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint) ) - if(dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then - print*,'problem ! ',dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)) & - , mos_times_cholesky_r2(mm,ipoint) , test(mm,ipoint) - endif - enddo - enddo - print*,'accu = ',accu END_PROVIDER BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint,m,k !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) !! = \sum_A V_AR G_AR !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI - double precision :: u_dot_v - do ipoint = 1, n_points_final_grid - f_hf_cholesky(ipoint) = 2.D0 * u_dot_v(mos_times_cholesky_r2(1,ipoint),mos_times_cholesky_r1(1,ipoint),cholesky_mo_num) - enddo + double precision :: u_dot_v,wall0,wall1 + if(elec_alpha_num == elec_beta_num)then + provide mos_times_cholesky_r1 + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.d0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r1(m,ipoint) * mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r1 + else + provide mos_times_cholesky_r2 mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r2,mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.D0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r2(m,ipoint)*mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r2 mos_times_cholesky_r1 + endif END_PROVIDER BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] implicit none integer :: ipoint,i,ii - double precision :: dm_a, dm_b + double precision :: dm_a, dm_b,wall0,wall1 + print*,'providing on_top_hf_grid ...' + provide mos_in_r_array_omp + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,dm_a,dm_b,ii,i) & + !$OMP ShARED (n_points_final_grid,n_occ_val_orb_for_hf,mos_in_r_array_omp,list_valence_orb_for_hf,on_top_hf_grid) do ipoint = 1, n_points_final_grid dm_a = 0.d0 do ii = 1, n_occ_val_orb_for_hf(1) @@ -217,5 +211,8 @@ BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] enddo on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide on_top_hf_grid = ',wall1-wall0 END_PROVIDER From 112f113ccb3f363262930b53e21aed010a29f746 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:26:30 +0200 Subject: [PATCH 044/131] fixed forgotten stuffs in normal_order_old/NEED --- plugins/local/normal_order_old/NEED | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED index 8b137891..e8c8c478 100644 --- a/plugins/local/normal_order_old/NEED +++ b/plugins/local/normal_order_old/NEED @@ -1 +1 @@ - +tc_scf From 6fb0f2a58e803ea02a03fe01b4ce9daa6b2fba91 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:53:55 +0200 Subject: [PATCH 045/131] modified scripts/get_fci_tc_conv.sh according to new printing --- plugins/local/tc_bi_ortho/print_tc_wf.irp.f | 2 +- scripts/get_fci_tc_conv.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f index 2b88bc5b..4d9f7c48 100644 --- a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f @@ -37,7 +37,7 @@ subroutine write_l_r_wf integer :: i print*,'Writing the left-right wf' do i = 1, N_det - write(i_unit_output,*)i, psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(i,1) & + write(i_unit_output,'(I8,X,10(F16.10,X))')i, psi_coef_sorted_tc(i,1),psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(1,1)& , psi_l_coef_sorted_bi_ortho_left(i)/psi_l_coef_sorted_bi_ortho_left(1) & , psi_r_coef_sorted_bi_ortho_right(i)/psi_r_coef_sorted_bi_ortho_right(1) enddo diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh index 643f3ac0..f0c99baf 100755 --- a/scripts/get_fci_tc_conv.sh +++ b/scripts/get_fci_tc_conv.sh @@ -1,2 +1,2 @@ file=$1 -grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc +grep "Ndet,E,E+PT2,pt2_minus,pt2_plus,pt2_abs=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From 3600c3c5ca92c6c62a0bbbb6cc1d01ec595e148c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 22 May 2024 17:02:26 +0200 Subject: [PATCH 046/131] removed stupid dead link for PYSCF_EOMCC.py --- scripts/PYSCF_EOMCC.py | 1 - 1 file changed, 1 deletion(-) delete mode 120000 scripts/PYSCF_EOMCC.py diff --git a/scripts/PYSCF_EOMCC.py b/scripts/PYSCF_EOMCC.py deleted file mode 120000 index 8ad341da..00000000 --- a/scripts/PYSCF_EOMCC.py +++ /dev/null @@ -1 +0,0 @@ -/home_lct/eginer/qp2/plugins/qp_plugins_lct/dev/fcidump_for_vbarb/PYSCF_EOMCC.py \ No newline at end of file From 29da3b6542cdfeb52f9d1b7f8c23f3967018bf0f Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 23 May 2024 00:45:56 +0200 Subject: [PATCH 047/131] bypass one_e_tr_dm_mo for large mo_num & n_states --- .../multi_s_dipole_moment.irp.f | 154 ++++++++++++++++-- 1 file changed, 140 insertions(+), 14 deletions(-) diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index c7216a61..8aae3bf4 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -18,7 +18,7 @@ -BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] + BEGIN_PROVIDER [double precision, multi_s_dipole_moment , (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment, (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment, (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment, (N_states, N_states)] @@ -40,27 +40,153 @@ BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] ! gamma^{nm}: density matrix \bra{\Psi^n} a^{\dagger}_a a_i \ket{\Psi^m} END_DOC - integer :: istate,jstate ! States - integer :: i,j ! general spatial MOs + integer :: istate, jstate ! States + integer :: i, j ! general spatial MOs double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z multi_s_x_dipole_moment = 0.d0 multi_s_y_dipole_moment = 0.d0 multi_s_z_dipole_moment = 0.d0 + + if(8.d0*mo_num*mo_num*n_states*n_states*1d-9 .lt. 200.d0) then - do jstate = 1, N_states - do istate = 1, N_states - - do i = 1, mo_num - do j = 1, mo_num - multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) - multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) - multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) - enddo + do jstate = 1, N_states + do istate = 1, N_states + do i = 1, mo_num + do j = 1, mo_num + multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) + multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) + multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) + enddo + enddo enddo - enddo - enddo + + else + + ! no enouph memory + ! on the fly scheme + + PROVIDE psi_det_alpha_unique psi_det_beta_unique + + integer :: l, k_a, k_b + integer :: occ(N_int*bit_kind_size,2) + integer :: h1, h2, p1, p2, degree + integer :: exc(0:2,2), n_occ(2) + integer :: krow, kcol, lrow, lcol + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + double precision :: ck, ckl, phase + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j, l, k_a, k_b, istate, jstate, occ, ck, ckl, h1, h2, p1, p2, exc, & + !$OMP phase, degree, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2) & + !$OMP SHARED(N_int, N_states, elec_alpha_num, elec_beta_num, N_det, & + !$OMP psi_bilinear_matrix_rows, psi_bilinear_matrix_columns, & + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values, & + !$OMP mo_dipole_x, mo_dipole_y, mo_dipole_z, & + !$OMP multi_s_x_dipole_moment, multi_s_y_dipole_moment, multi_s_z_dipole_moment) + !$OMP DO COLLAPSE(2) + do istate = 1, N_states + do jstate = 1, N_states + + do k_a = 1, N_det + krow = psi_bilinear_matrix_rows (k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + ck = psi_bilinear_matrix_values(k_a,istate)*psi_bilinear_matrix_values(k_a,jstate) + do l = 1, elec_alpha_num + j = occ(l,1) + multi_s_x_dipole_moment(istate,jstate) -= ck * mo_dipole_x(j,j) + multi_s_y_dipole_moment(istate,jstate) -= ck * mo_dipole_y(j,j) + multi_s_z_dipole_moment(istate,jstate) -= ck * mo_dipole_z(j,j) + enddo + + if (k_a == N_det) cycle + l = k_a + 1 + lrow = psi_bilinear_matrix_rows (l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas + do while (lcol == kcol) + tmp_det2(:) = psi_det_alpha_unique(:,lrow) + call get_excitation_degree_spin(tmp_det(1,1), tmp_det2, degree, N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1), tmp_det2, exc, phase, N_int) + call decode_exc_spin(exc, h1, p1, h2, p2) + ckl = psi_bilinear_matrix_values(k_a,istate)*psi_bilinear_matrix_values(l,jstate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(h1,p1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(h1,p1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(h1,p1) + ckl = psi_bilinear_matrix_values(k_a,jstate)*psi_bilinear_matrix_values(l,istate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(p1,h1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(p1,h1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(p1,h1) + endif + l = l+1 + if (l > N_det) exit + lrow = psi_bilinear_matrix_rows (l) + lcol = psi_bilinear_matrix_columns(l) + enddo + enddo ! k_a + + do k_b = 1, N_det + krow = psi_bilinear_matrix_transp_rows (k_b) + kcol = psi_bilinear_matrix_transp_columns(k_b) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + ck = psi_bilinear_matrix_transp_values(k_b,istate)*psi_bilinear_matrix_transp_values(k_b,jstate) + do l = 1, elec_beta_num + j = occ(l,2) + multi_s_x_dipole_moment(istate,jstate) -= ck * mo_dipole_x(j,j) + multi_s_y_dipole_moment(istate,jstate) -= ck * mo_dipole_y(j,j) + multi_s_z_dipole_moment(istate,jstate) -= ck * mo_dipole_z(j,j) + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows (l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix beta determinant, loop over alphas + do while (lrow == krow) + tmp_det2(:) = psi_det_beta_unique(:,lcol) + call get_excitation_degree_spin(tmp_det(1,2), tmp_det2, degree, N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2), tmp_det2, exc, phase, N_int) + call decode_exc_spin(exc, h1, p1, h2, p2) + ckl = psi_bilinear_matrix_transp_values(k_b,istate)*psi_bilinear_matrix_transp_values(l,jstate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(h1,p1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(h1,p1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(h1,p1) + ckl = psi_bilinear_matrix_transp_values(k_b,jstate)*psi_bilinear_matrix_transp_values(l,istate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(p1,h1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(p1,h1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(p1,h1) + endif + l = l+1 + if (l > N_det) exit + lrow = psi_bilinear_matrix_transp_rows (l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + enddo ! k_b + + enddo ! istate + enddo ! jstate + !$OMP END DO + !$OMP END PARALLEL + + endif ! memory condition ! Nuclei part nuclei_part_x = 0.d0 From 70f8019960140b965476e1efa88ee8e03850b0d9 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 10:02:46 +0200 Subject: [PATCH 048/131] removed stupid print in tc_keywords/EZFIO.cfg --- plugins/local/tc_keywords/EZFIO.cfg | 1 - src/mu_of_r/f_hf_cholesky.irp.f | 75 +++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index b7ce0b19..f3bd75c8 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -273,4 +273,3 @@ type: logical doc: If |true|, you duplicate the two-electron TC integrals with the transpose matrix. Acceleates the PT2. interface: ezfio,provider,ocaml default: False ->>>>>>> 8c4183cf6e38711b097df202d1f430b76823aeff diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 84097f09..101f9cc9 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -187,6 +187,81 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] endif END_PROVIDER +BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) +! !$OMP PARALLEL DO & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint,m) & +! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_sparse_cholesky(ipoint) = 0.d0 + do p = 1, cholesky_mo_num + accu_1 = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + f_hf_sparse_cholesky(ipoint) += accu_1 * accu_1 + enddo + f_hf_sparse_cholesky(ipoint) *= 2.D0 + enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + else + call wall_time(wall0) +! !$OMP PARALLEL DO & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint,m) & +! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_sparse_cholesky(ipoint) = 0.d0 + do p = 1, cholesky_mo_num + accu_2 = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_2 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + accu_1 = accu_2 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + f_hf_sparse_cholesky(ipoint) += accu_1 * accu_2 + enddo + f_hf_sparse_cholesky(ipoint) *= 2.D0 + enddo +! !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + endif +END_PROVIDER + BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] implicit none integer :: ipoint,i,ii From 49a96d4400c640928ba213f33e0493eadb4457ad Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 10:17:31 +0200 Subject: [PATCH 049/131] added f_hf_cholesky without big storage in memory --- .../basis_correction/test_chol_bas.irp.f | 18 ++++++++++++ src/mu_of_r/f_hf_cholesky.irp.f | 28 +++++++++++-------- 2 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 plugins/local/basis_correction/test_chol_bas.irp.f diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f new file mode 100644 index 00000000..ae47ec09 --- /dev/null +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -0,0 +1,18 @@ +program pouet + implicit none + call test +end +subroutine test + implicit none +! provide mos_times_cholesky_r1 +! provide mos_times_cholesky_r2 + integer :: ipoint + double precision :: accu,weight + accu = 0.d0 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) +! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight + accu += dabs(f_hf_sparse_cholesky(ipoint) - f_hf_cholesky(ipoint)) * weight + enddo + print*,'accu = ',accu +end diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 101f9cc9..b937addf 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -146,9 +146,9 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1 if(elec_alpha_num == elec_beta_num)then - provide mos_times_cholesky_r1 print*,'providing f_hf_cholesky ...' call wall_time(wall0) + provide mos_times_cholesky_r1 !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,m) & @@ -167,6 +167,8 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] print*,'Time to provide f_hf_cholesky = ',wall1-wall0 free mos_times_cholesky_r1 else + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) provide mos_times_cholesky_r2 mos_times_cholesky_r1 !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & @@ -198,10 +200,11 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 if(elec_alpha_num == elec_beta_num)then call wall_time(wall0) -! !$OMP PARALLEL DO & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (ipoint,m) & -! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & + !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid f_hf_sparse_cholesky(ipoint) = 0.d0 do p = 1, cholesky_mo_num @@ -219,16 +222,17 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] enddo f_hf_sparse_cholesky(ipoint) *= 2.D0 enddo -! !$OMP END PARALLEL DO + !$OMP END PARALLEL DO call wall_time(wall1) print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 - else + else call wall_time(wall0) -! !$OMP PARALLEL DO & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (ipoint,m) & -! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & + !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid f_hf_sparse_cholesky(ipoint) = 0.d0 do p = 1, cholesky_mo_num @@ -256,7 +260,7 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] enddo f_hf_sparse_cholesky(ipoint) *= 2.D0 enddo -! !$OMP END PARALLEL DO + !$OMP END PARALLEL DO call wall_time(wall1) print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 endif From 70745cbeaaf59900c3ce4e1df042ef88ff1ecb11 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 14:45:33 +0200 Subject: [PATCH 050/131] added sparse cholesky mu_of_r --- external/irpf90 | 2 +- .../basis_correction/basis_correction.irp.f | 4 -- .../basis_correction/print_routine.irp.f | 2 +- .../basis_correction/test_chol_bas.irp.f | 2 +- src/mu_of_r/f_hf_cholesky.irp.f | 52 ++++++++++++------- src/mu_of_r/mu_of_r_conditions.irp.f | 44 ++++++++++++++-- 6 files changed, 76 insertions(+), 30 deletions(-) diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..4ab1b175 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/plugins/local/basis_correction/basis_correction.irp.f b/plugins/local/basis_correction/basis_correction.irp.f index a7ea7244..f17b5d5b 100644 --- a/plugins/local/basis_correction/basis_correction.irp.f +++ b/plugins/local/basis_correction/basis_correction.irp.f @@ -7,10 +7,6 @@ program basis_correction touch read_wf no_core_density = .True. touch no_core_density - if(io_mo_two_e_integrals .ne. "Read")then - provide ao_two_e_integrals_in_map - endif - provide mo_two_e_integrals_in_map call print_basis_correction end diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index 96faba30..b3b38673 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -22,7 +22,7 @@ subroutine print_basis_correction print*, '****************************************' print*, '****************************************' print*, 'mu_of_r_potential = ',mu_of_r_potential - if(mu_of_r_potential.EQ."hf")then + if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then print*, '' print*,'Using a HF-like two-body density to define mu(r)' print*,'This assumes that HF is a qualitative representation of the wave function ' diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f index ae47ec09..076d888c 100644 --- a/plugins/local/basis_correction/test_chol_bas.irp.f +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -12,7 +12,7 @@ subroutine test do ipoint = 1, n_points_final_grid weight = final_weight_at_r_vector(ipoint) ! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight - accu += dabs(f_hf_sparse_cholesky(ipoint) - f_hf_cholesky(ipoint)) * weight + accu += dabs(f_hf_cholesky_sparse(ipoint) - f_hf_cholesky(ipoint)) * weight enddo print*,'accu = ',accu end diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index b937addf..17f0229a 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -189,7 +189,7 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] endif END_PROVIDER -BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] implicit none integer :: ipoint,m,mm,i,ii,p !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ @@ -198,43 +198,55 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] !! = \sum_A V_AR G_AR !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp if(elec_alpha_num == elec_beta_num)then call wall_time(wall0) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & - !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO do ipoint = 1, n_points_final_grid - f_hf_sparse_cholesky(ipoint) = 0.d0 - do p = 1, cholesky_mo_num - accu_1 = 0.d0 + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 do ii = 1, n_occ_val_orb_for_hf(1) i = list_valence_orb_for_hf(ii,1) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) += mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - f_hf_sparse_cholesky(ipoint) += accu_1 * accu_1 - enddo - f_hf_sparse_cholesky(ipoint) *= 2.D0 + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) += accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL call wall_time(wall1) - print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 else call wall_time(wall0) !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid - f_hf_sparse_cholesky(ipoint) = 0.d0 + f_hf_cholesky_sparse(ipoint) = 0.d0 do p = 1, cholesky_mo_num accu_2 = 0.d0 do ii = 1, n_occ_val_orb_for_hf(2) @@ -256,13 +268,13 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) enddo enddo - f_hf_sparse_cholesky(ipoint) += accu_1 * accu_2 + f_hf_cholesky_sparse(ipoint) += accu_1 * accu_2 enddo - f_hf_sparse_cholesky(ipoint) *= 2.D0 + f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo !$OMP END PARALLEL DO call wall_time(wall1) - print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 endif END_PROVIDER diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 5b4d4b83..f2bb7145 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -13,7 +13,6 @@ integer :: ipoint,istate double precision :: wall0,wall1 print*,'providing mu_of_r ...' -! PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals call wall_time(wall0) if (read_mu_of_r) then @@ -26,6 +25,10 @@ do ipoint = 1, n_points_final_grid if(mu_of_r_potential.EQ."hf")then mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + else if(mu_of_r_potential.EQ."hf_old")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + else if(mu_of_r_potential.EQ."hf_sparse")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) else @@ -61,11 +64,10 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE f_hf_cholesky on_top_hf_grid print*,'providing mu_of_r_hf ...' call wall_time(wall0) + PROVIDE f_hf_cholesky on_top_hf_grid sqpi = dsqrt(dacos(-1.d0)) - provide f_psi_hf_ab !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & @@ -85,6 +87,42 @@ print*,'Time to provide mu_of_r_hf = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_sparse, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + print*,'providing mu_of_r_hf_sparse ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + PROVIDE f_hf_cholesky_sparse on_top_hf_grid + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_sparse,f_hf_cholesky_sparse,on_top_hf_grid,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_hf_cholesky_sparse(ipoint) + on_top = on_top_hf_grid(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_sparse(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_sparse = ',wall1-wall0 + END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] implicit none BEGIN_DOC From 1e886ac128187624af9efac5dd0cbe29e594ff5b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 27 May 2024 10:21:29 +0200 Subject: [PATCH 051/131] implemented the f_hf_sparse for open systems in basis set correction --- src/dft_utils_func/on_top_from_ueg.irp.f | 1 - src/mo_two_e_ints/cholesky.irp.f | 4 +++ src/mu_of_r/f_hf_cholesky.irp.f | 45 +++++++++++++++--------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/dft_utils_func/on_top_from_ueg.irp.f b/src/dft_utils_func/on_top_from_ueg.irp.f index 4e28ad89..711ffc39 100644 --- a/src/dft_utils_func/on_top_from_ueg.irp.f +++ b/src/dft_utils_func/on_top_from_ueg.irp.f @@ -32,7 +32,6 @@ double precision function g0_UEG_mu_inf(rho_a,rho_b) C = 0.08193d0 D = -0.01277d0 E = 0.001859d0 - x = -d2*rs if (dabs(rho) > 1.d-20) then rs = (3d0 / (4d0*pi*rho))**(1d0/3d0) ! JT: serious bug fixed 20/03/19 x = -d2*rs diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 349f13b9..0d0989d7 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -34,8 +34,10 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, END_DOC double precision, allocatable :: X(:,:,:) + double precision :: wall0, wall1 integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) if (ierr /= 0) then @@ -46,6 +48,8 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) deallocate(X) + call wall_time(wall1) + print*,'Time for AO->MO Cholesky vectors = ',wall1-wall0 END_PROVIDER diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 17f0229a..472abb1c 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -199,7 +199,7 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 double precision :: thresh_1,thresh_2 - double precision, allocatable :: accu_vec(:) + double precision, allocatable :: accu_vec(:),delta_vec(:) thresh_2 = ao_cholesky_threshold * 100.d0 thresh_1 = dsqrt(thresh_2) provide cholesky_mo_transp @@ -223,12 +223,12 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] mo_b_r1 = mos_in_r_array_omp(m,ipoint) if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle do p = 1, cholesky_mo_num - accu_vec(p) += mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) enddo enddo enddo do p = 1, cholesky_mo_num - f_hf_cholesky_sparse(ipoint) += accu_vec(p) * accu_vec(p) + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) enddo f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo @@ -240,39 +240,50 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 else call wall_time(wall0) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & - !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO do ipoint = 1, n_points_final_grid f_hf_cholesky_sparse(ipoint) = 0.d0 - do p = 1, cholesky_mo_num - accu_2 = 0.d0 + accu_vec = 0.d0 do ii = 1, n_occ_val_orb_for_hf(2) i = list_valence_orb_for_hf(ii,2) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_2 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - accu_1 = accu_2 - do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) i = list_valence_orb_for_hf(ii,1) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - f_hf_cholesky_sparse(ipoint) += accu_1 * accu_2 - enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL call wall_time(wall1) print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 endif From 4d18a0124e9a3cd088ce19e0c56801d7e4fb478d Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 27 May 2024 10:23:47 +0200 Subject: [PATCH 052/131] changed the default in mu_of_r_potential --- src/mu_of_r/EZFIO.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mu_of_r/EZFIO.cfg b/src/mu_of_r/EZFIO.cfg index a66b00ef..7a39b012 100644 --- a/src/mu_of_r/EZFIO.cfg +++ b/src/mu_of_r/EZFIO.cfg @@ -6,9 +6,9 @@ size: (becke_numerical_grid.n_points_final_grid,determinants.n_states) [mu_of_r_potential] type: character*(32) -doc: type of potential for the mu(r) interaction: can be [ hf| cas_full | cas_truncated | pure_act] +doc: type of potential for the mu(r) interaction: can be [ hf| hf_sparse | cas_full | cas_truncated | pure_act] interface: ezfio, provider, ocaml -default: hf +default: hf_sparse [io_mu_of_r] type: Disk_access From 09d9a814d2646cc599bdbdcb0886461fd8ed6688 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 May 2024 11:38:57 +0200 Subject: [PATCH 053/131] Added stop in (T) --- plugins/local/non_h_ints_mu/deb_mos.irp.f | 101 ++++++++++++++++++++++ plugins/local/non_h_ints_mu/qmckl.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 17 +++- src/utils_cc/EZFIO.cfg | 6 ++ 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 plugins/local/non_h_ints_mu/deb_mos.irp.f diff --git a/plugins/local/non_h_ints_mu/deb_mos.irp.f b/plugins/local/non_h_ints_mu/deb_mos.irp.f new file mode 100644 index 00000000..26344786 --- /dev/null +++ b/plugins/local/non_h_ints_mu/deb_mos.irp.f @@ -0,0 +1,101 @@ + +! --- + +program deb_mos + + implicit none + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + + call print_mos() + +end + +! --- + +subroutine print_mos() + + implicit none + integer :: i, ipoint + double precision :: r(3) + double precision :: mo_val, mo_der(3), mo_lap + + PROVIDE final_grid_points mos_in_r_array mos_grad_in_r_array mos_lapl_in_r_array + +! do ipoint = 1, n_points_final_grid +! r(:) = final_grid_points(:,ipoint) +! print*, r +! enddo +double precision :: accu_vgl(5) +double precision :: accu_vgl_nrm(5) + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(1111, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + write(1111, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(2222, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array_qmckl (i,ipoint) + mo_der(:) = mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap = mos_lapl_in_r_array_qmckl(i,ipoint) + write(2222, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + accu_vgl = 0.d0 + accu_vgl_nrm = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + accu_vgl_nrm(1) += dabs(mo_val) + accu_vgl_nrm(2) += dabs(mo_der(1)) + accu_vgl_nrm(3) += dabs(mo_der(2)) + accu_vgl_nrm(4) += dabs(mo_der(3)) + accu_vgl_nrm(5) += dabs(mo_lap) + + mo_val -= mos_in_r_array_qmckl (i,ipoint) + mo_der(:) -= mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap -= mos_lapl_in_r_array_qmckl(i,ipoint) + accu_vgl(1) += dabs(mo_val) + accu_vgl(2) += dabs(mo_der(1)) + accu_vgl(3) += dabs(mo_der(2)) + accu_vgl(4) += dabs(mo_der(3)) + accu_vgl(5) += dabs(mo_lap) + enddo + + enddo + accu_vgl(:) *= 1.d0 / accu_vgl_nrm(:) + print *, accu_vgl + + return +end + +! --- + diff --git a/plugins/local/non_h_ints_mu/qmckl.irp.f b/plugins/local/non_h_ints_mu/qmckl.irp.f index 4d419e24..de440f14 100644 --- a/plugins/local/non_h_ints_mu/qmckl.irp.f +++ b/plugins/local/non_h_ints_mu/qmckl.irp.f @@ -158,7 +158,7 @@ END_PROVIDER double precision, allocatable :: vgl(:,:,:) allocate( vgl(mo_num,5,n_points_final_grid)) - rc = qmckl_get_mo_basis_mo_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) + rc = qmckl_get_mo_basis_mo_vgl(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) if (rc /= QMCKL_SUCCESS) then print *, irp_here, 'qmckl error in get_mo_vgl' rc = qmckl_check(qmckl_ctx, rc) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1093c59d..485382e2 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -217,11 +217,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' ======================= ============== ==========' + double precision :: t_error call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 - !$OMP PARALLEL & - !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + t_error = huge(1.d0) + + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & !$OMP DEFAULT(SHARED) NUM_THREADS(nthreads_pt2) do kiter=1,Nabc @@ -328,15 +331,23 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (norm > 0.d0) then energy_stoch = ET / norm variance = ET2 / norm - energy_stoch*energy_stoch + if (norm > 1.d0) then + t_error = dsqrt(variance/(norm-1.d0)) + else + t_error = dsqrt(variance) + endif endif energy = energy_det + energy_stoch - print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, t_error, 100.*real(Ncomputed)/real(Nabc) + endif !$OMP END MASTER + if (t_error < cc_par_t_stop) exit if (imin > Nabc) exit enddo + !$OMP TASKWAIT !$OMP END PARALLEL print '(A)', ' ======================= ============== ========== ' diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index fb6d9034..7d50d66a 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -58,6 +58,12 @@ doc: If true, the CCSD(T) will be computed. interface: ezfio,ocaml,provider default: False +[cc_par_t_stop] +type: double precision +doc: Stops the calculation when the statistical error bar is below the given value. +interface: ezfio,ocaml,provider +default: 1.e-5 + [cc_dev] type: logical doc: Only for dev purposes. From ed8cfdc599bdbeb6b66fd444cef6dc0ac2756d55 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 May 2024 12:58:07 +0200 Subject: [PATCH 054/131] Added print of the error bar in (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 14 ++++++++------ src/ccsd/ccsd_t_space_orb_stoch.irp.f | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b48ca7da..b8cfab2a 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -192,7 +192,7 @@ subroutine run_ccsd_space_orb deallocate(H_vv,H_oo,H_vo,r1,r2,tau) ! CCSD(T) - double precision :: e_t + double precision :: e_t, e_t_err e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then @@ -210,22 +210,24 @@ subroutine run_ccsd_space_orb !print*,'' ! New + e_t = uncorr_energy + energy ! For print in (T) call + e_t_err = 0.d0 + print*,'Computing (T) correction...' call wall_time(ta) ! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) - e_t = uncorr_energy + energy ! For print in next call call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & - ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) call wall_time(tb) print*,'Time: ',tb-ta, ' s' print*,'' - write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + write(*,'(A15,F18.12,A7,F18.12)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' E(T) = ', e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' Correlation = ', energy + e_t, ' Ha +/- ', e_t_err print*,'' endif diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 485382e2..851b6a9f 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,5 @@ ! Main -subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy,t_error) implicit none @@ -7,7 +7,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(inout) :: energy + double precision, intent(inout) :: energy, t_error double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) @@ -217,7 +217,6 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' ======================= ============== ==========' - double precision :: t_error call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 From b773a361b204a1f8424ae26bc2ac1a1a9d424a9f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:07:29 +0200 Subject: [PATCH 055/131] fixed uninitialized variable in cholesky' --- src/ao_two_e_ints/cholesky.irp.f | 38 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5fbd166c..09d86679 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -22,6 +22,9 @@ END_PROVIDER ! = (ik|jl) = sum_a (ik|a).(a|jl) ! ! Last dimension of cholesky_ao is cholesky_ao_num + ! + ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html + ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 END_DOC integer :: rank, ndim @@ -86,20 +89,25 @@ END_PROVIDER call print_memory_usage() allocate(L(ndim,1)) +!print *, 'allocate : (L(ndim,1))', memory_of_double(ndim) print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' - print *, '============ =============' - print *, ' Rank Threshold' - print *, '============ =============' + print *, '============ ============ =============' + print *, ' Rank Block size Threshold' + print *, '============ ============ =============' rank = 0 allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) +!print *, 'allocate : (D(ndim))', memory_of_int(ndim) +!print *, 'allocate : (Lset(ndim))', memory_of_int(ndim) +!print *, 'allocate : (Dset(ndim))', memory_of_int(ndim) +!print *, 'allocate : (3,addr(ndim))', memory_of_int(3*ndim) ! 1. k=0 @@ -151,9 +159,10 @@ END_PROVIDER ! a. i = i+1 - s = 0.01d0 ! Inrease s until the arrays fit in memory + s = 0.01d0 + block_size = max(N,24) do while (.True.) ! b. @@ -168,6 +177,7 @@ END_PROVIDER endif enddo + call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) @@ -176,23 +186,28 @@ END_PROVIDER if (mem > qp_max_mem) then s = s*2.d0 + block_size = block_size / 2 else exit endif if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() - print *, 'Not enough memory. Reduce cholesky threshold' + print *, 'Required peak memory: ', mem, 'Gb' + call total_memory(mem) + print *, 'Already used memory: ', mem, 'Gb' + print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif enddo ! d., e. - block_size = max(N,24) L_old => L allocate(L(ndim,rank+nq), stat=ierr) +!print *, 'allocate : L(ndim,rank+nq)', memory_of_double(ndim*(rank+nq)) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' @@ -210,6 +225,8 @@ END_PROVIDER deallocate(L_old) allocate(Delta(np,nq), stat=ierr) +!print *, 'allocate : Delta(np,nq)', memory_of_double(np*nq) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' @@ -217,6 +234,8 @@ END_PROVIDER endif allocate(Ltmp_p(np,block_size), stat=ierr) +!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double(np*block_size) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' @@ -224,6 +243,8 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) +!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double(nq*block_size) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' @@ -232,6 +253,7 @@ END_PROVIDER allocate(computed(nq)) +!print *, 'allocate : computed(nq)', memory_of_int(nq) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) @@ -353,7 +375,7 @@ END_PROVIDER enddo - print '(I10, 4X, ES12.3)', rank, Qmax + print '(I10, 4X, I10, 4X, ES12.3)', rank, block_size, Qmax deallocate(computed) deallocate(Delta) @@ -380,6 +402,8 @@ END_PROVIDER enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) +!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', ao_num*ao_num*(rank*1_8) * 8_8 / 1024_8**3 + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': Allocation failed' From 0dca6cfde4ba658518637c2f42ecb2e45c04de6a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:20:04 +0200 Subject: [PATCH 056/131] block size in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 09d86679..5a44571c 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -186,7 +186,7 @@ END_PROVIDER if (mem > qp_max_mem) then s = s*2.d0 - block_size = block_size / 2 + block_size = max(block_size / 2, 1) else exit endif From 0a3d462510a6146da21770ff6fff2f7a1794a0fb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:30:48 +0200 Subject: [PATCH 057/131] Clean up openmp in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5a44571c..cfd57050 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -255,7 +255,7 @@ END_PROVIDER allocate(computed(nq)) !print *, 'allocate : computed(nq)', memory_of_int(nq) - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) !$OMP DO do q=1,nq @@ -351,7 +351,7 @@ END_PROVIDER ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f From b743201efe692294db887f175dceb02a81f73422 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:50:30 +0200 Subject: [PATCH 058/131] Use integer*8 in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 152 ++++++++++++++++++------------- src/utils/memory.irp.f | 20 ++++ 2 files changed, 109 insertions(+), 63 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index cfd57050..daa29079 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -27,19 +27,22 @@ END_PROVIDER ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 END_DOC - integer :: rank, ndim + integer*8 :: ndim8 + integer :: rank double precision :: tau double precision, pointer :: L(:,:), L_old(:,:) double precision :: s - double precision, parameter :: dscale = 1.d0 + double precision :: dscale double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + integer, allocatable :: addr1(:,:), addr2(:,:) + integer**, allocatable :: Lset(:), Dset(:), addr3(:,:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2 + integer*8 :: i8, j8, p8, qj8 integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f @@ -47,15 +50,15 @@ END_PROVIDER logical, external :: ao_two_e_integral_zero double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr + integer :: block_size, iblock double precision :: mem - double precision, external :: memory_of_double, memory_of_int + double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen - integer :: iunit + integer :: iunit, ierr - ndim = ao_num*ao_num + ndim8 = ao_num*ao_num*1_8 deallocate(cholesky_ao) if (read_ao_cholesky) then @@ -83,13 +86,13 @@ END_PROVIDER tau = ao_cholesky_threshold - mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) call check_mem(mem, irp_here) call print_memory_usage() - allocate(L(ndim,1)) -!print *, 'allocate : (L(ndim,1))', memory_of_double(ndim) + allocate(L(ndim8,1)) +print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) print *, '' print *, 'Cholesky decomposition of AO integrals' @@ -102,36 +105,36 @@ END_PROVIDER rank = 0 - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(3,ndim) ) -!print *, 'allocate : (D(ndim))', memory_of_int(ndim) -!print *, 'allocate : (Lset(ndim))', memory_of_int(ndim) -!print *, 'allocate : (Dset(ndim))', memory_of_int(ndim) -!print *, 'allocate : (3,addr(ndim))', memory_of_int(3*ndim) + allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) + allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8), ) +print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) ! 1. k=0 do j=1,ao_num do i=1,ao_num k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j + addr1(k) = i + addr2(k) = j + addr3(k) = (i-1)*ao_num + j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + do i8=1,ndim8 + D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & + addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + do i8=1,ndim8 + D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & + addr2(i8), addr2(i8), & ao_integrals_map) enddo !$OMP END PARALLEL DO @@ -140,12 +143,21 @@ END_PROVIDER Dmax = maxval(D) ! 2. - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - endif + np = huge(1_4) + dscale = 1.d0 + do while (np == huge(1_4)) + np=0 + do p8=1,ndim8 + if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + np = np+1 + Lset(np) = p8 + if (np == huge(1_4)) then + ! Overflow detected + dscale = dscale*0.5d0 + exit + endif + endif + enddo enddo ! 3. @@ -155,7 +167,7 @@ END_PROVIDER i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) + do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4)) ) ! a. i = i+1 @@ -181,7 +193,8 @@ END_PROVIDER call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (rank+nq)* memory_of_double8(ndim8) +&! L(ndim8,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then @@ -205,19 +218,19 @@ END_PROVIDER ! d., e. L_old => L - allocate(L(ndim,rank+nq), stat=ierr) -!print *, 'allocate : L(ndim,rank+nq)', memory_of_double(ndim*(rank+nq)) + allocate(L(ndim8,rank+nq), stat=ierr) +print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) if (ierr /= 0) then call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + print *, irp_here, ': allocation failed : (L(ndim8,rank+nq))' stop -1 endif !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - do j=1,ndim - L(j,k) = L_old(j,k) + do j8=1,ndim8 + L(j8,k) = L_old(j8,k) enddo enddo !$OMP END PARALLEL DO @@ -225,7 +238,7 @@ END_PROVIDER deallocate(L_old) allocate(Delta(np,nq), stat=ierr) -!print *, 'allocate : Delta(np,nq)', memory_of_double(np*nq) +print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) if (ierr /= 0) then call print_memory_usage() @@ -234,7 +247,7 @@ END_PROVIDER endif allocate(Ltmp_p(np,block_size), stat=ierr) -!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double(np*block_size) +print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8) if (ierr /= 0) then call print_memory_usage() @@ -243,7 +256,7 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) -!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double(nq*block_size) +print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8) if (ierr /= 0) then call print_memory_usage() @@ -253,8 +266,9 @@ END_PROVIDER allocate(computed(nq)) -!print *, 'allocate : computed(nq)', memory_of_int(nq) +print *, 'allocate : computed(nq)', memory_of_int(nq) +print *, 'p1' !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) !$OMP DO @@ -296,7 +310,7 @@ END_PROVIDER iblock = 0 do j=1,nq - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + if ( (Qmax <= Dmin).or.(N+j*1_8 > ndim8) ) exit ! i. rank = N+j @@ -308,28 +322,28 @@ END_PROVIDER ! ii. do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then + qj8 = Dset(dj) + if (D(qj8) == Qmax) then exit endif enddo - L(1:ndim, rank) = 0.d0 + L(1:ndim8, rank) = 0.d0 if (.not.computed(dj)) then m = dj !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then if (do_direct_integrals) then Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) else Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) endif endif enddo @@ -391,18 +405,28 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - endif + np = huge(1_4) + dscale = 1.d0 + do while (np == huge(1_4)) + np=0 + do p8=1,ndim8 + if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + np = np+1 + Lset(np) = p8 + if (np == huge(1_4)) then + ! Overflow detected + dscale = dscale*0.5d0 + exit + endif + endif + enddo enddo + enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', ao_num*ao_num*(rank*1_8) * 8_8 / 1024_8**3 +print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -411,7 +435,9 @@ END_PROVIDER endif !$OMP PARALLEL DO PRIVATE(k) do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + do j=1,ao_num + call dcopy(ao_num, L((j-1)*ao_num+1,k), 1, cholesky_ao(1,j,k), 1) + enddo enddo !$OMP END PARALLEL DO deallocate(L) diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index e69bf71e..043562db 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -79,6 +79,26 @@ IRP_ENDIF call unlock_io() end function +double precision function memory_of_double8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 8.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_double8 = dble(n) * f +end function + +double precision function memory_of_int8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 4.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_int8 = dble(n) * f +end function + double precision function memory_of_double(n) implicit none BEGIN_DOC From 38d386d36c78ff87a6c1062d065ab2b12fa4dcc9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 2 Jun 2024 19:03:05 +0200 Subject: [PATCH 059/131] Debug cholesky --- config/gfortran_debug_mkl.cfg | 63 ++++++++++++ src/ao_two_e_ints/cholesky.irp.f | 169 +++++++++++++++++++------------ 2 files changed, 166 insertions(+), 66 deletions(-) create mode 100644 config/gfortran_debug_mkl.cfg diff --git a/config/gfortran_debug_mkl.cfg b/config/gfortran_debug_mkl.cfg new file mode 100644 index 00000000..1dc3f2b2 --- /dev/null +++ b/config/gfortran_debug_mkl.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -g -ffree-line-length-none -I . -fPIC -std=legacy +LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +#FCFLAGS : -g -msse4.2 -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan +FCFLAGS : -g -mavx -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow -finit-real=nan + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index daa29079..3d0baa48 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,21 +29,20 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank - double precision :: tau + double precision :: tau, tau2 double precision, pointer :: L(:,:), L_old(:,:) - double precision :: s - double precision :: dscale + double precision :: dscale, dscale_tmp double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: addr1(:,:), addr2(:,:) - integer**, allocatable :: Lset(:), Dset(:), addr3(:,:) + integer, allocatable :: addr1(:), addr2(:) + integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2 integer*8 :: i8, j8, p8, qj8 - integer :: N, np, nq + integer :: N, np, nq, npmax double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral @@ -53,6 +52,7 @@ END_PROVIDER integer :: block_size, iblock double precision :: mem + double precision, external :: memory_of_double, memory_of_int double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen @@ -61,6 +61,9 @@ END_PROVIDER ndim8 = ao_num*ao_num*1_8 deallocate(cholesky_ao) + +! TODO : Save L() to disk + if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -85,6 +88,7 @@ END_PROVIDER endif tau = ao_cholesky_threshold + tau2 = tau*tau mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) call check_mem(mem, irp_here) @@ -92,25 +96,25 @@ END_PROVIDER call print_memory_usage() allocate(L(ndim8,1)) -print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) +!print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' - print *, '============ ============ =============' - print *, ' Rank Block size Threshold' - print *, '============ ============ =============' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' rank = 0 allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) - allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8), ) -print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) + allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) +!print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) ! 1. k=0 @@ -124,14 +128,14 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) do i8=1,ndim8 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) do i8=1,ndim8 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & addr2(i8), addr2(i8), & @@ -143,17 +147,22 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) Dmax = maxval(D) ! 2. - np = huge(1_4) + npmax = huge(1_4)*1_8 + np = npmax dscale = 1.d0 - do while (np == huge(1_4)) + dscale_tmp = Dmax + do while (np == npmax) np=0 do p8=1,ndim8 - if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + if ( dscale_tmp*D(p8) > tau2 ) then np = np+1 Lset(np) = p8 - if (np == huge(1_4)) then + if (np == npmax) then ! Overflow detected - dscale = dscale*0.5d0 + dscale = dscale*0.1d0 + dscale_tmp = dscale*dscale*Dmax +!print *, 'Overflow detected ' +!print *, 'dscale = ', dscale exit endif endif @@ -167,7 +176,7 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4)) ) + do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4))) ) ! a. i = i+1 @@ -191,15 +200,13 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) call total_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double8(ndim8) -&! L(ndim8,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + mem = mem & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double8(ndim8) &! L(ndim8,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then s = s*2.d0 - block_size = max(block_size / 2, 1) else exit endif @@ -219,7 +226,7 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) L_old => L allocate(L(ndim8,rank+nq), stat=ierr) -print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) +!print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) if (ierr /= 0) then call print_memory_usage() @@ -227,7 +234,7 @@ print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) stop -1 endif - !$OMP PARALLEL DO PRIVATE(k,j) + !$OMP PARALLEL DO PRIVATE(k,j8) do k=1,rank do j8=1,ndim8 L(j8,k) = L_old(j8,k) @@ -238,7 +245,7 @@ print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) deallocate(L_old) allocate(Delta(np,nq), stat=ierr) -print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) +!print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) if (ierr /= 0) then call print_memory_usage() @@ -247,7 +254,7 @@ print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) endif allocate(Ltmp_p(np,block_size), stat=ierr) -print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8) +!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size if (ierr /= 0) then call print_memory_usage() @@ -256,7 +263,7 @@ print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8 endif allocate(Ltmp_q(nq,block_size), stat=ierr) -print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8) +!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8), nq, block_size if (ierr /= 0) then call print_memory_usage() @@ -266,34 +273,47 @@ print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8 allocate(computed(nq)) -print *, 'allocate : computed(nq)', memory_of_int(nq) +!print *, 'allocate : computed(nq)', memory_of_int(nq) -print *, 'p1' +!print *, 'N, rank, block_size', N, rank, block_size +!print *, 'p1' !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) +!print *, 'computed' + !$OMP DO + do q=1,nq + computed(q) = .False. + enddo + !$OMP ENDDO NOWAIT + +!print *, 'Delta' !$OMP DO do q=1,nq do j=1,np Delta(j,q) = 0.d0 enddo - computed(q) = .False. enddo !$OMP ENDDO NOWAIT - !$OMP DO +!print *, 'Ltmp_p' do k=1,N + !$OMP DO do p=1,np Ltmp_p(p,k) = L(Lset(p),k) enddo + !$OMP END DO NOWAIT + + !$OMP DO do q=1,nq Ltmp_q(q,k) = L(Dset(q),k) enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT !$OMP BARRIER !$OMP END PARALLEL +!print *, 'p2', np, nq, N if (N>0) then call dgemm('N','T', np, nq, N, -1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -315,8 +335,10 @@ print *, 'p1' rank = N+j if (iblock == block_size) then +!print *, 'dgemm' call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 endif @@ -328,36 +350,47 @@ print *, 'p1' endif enddo - L(1:ndim8, rank) = 0.d0 + do i8=1,ndim8 + L(i8, rank) = 0.d0 + enddo if (.not.computed(dj)) then m = dj - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) - do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& - addr2(Lset(k)), addr2(Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& - addr1(Dset(m)), addr2(Dset(m))) - else - Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& - addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) - endif - endif - enddo - !$OMP END PARALLEL DO + if (do_direct_integrals) then + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) + endif + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + endif + enddo + !$OMP END PARALLEL DO + endif computed(dj) = .True. endif iblock = iblock+1 +!print *, iblock do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo ! iv. if (iblock > 1) then +!print *, 'dgemv', iblock call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif @@ -365,6 +398,7 @@ print *, 'p1' ! iii. f = 1.d0/dsqrt(Qmax) +!print *, 'p4' !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np @@ -379,7 +413,6 @@ print *, 'p1' Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -389,12 +422,12 @@ print *, 'p1' enddo - print '(I10, 4X, I10, 4X, ES12.3)', rank, block_size, Qmax + print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(computed) - deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) + deallocate(computed) + deallocate(Delta) ! i. N = rank @@ -405,17 +438,21 @@ print *, 'p1' Dmax = max(Dmax, D(Lset(p))) enddo - np = huge(1_4) + np = npmax dscale = 1.d0 - do while (np == huge(1_4)) + dscale_tmp = Dmax + do while (np == npmax) np=0 do p8=1,ndim8 - if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + if ( dscale_tmp*D(p8) > tau2 ) then np = np+1 Lset(np) = p8 - if (np == huge(1_4)) then + if (np == npmax) then ! Overflow detected dscale = dscale*0.5d0 + dscale_tmp = dscale*dscale*Dmax +!print *, 'Overflow detected ' +!print *, 'dscale = ', dscale exit endif endif @@ -426,7 +463,7 @@ print *, 'p1' enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) +!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -436,7 +473,7 @@ print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num* !$OMP PARALLEL DO PRIVATE(k) do k=1,rank do j=1,ao_num - call dcopy(ao_num, L((j-1)*ao_num+1,k), 1, cholesky_ao(1,j,k), 1) + cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) enddo enddo !$OMP END PARALLEL DO From ff59e9efcc2ab1e4ffaa352e86bbb14491ce1531 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 2 Jun 2024 19:16:56 +0200 Subject: [PATCH 060/131] added print in src/ao_two_e_ints/cholesky.irp.f --- src/ao_two_e_ints/cholesky.irp.f | 4 ++++ src/mu_of_r/f_hf_cholesky.irp.f | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5fbd166c..1d8b8948 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -51,7 +51,9 @@ END_PROVIDER integer, external :: getUnitAndOpen integer :: iunit + double precision :: wall0,wall1 + call wall_time(wall0) ndim = ao_num*ao_num deallocate(cholesky_ao) @@ -409,6 +411,8 @@ END_PROVIDER print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' + call wall_time(wall1) + print*,'Time to provide AO cholesky vectors = ',wall1-wall0 END_PROVIDER diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 472abb1c..5dd69eb6 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -220,10 +220,10 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) - mo_b_r1 = mos_in_r_array_omp(m,ipoint) - if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + mo_b_r1 = mos_in_r_array_omp(m,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle do p = 1, cholesky_mo_num - accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_mo_transp(p,m,i) enddo enddo enddo From c1ca673a6fd39ef574bb5a41b420a18a45f85d58 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 11:24:25 +0200 Subject: [PATCH 061/131] Added omp_lib.F file --- etc/paths.rc | 9 +++++++++ src/ezfio_files/omp_lib.F | 4 ++++ 2 files changed, 13 insertions(+) create mode 100644 src/ezfio_files/omp_lib.F diff --git a/etc/paths.rc b/etc/paths.rc index dc1741e8..843ec383 100644 --- a/etc/paths.rc +++ b/etc/paths.rc @@ -28,6 +28,15 @@ function qp_prepend_export () { fi } +function qp_append_export () { + eval "value_1="\${$1}"" + if [[ -z $value_1 ]] ; then + echo "${2}:" + else + echo "${value_1}:${2}" + fi +} + export PYTHONPATH=$(qp_prepend_export "PYTHONPATH" "${QP_EZFIO}/Python":"${QP_PYTHON}") export PATH=$(qp_prepend_export "PATH" "${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml) diff --git a/src/ezfio_files/omp_lib.F b/src/ezfio_files/omp_lib.F new file mode 100644 index 00000000..b3df8e0a --- /dev/null +++ b/src/ezfio_files/omp_lib.F @@ -0,0 +1,4 @@ + module omp_lib +#include + end module + From 2a9b8c56a121fbe9880b9d6bebe344e78ded6355 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 11:32:56 +0200 Subject: [PATCH 062/131] omp_lib was a bad idea... --- src/ezfio_files/omp_lib.F | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/ezfio_files/omp_lib.F diff --git a/src/ezfio_files/omp_lib.F b/src/ezfio_files/omp_lib.F deleted file mode 100644 index b3df8e0a..00000000 --- a/src/ezfio_files/omp_lib.F +++ /dev/null @@ -1,4 +0,0 @@ - module omp_lib -#include - end module - From c95a0b2d87f34e07e52f535e1548081c97eba0eb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 16:15:09 +0200 Subject: [PATCH 063/131] Disk-based cholesky --- src/ao_two_e_ints/cholesky.irp.f | 180 ++++++++++++------------ src/ao_two_e_ints/two_e_integrals.irp.f | 4 +- src/ezfio_files/get_unit_and_open.irp.f | 8 +- 3 files changed, 96 insertions(+), 96 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index b98dfd5b..f689a65e 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -16,6 +16,7 @@ END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + use mmap_module implicit none BEGIN_DOC ! Cholesky vectors in AO basis: (ik|a): @@ -30,19 +31,19 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), L_old(:,:) + double precision, pointer :: L(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2 - integer*8 :: i8, j8, p8, qj8 - integer :: N, np, nq, npmax + integer*8 :: i8, j8, p8, qj8, rank_max, np8 + integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral @@ -61,12 +62,12 @@ END_PROVIDER ndim8 = ao_num*ao_num*1_8 double precision :: wall0,wall1 + type(c_ptr) :: c_pointer(2) + integer :: fd(2) + call wall_time(wall0) deallocate(cholesky_ao) - -! TODO : Save L() to disk - if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -81,6 +82,16 @@ END_PROVIDER PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) + rank_max = min(ndim8,274877906944_8/1_8/ndim8) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) +!print *, 'rank_max/ndim8', dble(rank_max) / dble(ndim8) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -98,9 +109,6 @@ END_PROVIDER call print_memory_usage() - allocate(L(ndim8,1)) -!print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) - print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' @@ -112,7 +120,7 @@ END_PROVIDER rank = 0 - allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) !print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) !print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) @@ -132,44 +140,52 @@ END_PROVIDER if (do_direct_integrals) then !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) - do i8=1,ndim8 + do i8=ndim8,1,-1 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) - do i8=1,ndim8 + do i8=ndim8,1,-1 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & addr2(i8), addr2(i8), & ao_integrals_map) enddo !$OMP END PARALLEL DO endif + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = dabs(D_sorted(:)) - Dmax = maxval(D) + Dmax = D_sorted(1) ! 2. - npmax = huge(1_4)*1_8 - np = npmax - dscale = 1.d0 - dscale_tmp = Dmax - do while (np == npmax) - np=0 + dscale = tau2/Dmax + do i8=1,ndim8 + if (D_sorted(i8) <= dscale) exit + enddo + + + mem = qp_max_mem+1 + do while ( (mem > qp_max_mem).and.(i8>1_8) ) + dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) + dscale_tmp = dscale*dscale*Dmax +! print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) + np8=0_8 do p8=1,ndim8 if ( dscale_tmp*D(p8) > tau2 ) then - np = np+1 - Lset(np) = p8 - if (np == npmax) then - ! Overflow detected - dscale = dscale*0.1d0 - dscale_tmp = dscale*dscale*Dmax -!print *, 'Overflow detected ' -!print *, 'dscale = ', dscale - exit - endif + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + i8 = i8*3_8/4_8 + if (np8 > huge(1_4)/64_8) cycle + np = np8 +! print *, 'np = ', np + call resident_memory(mem) + mem = mem & + + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) enddo ! 3. @@ -179,7 +195,7 @@ END_PROVIDER i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4))) ) + do while ( (Dmax > tau).and.(rank*1_8 < min(ndim8,rank_max)) ) ! a. i = i+1 @@ -202,12 +218,12 @@ END_PROVIDER enddo - call total_memory(mem) + call resident_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double8(ndim8) &! L(ndim8,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) +!print *, 'mem = ', mem if (mem > qp_max_mem) then s = s*2.d0 else @@ -217,7 +233,7 @@ END_PROVIDER if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() print *, 'Required peak memory: ', mem, 'Gb' - call total_memory(mem) + call resident_memory(mem) print *, 'Already used memory: ', mem, 'Gb' print *, 'Not enough memory. Reduce cholesky threshold' stop -1 @@ -227,26 +243,6 @@ END_PROVIDER ! d., e. - L_old => L - allocate(L(ndim8,rank+nq), stat=ierr) -!print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) - - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim8,rank+nq))' - stop -1 - endif - - !$OMP PARALLEL DO PRIVATE(k,j8) - do k=1,rank - do j8=1,ndim8 - L(j8,k) = L_old(j8,k) - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(L_old) - allocate(Delta(np,nq), stat=ierr) !print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) @@ -280,39 +276,29 @@ END_PROVIDER !print *, 'N, rank, block_size', N, rank, block_size !print *, 'p1' - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) - -!print *, 'computed' - !$OMP DO + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(p,q,j) do q=1,nq computed(q) = .False. - enddo - !$OMP ENDDO NOWAIT - -!print *, 'Delta' - !$OMP DO - do q=1,nq do j=1,np Delta(j,q) = 0.d0 enddo enddo - !$OMP ENDDO NOWAIT + !$OMP END PARALLEL DO -!print *, 'Ltmp_p' + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) do k=1,N !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo !$OMP END DO NOWAIT !$OMP DO do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) + Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT enddo - !$OMP BARRIER !$OMP END PARALLEL @@ -338,7 +324,7 @@ END_PROVIDER rank = N+j if (iblock == block_size) then -!print *, 'dgemm' +!print *, 'dgemm', np, nq call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -401,7 +387,6 @@ END_PROVIDER ! iii. f = 1.d0/dsqrt(Qmax) -!print *, 'p4' !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np @@ -441,30 +426,42 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - np = npmax - dscale = 1.d0 - dscale_tmp = Dmax - do while (np == npmax) - np=0 + mem = qp_max_mem+1 + do while ( (mem > qp_max_mem).and.(i8>1_8) ) + dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) + dscale_tmp = dscale*dscale*Dmax +!print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) + np8=0_8 do p8=1,ndim8 if ( dscale_tmp*D(p8) > tau2 ) then - np = np+1 - Lset(np) = p8 - if (np == npmax) then - ! Overflow detected - dscale = dscale*0.5d0 - dscale_tmp = dscale*dscale*Dmax -!print *, 'Overflow detected ' -!print *, 'dscale = ', dscale - exit - endif + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + i8 = i8*3_8/4_8 + if (np8 > huge(1_4)/64_8) cycle + np = np8 +!print *, 'np = ', np + call resident_memory(mem) + mem = mem & + + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) enddo + if (np == 0) then + call print_memory_usage() + print *, 'Required peak memory: ', mem, 'Gb' + call resident_memory(mem) + print *, 'Already used memory: ', mem, 'Gb' + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif enddo + + print *, '============ =============' + print *, '' + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) !print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) @@ -473,18 +470,19 @@ END_PROVIDER print *, irp_here, ': Allocation failed' stop -1 endif - !$OMP PARALLEL DO PRIVATE(k) + + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) enddo enddo !$OMP END PARALLEL DO - deallocate(L) - cholesky_ao_num = rank - print *, '============ =============' - print *, '' + call munmap( (/ ndim8, ndim8 /), 8, fd(1), c_pointer(1) ) + + cholesky_ao_num = rank if (write_ao_cholesky) then print *, 'Writing Cholesky vectors to disk...' diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index b55b5f0d..d12f3d45 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,8 +460,8 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(guided) - do i=1,ao_num + !$OMP SCHEDULE(dynamic) + do i=ao_num,1,-1 do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) ao_two_e_integral_schwartz(k,i) = ao_two_e_integral_schwartz(i,k) diff --git a/src/ezfio_files/get_unit_and_open.irp.f b/src/ezfio_files/get_unit_and_open.irp.f index 6440579f..d6a7efac 100644 --- a/src/ezfio_files/get_unit_and_open.irp.f +++ b/src/ezfio_files/get_unit_and_open.irp.f @@ -47,11 +47,13 @@ integer function getUnitAndOpen(f,mode) endif open(unit=getUnitAndOpen,file=f,status='OLD',action='READ',form='UNFORMATTED') else if (mode.eq.'W') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='UNFORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='UNFORMATTED') + else if (mode.eq.'A') then + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='UNFORMATTED') else if (mode.eq.'w') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='FORMATTED') else if (mode.eq.'a') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',position='APPEND',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='FORMATTED') else if (mode.eq.'x') then open(unit=getUnitAndOpen,file=new_f,form='FORMATTED') endif From b9f041e5e587e0ce96e845fd437f3fc5abeb3272 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 19:31:39 +0200 Subject: [PATCH 064/131] More I/O in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 198 +++++++++++++++---------------- 1 file changed, 94 insertions(+), 104 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f689a65e..34b91f0f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,12 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:) + double precision, pointer :: L(:,:), Delta(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) @@ -52,7 +52,7 @@ END_PROVIDER double precision, external :: ao_two_e_integral integer :: block_size, iblock - double precision :: mem + double precision :: mem, mem0 double precision, external :: memory_of_double, memory_of_int double precision, external :: memory_of_double8, memory_of_int8 @@ -64,8 +64,11 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) + logical :: delta_on_disk call wall_time(wall0) + + ! Will be reallocated at the end deallocate(cholesky_ao) if (read_ao_cholesky) then @@ -82,11 +85,11 @@ END_PROVIDER PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) + call resident_memory(mem0) + rank_max = min(ndim8,274877906944_8/1_8/ndim8) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) -!print *, 'rank_max/ndim8', dble(rank_max) / dble(ndim8) - ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') @@ -161,32 +164,16 @@ END_PROVIDER Dmax = D_sorted(1) ! 2. - dscale = tau2/Dmax - do i8=1,ndim8 - if (D_sorted(i8) <= dscale) exit - enddo - - - mem = qp_max_mem+1 - do while ( (mem > qp_max_mem).and.(i8>1_8) ) - dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) - dscale_tmp = dscale*dscale*Dmax -! print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) - np8=0_8 - do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then - np8 = np8+1_8 - Lset(np8) = p8 - endif - enddo - i8 = i8*3_8/4_8 - if (np8 > huge(1_4)/64_8) cycle - np = np8 -! print *, 'np = ', np - call resident_memory(mem) - mem = mem & - + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) + dscale = 1.d0 + dscale_tmp = dscale*dscale*Dmax + np8=0_8 + do p8=1,ndim8 + if ( dscale_tmp*D(p8) > tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 + endif enddo + np = np8 ! 3. N = 0 @@ -218,13 +205,11 @@ END_PROVIDER enddo - call resident_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + mem = mem0 & + + np*memory_of_double(nq) !print *, 'mem = ', mem - if (mem > qp_max_mem) then + if (mem > 300.d0) then ! 300GB max for Delta s = s*2.d0 else exit @@ -239,18 +224,33 @@ END_PROVIDER stop -1 endif + if (s > 0.1d0) then + exit + endif + enddo ! d., e. + mem = mem0 & + + memory_of_int(nq) &! computed(nq) + + np*memory_of_int(nq) &! computed(nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - allocate(Delta(np,nq), stat=ierr) -!print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) - - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 + if (mem > qp_max_mem) then + call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') + close(iunit,status='delete') + delta_on_disk = .True. + else + allocate(Delta(np,nq)) + delta_on_disk = .False. endif + print *, delta_on_disk + + allocate(Delta_col(np)) allocate(Ltmp_p(np,block_size), stat=ierr) !print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size @@ -272,40 +272,38 @@ END_PROVIDER allocate(computed(nq)) -!print *, 'allocate : computed(nq)', memory_of_int(nq) !print *, 'N, rank, block_size', N, rank, block_size -!print *, 'p1' - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(p,q,j) - do q=1,nq - computed(q) = .False. - do j=1,np - Delta(j,q) = 0.d0 - enddo - enddo - !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) do k=1,N !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo !$OMP END DO NOWAIT !$OMP DO do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) + computed(q) = .False. + Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT enddo !$OMP BARRIER !$OMP END PARALLEL -!print *, 'p2', np, nq, N if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 0.d0, Delta, np) + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) SCHEDULE(static,1) + do q=1,nq + do j=1,np + Delta(j,q) = 0.d0 + enddo + enddo + !$OMP END PARALLEL DO endif ! f. @@ -324,10 +322,8 @@ END_PROVIDER rank = N+j if (iblock == block_size) then -!print *, 'dgemm', np, nq call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) iblock = 0 endif @@ -343,43 +339,51 @@ END_PROVIDER L(i8, rank) = 0.d0 enddo + iblock = iblock+1 + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO + if (.not.computed(dj)) then m = dj if (do_direct_integrals) then - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) - do k=np,1,-1 + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then - Delta(k,m) = Delta(k,m) + & + Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) endif enddo !$OMP END PARALLEL DO - else - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) - do k=np,1,-1 + else + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then - Delta(k,m) = Delta(k,m) + & + Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) endif enddo !$OMP END PARALLEL DO endif + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. endif - iblock = iblock+1 -!print *, iblock - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - ! iv. if (iblock > 1) then -!print *, 'dgemv', iblock call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif @@ -412,10 +416,15 @@ END_PROVIDER print '(I10, 4X, ES12.3)', rank, Qmax + deallocate(Delta_col) deallocate(Ltmp_p) deallocate(Ltmp_q) deallocate(computed) - deallocate(Delta) + if (delta_on_disk) then + call munmap( (/ np*1_8, nq*1_8 /), 8, fd(2), c_pointer(2) ) + else + deallocate(Delta) + endif ! i. N = rank @@ -426,35 +435,16 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - mem = qp_max_mem+1 - do while ( (mem > qp_max_mem).and.(i8>1_8) ) - dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) - dscale_tmp = dscale*dscale*Dmax -!print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) - np8=0_8 - do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then - np8 = np8+1_8 - Lset(np8) = p8 - endif - enddo - i8 = i8*3_8/4_8 - if (np8 > huge(1_4)/64_8) cycle - np = np8 -!print *, 'np = ', np - call resident_memory(mem) - mem = mem & - + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) + dscale = 1.d0 + dscale_tmp = dscale*dscale*Dmax + np8=0_8 + do p8=1,ndim8 + if ( dscale_tmp*D(p8) > tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 + endif enddo - - if (np == 0) then - call print_memory_usage() - print *, 'Required peak memory: ', mem, 'Gb' - call resident_memory(mem) - print *, 'Already used memory: ', mem, 'Gb' - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif + np = np8 enddo @@ -480,7 +470,7 @@ END_PROVIDER enddo !$OMP END PARALLEL DO - call munmap( (/ ndim8, ndim8 /), 8, fd(1), c_pointer(1) ) + call munmap( (/ ndim8, rank_max /), 8, fd(1), c_pointer(1) ) cholesky_ao_num = rank From 19286bede43e5b01b5997ccf5e709a2ade9a4456 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jun 2024 02:51:12 +0200 Subject: [PATCH 065/131] Initialization --- src/ao_two_e_ints/cholesky.irp.f | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 34b91f0f..a7b2389f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -248,7 +248,7 @@ END_PROVIDER allocate(Delta(np,nq)) delta_on_disk = .False. endif - print *, delta_on_disk +!print *, delta_on_disk allocate(Delta_col(np)) @@ -275,7 +275,7 @@ END_PROVIDER !print *, 'N, rank, block_size', N, rank, block_size - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO do p=1,np @@ -356,6 +356,8 @@ END_PROVIDER Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) + else + Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO @@ -367,6 +369,8 @@ END_PROVIDER Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + else + Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO From 36a2f0b46ff535607764b662e8013cba49347ff8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jun 2024 03:16:55 +0200 Subject: [PATCH 066/131] Fixed cholesky --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index a7b2389f..09131b5d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -272,9 +272,10 @@ END_PROVIDER allocate(computed(nq)) + computed(:) = .False. + !print *, 'N, rank, block_size', N, rank, block_size - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO @@ -285,7 +286,6 @@ END_PROVIDER !$OMP DO do q=1,nq - computed(q) = .False. Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT From 2241096a6485a71406c12e5c2ae3165a4a838aeb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jun 2024 13:53:30 +0200 Subject: [PATCH 067/131] Trying to improve mmap performance --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- .../dav_diag_dressed_ext_rout.irp.f | 1 - ...diag_dressed_ext_rout_nonsym_B1space.irp.f | 2 -- .../dav_double_dress_ext_rout.irp.f | 1 - .../dav_dressed_ext_rout.irp.f | 1 - src/dav_general_mat/dav_ext_rout.irp.f | 1 - .../dav_ext_rout_nonsym_B1space.irp.f | 2 -- src/dav_general_mat/dav_general.irp.f | 6 +++--- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 4 ++-- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/utils/fortran_mmap.c | 13 +++++++++---- src/utils/map_functions.irp.f | 12 ++++++------ src/utils/memory.irp.f | 2 +- src/utils/mmap.f90 | 19 +++++++++++-------- 16 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 09131b5d..3cd400f8 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -88,7 +88,7 @@ END_PROVIDER call resident_memory(mem0) rank_max = min(ndim8,274877906944_8/1_8/ndim8) - call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes @@ -237,7 +237,7 @@ END_PROVIDER + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then - call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., c_pointer(2)) + call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 0dc939cb..f57b7f92 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization with ONE DIAGONAL DRESSING OPERATOR diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f index 1a8269f4..c8848998 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 24f4fa10..1ff6632c 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies,sze,N_st,N_st_diag,converged,hcalc) - use mmap_module BEGIN_DOC ! Generic Davidson diagonalization with TWO DRESSING VECTORS ! diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index cedaaf0a..ca59a688 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_diag,dressing_state,dressing_vec,idress,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization. diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index deb7e3a9..ad60b2a8 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index d89aaadb..ca0a835e 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index 9940bf1e..a277d9ef 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -1,6 +1,6 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,h_mat) - use mmap_module +! use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization with specific diagonal elements of the H matrix @@ -160,9 +160,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! type(c_ptr) :: ptr_w, ptr_s ! integer :: fd_s, fd_w ! call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 8, fd_w, .False., ptr_w) +! 8, fd_w, .False., .True., ptr_w) ! call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 4, fd_s, .False., ptr_s) +! 4, fd_s, .False., .True., ptr_s) ! call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) ! call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) ! else diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index b7179c18..15bf256d 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -228,7 +228,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index fa8aff80..656dd1d9 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -229,7 +229,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index fd967ecc..fb04b29b 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -270,9 +270,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 4, fd_s, .False., ptr_s) + 4, fd_s, .False., .True., ptr_s) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) else diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 96ca84ab..86df3a19 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -251,7 +251,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index e8d85a2f..fdf7fb6f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -7,7 +7,7 @@ #include -void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) +void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, int single_node) { int fd; int result; @@ -21,7 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); + map = mmap(NULL, bytes, PROT_READ, MAP_PRIVATE, fd, 0); } else { @@ -39,7 +39,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error calling lseek() to stretch the file"); exit(EXIT_FAILURE); } - + result = write(fd, "", 1); if (result != 1) { close(fd); @@ -48,7 +48,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_HUGETLB , fd, 0); + } else { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); + } } if (map == MAP_FAILED) { diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index 97d0e8bf..e3a62b07 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -21,13 +21,13 @@ subroutine map_save_to_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) - call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ n_elements /)) if (.not.associated(map%consolidated_key)) then @@ -85,15 +85,15 @@ subroutine map_load_from_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 - call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) l = 0_8 diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 043562db..e2e8dd76 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] character*(128) :: env integer, external :: get_total_available_memory - qp_max_mem = get_total_available_memory() + qp_max_mem = max(get_total_available_memory() - 1,3) call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 41e60224..723cb771 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -7,12 +7,13 @@ module mmap_module ! File descriptors ! ---------------- - type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only) bind(c,name='mmap_fortran') + type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only, single_node) bind(c,name='mmap_fortran') use iso_c_binding character(c_char), intent(in) :: filename(*) integer(c_size_t), intent(in), value :: length integer(c_int), intent(out) :: fd integer(c_int), intent(in), value :: read_only + integer(c_int), intent(in), value :: single_node end function subroutine c_munmap_fortran(length, fd, map) bind(c,name='munmap_fortran') @@ -33,31 +34,33 @@ module mmap_module contains - subroutine mmap(filename, shape, bytes, fd, read_only, map) + subroutine mmap(filename, shape, bytes, fd, read_only, single_node, map) use iso_c_binding implicit none character*(*), intent(in) :: filename ! Name of the mapped file integer*8, intent(in) :: shape(:) ! Shape of the array to map integer, intent(in) :: bytes ! Number of bytes per element logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node integer, intent(out) :: fd ! File descriptor type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length integer(c_int) :: fd_ - integer :: i + integer :: i, read_only_, single_node_ + + read_only_ = 0 + single_node_ = 0 + if (read_only_) read_only_ = 1 + if (single_node_) single_node_ = 1 length = int(bytes,8) do i=1,size(shape) length = length * shape(i) enddo - if (read_only) then - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) - else - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 0) - endif + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only, single_node) fd = fd_ end subroutine From 6ae162b6c93d0bb31749086736dea13af9edfb56 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 14:33:13 +0200 Subject: [PATCH 068/131] Disk-based cholesky OK --- src/ao_two_e_ints/cholesky.irp.f | 13 ++++++++++--- src/utils/fortran_mmap.c | 5 ++--- src/utils/mmap.f90 | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 3cd400f8..6778d5c7 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -66,6 +66,10 @@ END_PROVIDER integer :: fd(2) logical :: delta_on_disk + PROVIDE nproc + PROVIDE nucl_coord ao_two_e_integral_schwartz + call set_multiple_levels_omp(.False.) + call wall_time(wall0) ! Will be reallocated at the end @@ -87,7 +91,7 @@ END_PROVIDER call resident_memory(mem0) - rank_max = min(ndim8,274877906944_8/1_8/ndim8) + rank_max = min(ndim8,(qp_max_mem*1024_8*1024_8*1024_8/8_8)/ndim8) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) ! Deleting the file while it is open makes the file invisible on the filesystem, @@ -209,7 +213,7 @@ END_PROVIDER + np*memory_of_double(nq) !print *, 'mem = ', mem - if (mem > 300.d0) then ! 300GB max for Delta + if (mem > qp_max_mem/2) then s = s*2.d0 else exit @@ -231,9 +235,12 @@ END_PROVIDER enddo ! d., e. - mem = mem0 & + mem = mem0 & + memory_of_int(nq) &! computed(nq) + np*memory_of_int(nq) &! computed(nq) + + memory_of_double(np) &! Delta_col(np) + + 7*memory_of_double(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index fdf7fb6f..711a9c34 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -49,10 +49,9 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_HUGETLB , fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK, fd, 0); } else { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } } diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 723cb771..af3fe6ed 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,9 +46,9 @@ module mmap_module type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length - integer(c_int) :: fd_ + integer(c_int) :: fd_, read_only_, single_node_ - integer :: i, read_only_, single_node_ + integer :: i read_only_ = 0 single_node_ = 0 @@ -60,7 +60,7 @@ module mmap_module length = length * shape(i) enddo - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only, single_node) + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only_, single_node_) fd = fd_ end subroutine From af8973770e9265f6b8f997edde1873b97c1a48da Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 14:39:34 +0200 Subject: [PATCH 069/131] Typo in mmap --- src/utils/mmap.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index af3fe6ed..e342b422 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -52,8 +52,8 @@ module mmap_module read_only_ = 0 single_node_ = 0 - if (read_only_) read_only_ = 1 - if (single_node_) single_node_ = 1 + if (read_only) read_only_ = 1 + if (single_node) single_node_ = 1 length = int(bytes,8) do i=1,size(shape) From f58df5e81669226e728f744308593db5a5e4cad0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:09:53 +0200 Subject: [PATCH 070/131] Added do_mo_cholesky --- src/ao_two_e_ints/EZFIO.cfg | 2 +- src/ao_two_e_ints/cholesky.irp.f | 6 +- src/mo_two_e_ints/EZFIO.cfg | 6 ++ src/mo_two_e_ints/cholesky.irp.f | 117 +++++++++++++++------- src/mo_two_e_ints/four_idx_novvvv.irp.f | 9 -- src/mo_two_e_ints/integrals_3_index.irp.f | 2 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 2 +- src/trexio/import_trexio_integrals.irp.f | 5 + 8 files changed, 97 insertions(+), 52 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index c2e083a3..a985149e 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -6,7 +6,7 @@ default: None [io_ao_cholesky] type: Disk_access -doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +doc: Read/Write |AO| Cholesky integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6778d5c7..a1cd8e5b 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -76,7 +76,7 @@ END_PROVIDER deallocate(cholesky_ao) if (read_ao_cholesky) then - print *, 'Reading Cholesky vectors from disk...' + print *, 'Reading Cholesky AO vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') read(iunit) rank allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) @@ -486,7 +486,7 @@ END_PROVIDER cholesky_ao_num = rank if (write_ao_cholesky) then - print *, 'Writing Cholesky vectors to disk...' + print *, 'Writing Cholesky AO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') write(iunit) rank write(iunit) cholesky_ao @@ -499,7 +499,7 @@ END_PROVIDER print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' call wall_time(wall1) - print*,'Time to provide AO cholesky vectors = ',wall1-wall0 + print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' END_PROVIDER diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 088a2416..49a2952c 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -1,3 +1,9 @@ +[io_mo_cholesky] +type: Disk_access +doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [io_mo_two_e_integrals] type: Disk_access doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 0d0989d7..d3affd68 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,55 +1,98 @@ +BEGIN_PROVIDER [ logical, do_mo_cholesky ] + implicit none + BEGIN_DOC + ! If True, use Cholesky vectors for MO integrals + END_DOC + do_mo_cholesky = do_ao_cholesky +END_PROVIDER + BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis END_DOC - cholesky_mo_num = cholesky_ao_num -END_PROVIDER - -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] - implicit none - BEGIN_DOC - ! Cholesky vectors in MO basis - END_DOC - - integer :: k, i, j - - call set_multiple_levels_omp(.False.) - !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_mo_num - do j=1,mo_num - do i=1,mo_num - cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) - enddo - enddo - enddo - !$OMP END PARALLEL DO - + integer, external :: getUnitAndOpen + integer :: iunit + if (read_mo_cholesky) then + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) cholesky_mo_num + close(iunit) + else + cholesky_mo_num = cholesky_ao_num + endif END_PROVIDER +!BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] +! implicit none +! BEGIN_DOC +! ! Cholesky vectors in MO basis +! END_DOC +! +! integer :: k, i, j +! +! call set_multiple_levels_omp(.False.) +! !$OMP PARALLEL DO PRIVATE(k) +! do k=1,cholesky_mo_num +! do j=1,mo_num +! do i=1,mo_num +! cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO +! +!END_PROVIDER +! BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC - ! Cholesky vectors in MO basis + ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) END_DOC double precision, allocatable :: X(:,:,:) double precision :: wall0, wall1 - integer :: ierr - print *, 'AO->MO Transformation of Cholesky vectors' - call wall_time(wall0) + integer, external :: getUnitAndOpen + integer :: iunit, ierr, rank - allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': Allocation failed' - endif - call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) - call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) - deallocate(X) - call wall_time(wall1) - print*,'Time for AO->MO Cholesky vectors = ',wall1-wall0 + if (read_mo_cholesky) then + print *, 'Reading Cholesky MO vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) rank + if (cholesky_mo_num /= rank) then + stop 'inconsistent rank' + endif + read(iunit) cholesky_mo_transp + close(iunit) + else + print *, 'AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) + call wall_time(wall1) + print*,'Time to provide MO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + + + if (write_mo_cholesky) then + print *, 'Writing Cholesky MO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') + write(iunit) rank + write(iunit) cholesky_mo_transp + close(iunit) + call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') + endif + endif END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f index 2be09689..80af35dc 100644 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ b/src/mo_two_e_ints/four_idx_novvvv.irp.f @@ -1,12 +1,3 @@ -!BEGIN_PROVIDER [ logical, no_vvvv_integrals ] -! implicit none -! BEGIN_DOC -! If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -! END_DOC -! -! no_vvvv_integrals = .False. -!END_PROVIDER - BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] implicit none BEGIN_DOC diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index eb05da84..c0dab506 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -10,7 +10,7 @@ double precision :: get_two_e_integral double precision :: integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0e77b6a2..cb3f4bc6 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1362,7 +1362,7 @@ END_PROVIDER double precision :: get_two_e_integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:) allocate (buffer(cholesky_mo_num,mo_num)) do k=1,cholesky_mo_num diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 8c6b79d7..5a6b3c03 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -41,6 +41,11 @@ subroutine run(f) integer , allocatable :: Vi(:,:) double precision :: s +! TODO: +! - If Cholesky AO in trexio file, read cholesky ao vectors +! - If Cholesky MO in trexio file, read cholesky mo vectors +! - If Cholesky MO not in trexio file, force do_cholesky_mo to False + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) if (rc /= TREXIO_SUCCESS) then From e55390c70c98d9c475bde599ab3621246cac88af Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:11:10 +0200 Subject: [PATCH 071/131] Type error in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6778d5c7..ffb37565 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -239,7 +239,7 @@ END_PROVIDER + memory_of_int(nq) &! computed(nq) + np*memory_of_int(nq) &! computed(nq) + memory_of_double(np) &! Delta_col(np) - + 7*memory_of_double(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + + 7*memory_of_double8(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) From b080a7a5e9adecf0544cc34b65dbf967872a77e3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:34:14 +0200 Subject: [PATCH 072/131] Clean Cholesky MO --- src/mo_two_e_ints/EZFIO.cfg | 6 -- src/mo_two_e_ints/cholesky.irp.f | 6 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 39 +++++------ src/mo_two_e_ints/no_vvvv.irp.f | 88 ------------------------- 4 files changed, 25 insertions(+), 114 deletions(-) delete mode 100644 src/mo_two_e_ints/no_vvvv.irp.f diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 49a2952c..c967969f 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -17,12 +17,6 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_mo -[no_vvvv_integrals] -type: logical -doc: If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -interface: ezfio,provider,ocaml -default: false - [io_mo_two_e_integrals_erf] type: Disk_access doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index d3affd68..5d34fb33 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -68,7 +68,11 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, read(iunit) cholesky_mo_transp close(iunit) else + print *, '' print *, 'AO->MO Transformation of Cholesky vectors' + print *, '-----------------------------------------' + print *, '' + call wall_time(wall0) allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) @@ -87,7 +91,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, if (write_mo_cholesky) then print *, 'Writing Cholesky MO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') - write(iunit) rank + write(iunit) cholesky_mo_num write(iunit) cholesky_mo_transp close(iunit) call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index cb3f4bc6..4b9bf97f 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -39,29 +39,16 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] return endif - if (.not. do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map - endif - - print *, '' - print *, 'AO -> MO integrals transformation' - print *, '---------------------------------' - print *, '' - call wall_time(wall_1) call cpu_time(cpu_1) - if(no_vvvv_integrals)then - call four_idx_novvvv_old + if (do_mo_cholesky) then + call add_integrals_to_map_cholesky else - if (do_ao_cholesky) then - call add_integrals_to_map_cholesky + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm - else - call add_integrals_to_map(full_ijkl_bitmask_4) - endif + call add_integrals_to_map(full_ijkl_bitmask_4) endif endif @@ -92,8 +79,15 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'DGEMM-based AO->MO Transformation' + print *, '---------------------------------' + print *, '' + if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' + print *, irp_here, ': Integer overflow in ao_num**3. Set do_ao_cholesky=.True.' endif allocate (a1(ao_num,ao_num,ao_num,ao_num)) @@ -213,6 +207,12 @@ subroutine add_integrals_to_map(mask_ijkl) PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'Sparse AO->MO Transformation' + print *, '----------------------------' + print *, '' + !Get list of MOs for i,j,k and l !------------------------------- @@ -469,6 +469,7 @@ subroutine add_integrals_to_map_cholesky integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) + PROVIDE cholesky_mo_transp call set_multiple_levels_omp(.False.) !$OMP PARALLEL DEFAULT(SHARED) & diff --git a/src/mo_two_e_ints/no_vvvv.irp.f b/src/mo_two_e_ints/no_vvvv.irp.f deleted file mode 100644 index 48a7f5e2..00000000 --- a/src/mo_two_e_ints/no_vvvv.irp.f +++ /dev/null @@ -1,88 +0,0 @@ - -subroutine four_idx_novvvv_old - use map_module - use bitmasks - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! -! if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) -! endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! -! if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) -end From ca98a6b529b3aa47d125e27d4b8031ac88a55c41 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:46:25 +0200 Subject: [PATCH 073/131] Fixed previous commit --- src/mo_two_e_ints/cholesky.irp.f | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 5d34fb33..971ab38d 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -22,27 +22,27 @@ BEGIN_PROVIDER [ integer, cholesky_mo_num ] endif END_PROVIDER -!BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] -! implicit none -! BEGIN_DOC -! ! Cholesky vectors in MO basis -! END_DOC -! -! integer :: k, i, j -! -! call set_multiple_levels_omp(.False.) -! !$OMP PARALLEL DO PRIVATE(k) -! do k=1,cholesky_mo_num -! do j=1,mo_num -! do i=1,mo_num -! cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO -! -!END_PROVIDER -! +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: k, i, j + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC From 7e1ed69eef611e6ff0336814c8ffded0eb9cf323 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 18:03:51 +0200 Subject: [PATCH 074/131] Starting Cholesky transition --- src/mo_two_e_ints/map_integrals.irp.f | 38 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 856 +----------------------- 2 files changed, 57 insertions(+), 837 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index ada256a2..290fdeab 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -98,7 +98,10 @@ double precision function get_two_e_integral(i,j,k,l,map) integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + integer :: kk + + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + if (use_banned_excitation) then if (banned_excitation(i,k)) then get_two_e_integral = 0.d0 @@ -109,22 +112,43 @@ double precision function get_two_e_integral(i,j,k,l,map) return endif endif + + ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then - !DIR$ FORCEINLINE - call two_e_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - get_two_e_integral = dble(tmp) + +! if (iand(ii, -128) /= 0) then + if (.True.) then + ! Integral is not in the cache + + if (do_mo_cholesky) then + + get_two_e_integral = 0.d0 + do kk=1,cholesky_mo_num + get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k) * cholesky_mo_transp(kk,j,l) + enddo + + else + ! Integrals is in the map + + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + get_two_e_integral = dble(tmp) + endif + else + ! Integrals is in the cache + ii_8 = int(l,8)-mo_integrals_cache_min_8 ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) get_two_e_integral = mo_integrals_cache(ii_8) + endif end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 4b9bf97f..d44bb38a 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1,3 +1,26 @@ +! 1,2-index integrals are always taken from: +! - mo_two_e_integrals_jj_exchange +! - mo_two_e_integrals_jj_anti +! - mo_two_e_integrals_jj +! +! 3-index integrals are always taken from: +! - big_array_exchange_integrals +! - big_array_coulomb_integrals +! +! If (do_mo_cholesky): +! - Integrals with four 4 active orbitals are stored in the cache map, +! all other integrals are used from cholesky vectors +! - 1,2,3-index arrays are built from cholesky vectors +! Else: +! - All integrals are stored in the map or cache map +! - 1,2,3-index arrays are built from the map +! +! TODO: +! - build cache map from cholesky vectors +! - get_mo_integrals using cholesky +! - get_mo_integralss using cholesky +! - get_mo_integralss in PT2 + subroutine mo_two_e_integrals_index(i,j,k,l,i1) use map_module implicit none @@ -453,6 +476,9 @@ subroutine add_integrals_to_map(mask_ijkl) end + + + subroutine add_integrals_to_map_cholesky use bitmasks implicit none @@ -516,837 +542,7 @@ subroutine add_integrals_to_map_cholesky end -subroutine add_integrals_to_map_three_indices(mask_ijk) - use bitmasks - implicit none - BEGIN_DOC - ! Adds integrals to the MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijk(N_int,3) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k - integer :: m - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int ) - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,1)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,2)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,3)) - enddo - if(j==0)then - return - endif - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = l0 - j = list_ijkl(j0,2) - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then !min(k,j1-i1) - exit - endif - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - if(i==k .and. j==l .and. i.ne.j)then - buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0 - endif - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = k0 - j = list_ijkl(k0,2) - i0 = l0 - i = list_ijkl(i0,2) - if (k==l) then - cycle - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - -end - - -subroutine add_integrals_to_map_no_exit_34(mask_ijkl) - use bitmasks - implicit none - - BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k, n_l - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) - call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - - !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - !IRP_IF COARRAY - ! if (mod(l1-this_image(),num_images()) /= 0 ) then - ! cycle - ! endif - !IRP_ENDIF - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_l - l = list_ijkl(l0,4) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - j1 = shiftr((l*l-l),1) - do j0 = 1, n_j - j = list_ijkl(j0,2) - if (j > l) then - exit - endif - j1 += 1 - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if(i> k)then - exit - endif - - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - !IRP_IF COARRAY - ! print*, 'Communicating the map' - ! call communicate_mo_integrals() - !IRP_ENDIF - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - - -end - - - - BEGIN_PROVIDER [ double precision, mo_two_e_integral_jj_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integral_jj_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_anti_from_ao(i,j) = J_ij - K_ij - END_DOC - - integer :: i,j,p,q,r,s - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integral_jj_from_ao = 0.d0 - mo_two_e_integrals_jj_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(mo_num,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integral_jj_from_ao,mo_two_e_integrals_jj_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num), & - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i=1,mo_num - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i=1,mo_num - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i=1,mo_num - do j=1,mo_num - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integral_jj_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_jj_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_jj_anti_from_ao = mo_two_e_integral_jj_from_ao - mo_two_e_integrals_jj_exchange_from_ao - - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integrals_vv_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_anti_from_ao(i,j) = J_ij - K_ij - ! but only for the virtual orbitals - END_DOC - - integer :: i,j,p,q,r,s - integer :: i0,j0 - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integrals_vv_from_ao = 0.d0 - mo_two_e_integrals_vv_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(n_virt_orb,mo_num,list_virt,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integrals_vv_from_ao,mo_two_e_integrals_vv_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num),& - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i0=1,n_virt_orb - i= list_virt(i0) - do j0=1,n_virt_orb - j = list_virt(j0) - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integrals_vv_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_vv_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_vv_anti_from_ao = mo_two_e_integrals_vv_from_ao - mo_two_e_integrals_vv_exchange_from_ao - ! print*, '**********' - ! do i0 =1, n_virt_orb - ! i = list_virt(i0) - ! print*, mo_two_e_integrals_vv_from_ao(i,i) - ! enddo - ! print*, '**********' - - -END_PROVIDER BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj, (mo_num,mo_num) ] From bd534589e123c939de8ddd3147837b97837581dc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jun 2024 17:36:14 +0200 Subject: [PATCH 075/131] Building mo cache from cholesky --- src/mo_two_e_ints/map_integrals.irp.f | 68 ++++++++++++++++--------- src/mo_two_e_ints/mo_bi_integrals.irp.f | 1 - 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 290fdeab..e99e89fb 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -59,29 +59,50 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*12 integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) - !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) - !DIR$ FORCEINLINE - call map_get(mo_integrals_map,idx,integral) + if (do_mo_cholesky) then + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE (k,l,ii) + do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 ii = l-mo_integrals_cache_min_8 ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) - mo_integrals_cache(ii) = integral + ii = shiftl(ii,14) + call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & + mo_integrals_cache_max-mo_integrals_cache_min+1, & + cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & + cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & + mo_integrals_cache(ii), 128) + enddo + enddo + !$OMP END PARALLEL DO + + else + !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) + do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + l4 = int(l,4) + do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + k4 = int(k,4) + do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + j4 = int(j,4) + do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + i4 = int(i,4) + !DIR$ FORCEINLINE + call two_e_integrals_index(i4,j4,k4,l4,idx) + !DIR$ FORCEINLINE + call map_get(mo_integrals_map,idx,integral) + ii = l-mo_integrals_cache_min_8 + ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) + ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) + ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) + mo_integrals_cache(ii) = integral + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif END_PROVIDER @@ -100,7 +121,7 @@ double precision function get_two_e_integral(i,j,k,l,map) real(integral_kind) :: tmp integer :: kk - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky if (use_banned_excitation) then if (banned_excitation(i,k)) then @@ -119,16 +140,13 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) -! if (iand(ii, -128) /= 0) then - if (.True.) then + if (iand(ii, -128) /= 0) then ! Integral is not in the cache if (do_mo_cholesky) then - get_two_e_integral = 0.d0 - do kk=1,cholesky_mo_num - get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k) * cholesky_mo_transp(kk,j,l) - enddo + double precision, external :: ddot + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) else ! Integrals is in the map diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index d44bb38a..6079c9f7 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -16,7 +16,6 @@ ! - 1,2,3-index arrays are built from the map ! ! TODO: -! - build cache map from cholesky vectors ! - get_mo_integrals using cholesky ! - get_mo_integralss using cholesky ! - get_mo_integralss in PT2 From 10fb3a0636d74ef5a3b78dc69bb9cc4d6be63455 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jun 2024 18:23:45 +0200 Subject: [PATCH 076/131] Introducing dgemm and dgemv to get integrals --- src/mo_two_e_ints/map_integrals.irp.f | 35 ++++++++++++++++++++----- src/mo_two_e_ints/mo_bi_integrals.irp.f | 6 +++-- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index e99e89fb..c9fa81c4 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -262,9 +262,25 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - do j=1,sze - call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) - enddo + if (do_mo_cholesky) then + call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & + out_array, sze) +! integer :: i +! do j=1,mo_num +! do i=1,mo_num +! double precision, external :: get_two_e_integral +! print *, i, j, real(out_array(i,j)), real(get_two_e_integral(i,j,k,l,map)) +! enddo +! enddo +! print *, irp_here +! pause + else + do j=1,sze + call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) + enddo + endif end subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) @@ -303,9 +319,16 @@ subroutine get_mo_two_e_integrals_coulomb_ii(k,l,sze,out_val,map) double precision, external :: get_two_e_integral PROVIDE mo_two_e_integrals_in_map - do i=1,sze - out_val(i) = get_two_e_integral(k,i,l,i,map) - enddo + if (do_mo_cholesky) then + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,1), cholesky_mo_num*(mo_num+1), & + cholesky_mo_transp(1,k,l), 1, 0.d0, & + out_val, 1) + else + do i=1,sze + out_val(i) = get_two_e_integral(k,i,l,i,map) + enddo + endif end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 6079c9f7..04e6c3e6 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -65,9 +65,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call cpu_time(cpu_1) if (do_mo_cholesky) then - call add_integrals_to_map_cholesky + PROVIDE cholesky_mo_transp else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + if (do_ao_cholesky) then + call add_integrals_to_map_cholesky + else if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then call four_idx_dgemm else call add_integrals_to_map(full_ijkl_bitmask_4) From 47b80703397ec92dfec008ef2a641efba9c22f44 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 11:53:11 +0200 Subject: [PATCH 077/131] Cache map in integer*4 --- src/mo_two_e_ints/map_integrals.irp.f | 207 ++++++++++++++++++-------- 1 file changed, 145 insertions(+), 62 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index c9fa81c4..9155cd8f 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -34,28 +34,28 @@ end BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] &BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_min_8 ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_max_8 ] +&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_shift] +&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_num,8),mo_integrals_cache_min_8+127_8) - mo_integrals_cache_min = max(1,elec_alpha_num - 63) - mo_integrals_cache_max = min(mo_num,mo_integrals_cache_min+127) + mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + mo_integrals_cache_size = 2**mo_integrals_cache_shift + + mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) + mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*128_8) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_size**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map - integer*8 :: i,j,k,l - integer*4 :: i4,j4,k4,l4 - integer*8 :: ii + integer :: i,j,k,l + integer :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache @@ -63,39 +63,36 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*12 call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE (k,l,ii) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = shiftl(ii,14) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = shiftl(ii,mo_integrals_cache_shift) + ii = shiftl(ii,mo_integrals_cache_shift) call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & mo_integrals_cache_max-mo_integrals_cache_min+1, & cholesky_mo_num, 1.d0, & cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & - mo_integrals_cache(ii), 128) + mo_integrals_cache(ii), mo_integrals_cache_size) enddo enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + do j=mo_integrals_cache_min,mo_integrals_cache_max + do i=mo_integrals_cache_min,mo_integrals_cache_max !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) + call two_e_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) mo_integrals_cache(ii) = integral enddo enddo @@ -116,7 +113,6 @@ double precision function get_two_e_integral(i,j,k,l,map) integer, intent(in) :: i,j,k,l integer(key_kind) :: idx integer :: ii - integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp integer :: kk @@ -140,7 +136,7 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then + if (iand(ii, -mo_integrals_cache_size) /= 0) then ! Integral is not in the cache if (do_mo_cholesky) then @@ -161,11 +157,11 @@ double precision function get_two_e_integral(i,j,k,l,map) else ! Integrals is in the cache - ii_8 = int(l,8)-mo_integrals_cache_min_8 - ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) - get_two_e_integral = mo_integrals_cache(ii_8) + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral = mo_integrals_cache(ii) endif end @@ -197,19 +193,12 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) integer :: i double precision, external :: get_two_e_integral - integer :: ii, ii0 - integer*8 :: ii_8, ii0_8 + integer :: ii real(integral_kind) :: tmp integer(key_kind) :: i1, idx integer(key_kind) :: p,q,r,s,i2 PROVIDE mo_two_e_integrals_in_map mo_integrals_cache -!DEBUG -! do i=1,sze -! out_val(i) = get_two_e_integral(i,j,k,l,map) -! enddo -! return -!DEBUG out_val(1:sze) = 0.d0 if (banned_excitation(j,l)) then @@ -220,9 +209,10 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) ii0 = ior(ii0, k-mo_integrals_cache_min) ii0 = ior(ii0, j-mo_integrals_cache_min) - ii0_8 = int(l,8)-mo_integrals_cache_min_8 - ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8) - ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8) + integer :: ii0, ii0_8, ii_8 + ii0_8 = l-mo_integrals_cache_min + ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), j-mo_integrals_cache_min) q = min(j,l) s = max(j,l) @@ -231,8 +221,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) do i=1,sze if (banned_excitation(i,k)) cycle ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -128) == 0) then - ii_8 = ior( shiftl(ii0_8,7), int(i,8)-mo_integrals_cache_min_8) + if (iand(ii, -mo_integrals_cache_size) == 0) then + ii_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), i-mo_integrals_cache_min) out_val(i) = mo_integrals_cache(ii_8) else p = min(i,k) @@ -246,6 +236,93 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val(i) = dble(tmp) endif enddo + +! if (banned_excitation(j,l)) then +! out_val(1:sze) = 0.d0 +! return +! endif +! +! if (mo_integrals_cache_min > 1) then +! +! if (do_mo_cholesky) then +! +! call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & +! cholesky_mo_transp(1,1,k), cholesky_mo_num, & +! cholesky_mo_transp(1,j,l), 1, 0.d0, & +! out_val, 1) +! +! else +! +! q = min(j,l) +! s = max(j,l) +! q = q+shiftr(s*s-s,1) +! +! do i=1,mo_integrals_cache_min-1 +! if (banned_excitation(i,k)) then +! out_val(i) = 0.d0 +! cycle +! endif +! p = min(i,k) +! r = max(i,k) +! p = p+shiftr(r*r-r,1) +! i1 = min(p,q) +! i2 = max(p,q) +! idx = i1+shiftr(i2*i2-i2,1) +! !DIR$ FORCEINLINE +! call map_get(map,idx,tmp) +! out_val(i) = dble(tmp) +! enddo +! +! endif +! +! endif +! +! +! ii = l-mo_integrals_cache_min +! ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) +! ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) +! ii = shiftl(ii, mo_integrals_cache_shift) +! do i=mo_integrals_cache_min, mo_integrals_cache_max +! ii = ii+1 +! out_val(i) = mo_integrals_cache(ii) +! enddo +! +! +! if (mo_integrals_cache_max < mo_num) then +! +! if (do_mo_cholesky) then +! +! call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & +! cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & +! cholesky_mo_transp(1,j,l), 1, 0.d0, & +! out_val(mo_integrals_cache_max+1), 1) +! +! else +! +! q = min(j,l) +! s = max(j,l) +! q = q+shiftr(s*s-s,1) +! +! do i=mo_integrals_cache_max+1,mo_num +! if (banned_excitation(i,k)) then +! out_val(i) = 0.d0 +! cycle +! endif +! p = min(i,k) +! r = max(i,k) +! p = p+shiftr(r*r-r,1) +! i1 = min(p,q) +! i2 = max(p,q) +! idx = i1+shiftr(i2*i2-i2,1) +! !DIR$ FORCEINLINE +! call map_get(map,idx,tmp) +! out_val(i) = dble(tmp) +! enddo +! +! endif +! +! endif + end subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) @@ -267,15 +344,6 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & out_array, sze) -! integer :: i -! do j=1,mo_num -! do i=1,mo_num -! double precision, external :: get_two_e_integral -! print *, i, j, real(out_array(i,j)), real(get_two_e_integral(i,j,k,l,map)) -! enddo -! enddo -! print *, irp_here -! pause else do j=1,sze call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) @@ -297,9 +365,20 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) integer :: j PROVIDE mo_two_e_integrals_in_map - do j=1,sze - call get_mo_two_e_integrals(k,j,l,sze,out_array(1,j),map) - enddo + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num*mo_num, 1.d0, & + cholesky_mo_transp(1,1,1), cholesky_mo_num, & + cholesky_mo_transp(1,k,l), 1, 0.d0, & + out_array, 1) + + else + + do j=1,sze + call get_mo_two_e_integrals(k,j,l,sze,out_array(1,j),map) + enddo + + endif end @@ -320,14 +399,18 @@ subroutine get_mo_two_e_integrals_coulomb_ii(k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map if (do_mo_cholesky) then + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & cholesky_mo_transp(1,1,1), cholesky_mo_num*(mo_num+1), & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_val, 1) + else + do i=1,sze out_val(i) = get_two_e_integral(k,i,l,i,map) enddo + endif end From a4516fb8f96acabf86e5effa497fd7b0035cc0cb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 12:12:14 +0200 Subject: [PATCH 078/131] Accelerated cache-map access --- src/mo_two_e_ints/map_integrals.irp.f | 188 ++++++++++---------------- 1 file changed, 74 insertions(+), 114 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 9155cd8f..fb155073 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -200,128 +200,88 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - out_val(1:sze) = 0.d0 if (banned_excitation(j,l)) then - return + out_val(1:sze) = 0.d0 + return endif - ii0 = l-mo_integrals_cache_min - ii0 = ior(ii0, k-mo_integrals_cache_min) - ii0 = ior(ii0, j-mo_integrals_cache_min) + if (mo_integrals_cache_min > 1) then - integer :: ii0, ii0_8, ii_8 - ii0_8 = l-mo_integrals_cache_min - ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), j-mo_integrals_cache_min) + if (do_mo_cholesky) then - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) - do i=1,sze - if (banned_excitation(i,k)) cycle - ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -mo_integrals_cache_size) == 0) then - ii_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), i-mo_integrals_cache_min) - out_val(i) = mo_integrals_cache(ii_8) else - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) - endif - enddo -! if (banned_excitation(j,l)) then -! out_val(1:sze) = 0.d0 -! return -! endif -! -! if (mo_integrals_cache_min > 1) then -! -! if (do_mo_cholesky) then -! -! call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & -! cholesky_mo_transp(1,1,k), cholesky_mo_num, & -! cholesky_mo_transp(1,j,l), 1, 0.d0, & -! out_val, 1) -! -! else -! -! q = min(j,l) -! s = max(j,l) -! q = q+shiftr(s*s-s,1) -! -! do i=1,mo_integrals_cache_min-1 -! if (banned_excitation(i,k)) then -! out_val(i) = 0.d0 -! cycle -! endif -! p = min(i,k) -! r = max(i,k) -! p = p+shiftr(r*r-r,1) -! i1 = min(p,q) -! i2 = max(p,q) -! idx = i1+shiftr(i2*i2-i2,1) -! !DIR$ FORCEINLINE -! call map_get(map,idx,tmp) -! out_val(i) = dble(tmp) -! enddo -! -! endif -! -! endif -! -! -! ii = l-mo_integrals_cache_min -! ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) -! ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) -! ii = shiftl(ii, mo_integrals_cache_shift) -! do i=mo_integrals_cache_min, mo_integrals_cache_max -! ii = ii+1 -! out_val(i) = mo_integrals_cache(ii) -! enddo -! -! -! if (mo_integrals_cache_max < mo_num) then -! -! if (do_mo_cholesky) then -! -! call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & -! cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & -! cholesky_mo_transp(1,j,l), 1, 0.d0, & -! out_val(mo_integrals_cache_max+1), 1) -! -! else -! -! q = min(j,l) -! s = max(j,l) -! q = q+shiftr(s*s-s,1) -! -! do i=mo_integrals_cache_max+1,mo_num -! if (banned_excitation(i,k)) then -! out_val(i) = 0.d0 -! cycle -! endif -! p = min(i,k) -! r = max(i,k) -! p = p+shiftr(r*r-r,1) -! i1 = min(p,q) -! i2 = max(p,q) -! idx = i1+shiftr(i2*i2-i2,1) -! !DIR$ FORCEINLINE -! call map_get(map,idx,tmp) -! out_val(i) = dble(tmp) -! enddo -! -! endif -! -! endif + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + + if (mo_integrals_cache_max < mo_num) then + + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif end From 82654efdf9c9a19ac593b7bec54f16372fb03460 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 13:06:32 +0200 Subject: [PATCH 079/131] Optimized get_integrals --- src/mo_two_e_ints/cholesky.irp.f | 1 + src/mo_two_e_ints/map_integrals.irp.f | 164 +++++++++++++++++--------- 2 files changed, 107 insertions(+), 58 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 971ab38d..773d240a 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,6 +4,7 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! If True, use Cholesky vectors for MO integrals END_DOC do_mo_cholesky = do_ao_cholesky + do_mo_cholesky = .False. END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_mo_num ] diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index fb155073..571de573 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -32,19 +32,27 @@ subroutine insert_into_mo_integrals_map(n_integrals, & call map_update(mo_integrals_map, buffer_i, buffer_values, n_integrals, thr) end - BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_shift] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_size ] + BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_shift] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + if (qp_max_mem < 1) then + mo_integrals_cache_shift = 5 ! 5 = log(32). + else if (qp_max_mem < 2) then + mo_integrals_cache_shift = 6 ! 6 = log(64). + else + mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + endif + mo_integrals_cache_size = 2**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) +print *, 'mo_integrals_cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER @@ -136,7 +144,17 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -mo_integrals_cache_size) /= 0) then + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Integrals is in the cache + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral = mo_integrals_cache(ii) + + else + ! Integral is not in the cache if (do_mo_cholesky) then @@ -145,7 +163,6 @@ double precision function get_two_e_integral(i,j,k,l,map) get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) else - ! Integrals is in the map !DIR$ FORCEINLINE call two_e_integrals_index(i,j,k,l,idx) @@ -154,15 +171,6 @@ double precision function get_two_e_integral(i,j,k,l,map) get_two_e_integral = dble(tmp) endif - else - ! Integrals is in the cache - - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) - get_two_e_integral = mo_integrals_cache(ii) - endif end @@ -200,62 +208,105 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 return endif - if (mo_integrals_cache_min > 1) then + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) - if (do_mo_cholesky) then + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Some integrals are in the cache - call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + if (mo_integrals_cache_min > 1) then - else + if (do_mo_cholesky) then - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) - do i=1,mo_integrals_cache_min-1 - if (banned_excitation(i,k)) then - out_val(i) = 0.d0 - cycle - endif - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) - enddo + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif endif - endif + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = shiftl(ii, mo_integrals_cache_shift) - out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + if (mo_integrals_cache_max < mo_num) then - if (mo_integrals_cache_max < mo_num) then + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + else if (do_mo_cholesky) then - call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & - cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val(mo_integrals_cache_max+1), 1) + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) else @@ -263,11 +314,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) s = max(j,l) q = q+shiftr(s*s-s,1) - do i=mo_integrals_cache_max+1,mo_num - if (banned_excitation(i,k)) then - out_val(i) = 0.d0 - cycle - endif + do i=1,sze + if (banned_excitation(i,k)) cycle p = min(i,k) r = max(i,k) p = p+shiftr(r*r-r,1) From 90c3db31036b3aeffa3b94445960f4d092c6f929 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 14:38:50 +0200 Subject: [PATCH 080/131] Accelerated cache --- src/mo_two_e_ints/cholesky.irp.f | 2 +- src/mo_two_e_ints/four_idx_novvvv.irp.f | 180 ------------------------ src/mo_two_e_ints/map_integrals.irp.f | 163 ++++++++++++++++----- src/mo_two_e_ints/mo_bi_integrals.irp.f | 4 - 4 files changed, 128 insertions(+), 221 deletions(-) delete mode 100644 src/mo_two_e_ints/four_idx_novvvv.irp.f diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 773d240a..7e2c8b37 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! If True, use Cholesky vectors for MO integrals END_DOC do_mo_cholesky = do_ao_cholesky - do_mo_cholesky = .False. +! do_mo_cholesky = .False. END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_mo_num ] diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f deleted file mode 100644 index 80af35dc..00000000 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ /dev/null @@ -1,180 +0,0 @@ -BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] - implicit none - BEGIN_DOC - ! MO coefficients without virtual MOs - END_DOC - integer :: j,jj - - do j=1,n_core_inact_act_orb - jj = list_core_inact_act(j) - mo_coef_novirt(:,j) = mo_coef(:,jj) - enddo - -END_PROVIDER - -subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) - implicit none - BEGIN_DOC - ! Transform A from the |AO| basis to the |MO| basis excluding virtuals - ! - ! $C^\dagger.A_{ao}.C$ - END_DOC - integer, intent(in) :: LDA_ao,LDA_mo - double precision, intent(in) :: A_ao(LDA_ao,ao_num) - double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) - double precision, allocatable :: T(:,:) - - allocate ( T(ao_num,n_core_inact_act_orb) ) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T - - call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & - 1.d0, A_ao,LDA_ao, & - mo_coef_novirt, size(mo_coef_novirt,1), & - 0.d0, T, size(T,1)) - - call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& - 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & - T, ao_num, & - 0.d0, A_mo, size(A_mo,1)) - - deallocate(T) -end - - -subroutine four_idx_novvvv - print*,'********' - print*,'********' - print*,'********' - print*,'WARNING :: Using four_idx_novvvv, and we are not sure that this routine is not bugged ...' - print*,'********' - print*,'********' - print*,'********' - use map_module - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l,n_integrals - double precision, allocatable :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:) - double precision, external :: get_ao_two_e_integral - integer(key_kind), allocatable :: idx(:) - real(integral_kind), allocatable :: values(:) - - integer :: p,q,r,s - double precision :: c - allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , & - T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & - !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & - !$OMP list_core_inact_act,T2,ao_integrals_map) & - !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & - !$OMP f,f2,d,c) - allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), & - idx(mo_num*mo_num), values(mo_num*mo_num) ) - - ! - !$OMP DO - do s=1,ao_num - do r=1,ao_num - do q=1,ao_num - do p=1,r - f (p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) - f (r,q,p) = f(p,q,r) - enddo - enddo - enddo - do r=1,ao_num - do q=1,ao_num - do p=1,ao_num - f2(p,q,r) = f(p,r,q) - enddo - enddo - enddo - ! f (p,q,r) = - ! f2(p,q,r) = - - do r=1,ao_num - call ao_to_mo_novirt(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1)) - call ao_to_mo_novirt(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1)) - enddo - ! T (i,j,p,q) = - ! T2(i,j,p,q) = - - enddo - !$OMP END DO - - !$OMP DO - do j=1,n_core_inact_act_orb - do i=1,n_core_inact_act_orb - do s=1,ao_num - do r=1,ao_num - f (r,s,1) = T (i,j,r,s) - f2(r,s,1) = T2(i,j,r,s) - enddo - enddo - call ao_to_mo(f ,size(f ,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - - call ao_to_mo(f2,size(f2,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),k,list_core_inact_act(j),l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - enddo - enddo - !$OMP END DO - deallocate(f,f2,d,idx,values) - - !$OMP END PARALLEL - - deallocate(T,T2) - - - call map_sort(mo_integrals_map) - call map_unique(mo_integrals_map) - call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) - -end - -subroutine four_idx_novvvv2 - use bitmasks - implicit none - integer :: i - integer(bit_kind) :: mask_ijkl(N_int,4) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = full_ijkl_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = full_ijkl_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - -end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 571de573..90257bbd 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -48,6 +48,8 @@ end mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 endif +!mo_integrals_cache_shift = 2 ! 5 = log(32). + mo_integrals_cache_size = 2**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) @@ -112,6 +114,24 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz END_PROVIDER +double precision function get_two_e_integral_cache(i,j,k,l) + use map_module + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis taken from the cache + END_DOC + integer, intent(in) :: i,j,k,l + integer :: ii + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral_cache = mo_integrals_cache(ii) + +end + + double precision function get_two_e_integral(i,j,k,l,map) use map_module implicit none @@ -123,7 +143,6 @@ double precision function get_two_e_integral(i,j,k,l,map) integer :: ii type(map_type), intent(inout) :: map real(integral_kind) :: tmp - integer :: kk PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky @@ -145,13 +164,9 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, i-mo_integrals_cache_min) if (iand(ii, -mo_integrals_cache_size) == 0) then - ! Integrals is in the cache - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) - get_two_e_integral = mo_integrals_cache(ii) + double precision, external :: get_two_e_integral_cache + get_two_e_integral = get_two_e_integral_cache(i,j,k,l) else @@ -199,7 +214,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) double precision, intent(out) :: out_val(sze) type(map_type), intent(inout) :: map integer :: i - double precision, external :: get_two_e_integral integer :: ii real(integral_kind) :: tmp @@ -256,13 +270,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) endif - - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = shiftl(ii, mo_integrals_cache_shift) - out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + call get_mo_two_e_integrals_cache(j,k,l,sze,out_val) if (mo_integrals_cache_max < mo_num) then @@ -333,6 +341,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) end +subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) + use map_module + implicit none + BEGIN_DOC + ! Returns multiple integrals in the MO basis, all + ! i for j,k,l fixed, all integrals from the cache + END_DOC + integer, intent(in) :: j,k,l, sze + double precision, intent(out) :: out_val(sze) + integer :: ii + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + +end + subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) use map_module implicit none @@ -347,16 +375,32 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - if (do_mo_cholesky) then - call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & - out_array, sze) + if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max Date: Wed, 12 Jun 2024 14:59:26 +0200 Subject: [PATCH 081/131] Introduce hij_cache in PT2 --- src/cipsi/selection.irp.f | 72 ++++++++++++++------------- src/mo_two_e_ints/map_integrals.irp.f | 16 +++++- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0281a1d4..88a2cbdc 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -178,7 +178,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical, allocatable :: banned(:,:,:), bannedOrb(:,:) double precision, allocatable :: coef_fullminilist_rev(:,:) - double precision, allocatable :: mat(:,:,:) + double precision, allocatable :: mat(:,:,:), hij_cache(:,:,:) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -205,7 +205,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc ! Removed to avoid introducing determinants already presents in the wf !double precision, parameter :: norm_thr = 1.d-16 - allocate (indices(N_det), & + allocate (indices(N_det), hij_cache(mo_num,mo_num,2), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) ! Pre-compute excitation degrees wrt alpha determinants @@ -511,11 +511,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc maskInd = maskInd + 1 if(mod(maskInd, csubset) == (subset-1)) then + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache(1,1,1),mo_integrals_map) + if (sp /= 3) then ! AA or BB + call get_mo_two_e_integrals_ij(h1,h2,mo_num,hij_cache(1,1,2),mo_integrals_map) + endif call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, hij_cache) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if @@ -531,7 +535,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) - deallocate(banned, bannedOrb,mat) + deallocate(banned, bannedOrb, mat, hij_cache) end subroutine BEGIN_TEMPLATE @@ -914,7 +918,7 @@ single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; END_TEMPLATE -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, hij_cache) use bitmasks implicit none BEGIN_DOC @@ -926,6 +930,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere integer, intent(in) :: sp, i_gen, N_sel integer, intent(in) :: interesting(0:N_sel) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) @@ -995,9 +1000,9 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) !, hij_cache) else - call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -1190,6 +1195,8 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1201,12 +1208,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) integer :: bant - double precision, allocatable :: hij_cache(:,:) + double precision, allocatable :: hij_cache1(:,:) double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) PROVIDE mo_integrals_map N_int allocate (lbanned(mo_num, 2)) - allocate (hij_cache(mo_num,2)) + allocate (hij_cache1(mo_num,2)) lbanned = bannedOrb do i=1, p(0,1) @@ -1230,13 +1237,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1248,7 +1255,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1274,15 +1281,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) pfix = p(1,mi) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p1 do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1296,7 +1303,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 ! do puti=1,mo_num !HOT if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states @@ -1327,13 +1334,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) puti = p(i, ma) p1 = p(turn3(1,i), ma) p2 = p(turn3(2,i), ma) - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1342,7 +1349,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1,mo_num if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1364,14 +1371,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,ma) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p2 do puti=1,mo_num if(lbanned(puti,ma)) cycle putj = p2 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1383,7 +1390,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states @@ -1408,7 +1415,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end if end if - deallocate(lbanned,hij_cache) + deallocate(lbanned,hij_cache1) !! MONO if(sp == 3) then @@ -1439,7 +1446,7 @@ end -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1450,6 +1457,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) integer :: i, j, k, s, h1, h2, p1, p2, puti, putj double precision :: hij, phase @@ -1457,8 +1465,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) logical :: ok integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) if(sp == 3) then ! AB @@ -1466,7 +1472,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = p(1,2) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1475,7 +1480,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache(p2,p1,1) * phase end if if (hij == 0.d0) cycle !DIR$ LOOP COUNT AVG(4) @@ -1490,8 +1495,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,sp) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? @@ -1500,7 +1503,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) + hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1512,7 +1515,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end if - deallocate(hij_cache1,hij_cache2) end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 90257bbd..516851b9 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -48,7 +48,7 @@ end mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 endif -!mo_integrals_cache_shift = 2 ! 5 = log(32). +mo_integrals_cache_shift = 2 ! 5 = log(32). mo_integrals_cache_size = 2**mo_integrals_cache_shift @@ -176,6 +176,8 @@ double precision function get_two_e_integral(i,j,k,l,map) double precision, external :: ddot get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) +! double precision, external :: get_from_mo_cholesky_cache +! get_two_e_integral = get_from_mo_cholesky_cache(i,j,k,l,.False.) else @@ -227,6 +229,11 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val(1:sze) = 0.d0 return endif +! +! if (do_mo_cholesky) then +! call get_from_mo_cholesky_caches(j,k,l,out_val) +! return +! endif ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) @@ -239,6 +246,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -276,6 +284,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -311,6 +320,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -425,6 +435,10 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) cholesky_mo_transp(1,1,1), cholesky_mo_num, & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_array, 1) +! +! do j=1,sze +! call get_from_mo_cholesky_caches(k,j,l,out_array(1,j)) +! enddo else From acc0b97fbad1589c5453ec9e077668483b419759 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 13:29:38 +0200 Subject: [PATCH 082/131] Inline past_d1 and past_d2 --- src/cipsi/selection.irp.f | 94 ++++++++++----------------- src/mo_two_e_ints/map_integrals.irp.f | 29 ++++----- 2 files changed, 50 insertions(+), 73 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 88a2cbdc..517220a8 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -560,7 +560,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ double precision, external :: diag_H_mat_elem_fock double precision :: E_shift double precision :: s_weight(N_states,N_states) - PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs + PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs thresh_sym excitation_ref hf_bitmask elec_alpha_num do jstate=1,N_states do istate=1,N_states s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) @@ -746,7 +746,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ do istate=1,N_states delta_E = E0(istate) - Hii + E_shift alpha_h_psi = mat(istate, p1, p2) - if (alpha_h_psi == 0.d0) cycle + if (dabs(alpha_h_psi) < mo_integrals_threshold) cycle val = alpha_h_psi + alpha_h_psi tmp = dsqrt(delta_E * delta_E + val * val) @@ -1000,18 +1000,36 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) !, hij_cache) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) else call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d2(banned, p, sp) + if(sp == 3) then + do j=1,p(0,2) + do ii=1,p(0,1) + banned(p(ii,1), p(j,2),1) = .true. + end do + end do + else + do ii=1,p(0, sp) + do j=1,ii-1 + banned(p(j,sp), p(ii,sp),1) = .true. + banned(p(ii,sp), p(j,sp),1) = .true. + end do + end do + end if else if(nt == 3) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d1(bannedOrb, p) + do ii = 1, p(0, 1) + bannedOrb(p(ii, 1), 1) = .true. + end do + do ii = 1, p(0, 2) + bannedOrb(p(ii, 2), 2) = .true. + end do end if end do @@ -1042,6 +1060,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 + PROVIDE mo_integrals_threshold tip = p(0,1) * p(0,2) ma = sp @@ -1067,7 +1086,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) @@ -1097,7 +1116,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(turn2(i), 1) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij /= 0.d0) then + if (dabs(hij) > mo_integrals_threshold) then hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states @@ -1125,7 +1144,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1147,7 +1166,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) if (puti < putj) then @@ -1184,7 +1203,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end -subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1195,7 +1214,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp -! double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1237,13 +1256,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache1(putj,1) - hij_cache1(putj,2) + hij = hij_cache(hfix,putj,1) - hij_cache(putj,hfix,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1255,7 +1272,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache1(putj,2) - hij_cache1(putj,1) + hij = hij_cache(putj,hfix,1) - hij_cache(hfix,putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1466,6 +1483,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, integer, parameter :: bant=1 + PROVIDE mo_integrals_threshold if(sp == 3) then ! AB h1 = p(1,1) @@ -1482,7 +1500,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) hij = hij_cache(p2,p1,1) * phase end if - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT @@ -1501,10 +1519,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle else hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if !DIR$ LOOP COUNT AVG(4) @@ -1518,46 +1536,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, end -subroutine past_d1(bannedOrb, p) - use bitmasks - implicit none - - logical, intent(inout) :: bannedOrb(mo_num, 2) - integer, intent(in) :: p(0:4, 2) - integer :: i,s - - do s = 1, 2 - do i = 1, p(0, s) - bannedOrb(p(i, s), s) = .true. - end do - end do -end - - -subroutine past_d2(banned, p, sp) - use bitmasks - implicit none - - logical, intent(inout) :: banned(mo_num, mo_num) - integer, intent(in) :: p(0:4, 2), sp - integer :: i,j - - if(sp == 3) then - do j=1,p(0,2) - do i=1,p(0,1) - banned(p(i,1), p(j,2)) = .true. - end do - end do - else - do i=1,p(0, sp) - do j=1,i-1 - banned(p(j,sp), p(i,sp)) = .true. - banned(p(i,sp), p(j,sp)) = .true. - end do - end do - end if -end - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 516851b9..13fcc19a 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -192,19 +192,6 @@ double precision function get_two_e_integral(i,j,k,l,map) end -double precision function mo_two_e_integral(i,j,k,l) - implicit none - BEGIN_DOC - ! Returns one integral in the MO basis - END_DOC - integer, intent(in) :: i,j,k,l - double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - !DIR$ FORCEINLINE - mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - return -end - subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) use map_module implicit none @@ -223,8 +210,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) integer(key_kind) :: p,q,r,s,i2 PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - - if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 return @@ -351,6 +336,20 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) end +double precision function mo_two_e_integral(i,j,k,l) + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis + END_DOC + integer, intent(in) :: i,j,k,l + double precision :: get_two_e_integral + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + !DIR$ FORCEINLINE + mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + return +end + + subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) use map_module implicit none From 70317b2a158d69404ac9047bde280f29fa8ec82f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 14:54:32 +0200 Subject: [PATCH 083/131] Put mo_integrals_cache_shift in EZFIO --- src/mo_two_e_ints/EZFIO.cfg | 6 +++ src/mo_two_e_ints/map_integrals.irp.f | 59 +++++++++------------------ 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index c967969f..da9d8fc9 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -10,6 +10,12 @@ doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[mo_integrals_cache_shift] +type: integer +doc: Adjusts the size of the MO integrals cache. 2: 2KB, 3: 32KB, 4: 512KB, 5: 8MB, 6: 128MB, 7: 2GB, 8: 32GB, 9: 512GB +interface: ezfio, provider, ocaml +default: 7 + [mo_integrals_threshold] type: Threshold doc: If | | < `mo_integrals_threshold` then is zero diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 13fcc19a..168c34b4 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -34,38 +34,28 @@ end BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] &BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer, mo_integrals_cache_shift] &BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - if (qp_max_mem < 1) then - mo_integrals_cache_shift = 5 ! 5 = log(32). - else if (qp_max_mem < 2) then - mo_integrals_cache_shift = 6 ! 6 = log(64). - else - mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 - endif -mo_integrals_cache_shift = 2 ! 5 = log(32). - - mo_integrals_cache_size = 2**mo_integrals_cache_shift + mo_integrals_cache_size = 2_8**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) -print *, 'mo_integrals_cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' + print *, 'MO integrals cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_size**4) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:(1_8*mo_integrals_cache_size)**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map integer :: i,j,k,l - integer :: ii + integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache @@ -75,8 +65,8 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz !$OMP PARALLEL DO PRIVATE (k,l,ii) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) ii = shiftl(ii,mo_integrals_cache_shift) ii = shiftl(ii,mo_integrals_cache_shift) call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & @@ -99,10 +89,10 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz call two_e_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) mo_integrals_cache(ii) = integral enddo enddo @@ -121,12 +111,12 @@ double precision function get_two_e_integral_cache(i,j,k,l) ! Returns one integral in the MO basis taken from the cache END_DOC integer, intent(in) :: i,j,k,l - integer :: ii + integer*8 :: ii - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) get_two_e_integral_cache = mo_integrals_cache(ii) end @@ -215,11 +205,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) return endif ! -! if (do_mo_cholesky) then -! call get_from_mo_cholesky_caches(j,k,l,out_val) -! return -! endif - ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) @@ -359,14 +344,14 @@ subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) END_DOC integer, intent(in) :: j,k,l, sze double precision, intent(out) :: out_val(sze) - integer :: ii + integer*8 :: ii - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) ii = shiftl(ii, mo_integrals_cache_shift) out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + mo_integrals_cache(ii:ii+int(mo_integrals_cache_max-mo_integrals_cache_min,8)) end @@ -434,10 +419,6 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) cholesky_mo_transp(1,1,1), cholesky_mo_num, & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_array, 1) -! -! do j=1,sze -! call get_from_mo_cholesky_caches(k,j,l,out_array(1,j)) -! enddo else From d89682cb7ee8beeb0e01d6129afd18d8cd9c78ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 17:50:27 +0200 Subject: [PATCH 084/131] Improved disk access in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 98 +++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 41cdb80d..05f7115d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -41,7 +41,7 @@ END_PROVIDER integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj integer*8 :: i8, j8, p8, qj8, rank_max, np8 integer :: N, np, nq @@ -65,6 +65,8 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) logical :: delta_on_disk + integer :: dgemm_block_size, nqq + double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:) PROVIDE nproc PROVIDE nucl_coord ao_two_e_integral_schwartz @@ -300,17 +302,58 @@ END_PROVIDER !$OMP BARRIER !$OMP END PARALLEL + PROVIDE nproc if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 0.d0, Delta, np) - else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) SCHEDULE(static,1) - do q=1,nq - do j=1,np - Delta(j,q) = 0.d0 + + if (delta_on_disk) then + ! Blocking improves I/O performance + + dgemm_block_size = nproc*4 + + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,N)) + + do jj=1,nq,dgemm_block_size + + nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + do ii=1,N + do q=jj,jj+nqq-1 + dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('N', 'T', np, nqq, N, 1.d0, & + Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + Delta(:,q) = - dgemm_buffer1(:, q-jj+1) + enddo + !$OMP END PARALLEL DO + enddo + + deallocate(dgemm_buffer1, dgemm_buffer2) + + else + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + endif + + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 enddo !$OMP END PARALLEL DO + endif ! f. @@ -329,9 +372,46 @@ END_PROVIDER rank = N+j if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & + + if (delta_on_disk) then + ! Blocking improves I/O performance + + dgemm_block_size = nproc*4 + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,block_size)) + + do jj=1,nq,dgemm_block_size + nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + do ii=1,block_size + do q=jj,jj+nqq-1 + dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('N', 'T', np, nqq, block_size, 1.d0, & + Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + Delta(:,q) = Delta(:,q) - dgemm_buffer1(:, q-jj+1) + enddo + !$OMP END PARALLEL DO + + enddo + deallocate(dgemm_buffer1, dgemm_buffer2) + + else + + call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + endif + iblock = 0 + endif ! ii. From e876f635d636b1cb878cdb9baf9e7b3906bf955f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Jun 2024 16:26:23 +0200 Subject: [PATCH 085/131] Asyc Fortran I/O --- src/ao_two_e_ints/cholesky.irp.f | 161 ++++++++++++++++++++++--------- src/utils/fortran_mmap.c | 7 +- 2 files changed, 118 insertions(+), 50 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 05f7115d..d731ef04 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,12 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), Delta(:,:) + double precision, pointer :: L(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) @@ -66,7 +66,7 @@ END_PROVIDER integer :: fd(2) logical :: delta_on_disk integer :: dgemm_block_size, nqq - double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:) + double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:), dgemm_buffer3(:,:) PROVIDE nproc PROVIDE nucl_coord ao_two_e_integral_schwartz @@ -230,7 +230,7 @@ END_PROVIDER stop -1 endif - if (s > 0.1d0) then + if (s > 0.3d0) then exit endif @@ -245,13 +245,16 @@ END_PROVIDER + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - if (mem > qp_max_mem) then - call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) - call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) - ! Deleting the file while it is open makes the file invisible on the filesystem, - ! and automatically deleted, even if the program crashes + if (1.1*mem > qp_max_mem) then +! call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) +! call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) + +! ! Deleting the file while it is open makes the file invisible on the filesystem, +! ! and automatically deleted, even if the program crashes iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') close(iunit,status='delete') + open(unit=iunit, access='DIRECT', form='UNFORMATTED', RECL=(np+1)*8, & + ASYNCHRONOUS='YES', file=trim(ezfio_work_dir)//'cholesky_delta') delta_on_disk = .True. else allocate(Delta(np,nq)) @@ -303,15 +306,18 @@ END_PROVIDER !$OMP END PARALLEL PROVIDE nproc - if (N>0) then - if (delta_on_disk) then + if (delta_on_disk) then + + dgemm_block_size = nproc*4 + + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer3(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,N)) + + if (N>0) then ! Blocking improves I/O performance - dgemm_block_size = nproc*4 - - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,N)) do jj=1,nq,dgemm_block_size @@ -325,34 +331,55 @@ END_PROVIDER enddo !$OMP END PARALLEL DO - call dgemm('N', 'T', np, nqq, N, 1.d0, & +print *, np, nq, jj, nqq, N + call dgemm('N', 'T', np, nqq, N, -1.d0, & Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + wait(iunit) + dgemm_buffer3(:,:) = dgemm_buffer1(:,:) +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) do q=jj,jj+nqq-1 - Delta(:,q) = - dgemm_buffer1(:, q-jj+1) + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(1:np, q-jj+1) enddo - !$OMP END PARALLEL DO +! !$OMP END PARALLEL DO enddo - - deallocate(dgemm_buffer1, dgemm_buffer2) +print *, 'ok1' else - call dgemm('N', 'T', np, nq, N, -1.d0, & - Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + dgemm_buffer1(1:np,1) = 0.d0 + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=1,nq + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer1(1:np, 1) + enddo +! !$OMP END PARALLEL DO endif + deallocate(dgemm_buffer1, dgemm_buffer2) + if (delta_on_disk) wait(iunit) + deallocate(dgemm_buffer3) + else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP END PARALLEL DO + if (N>0) then + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO + + endif endif @@ -383,25 +410,40 @@ END_PROVIDER do jj=1,nq,dgemm_block_size nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(q,ii) + !$OMP DO do ii=1,block_size do q=jj,jj+nqq-1 dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + !$OMP END PARALLEL - call dgemm('N', 'T', np, nqq, block_size, 1.d0, & - Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) +! !$OMP DO do q=jj,jj+nqq-1 - Delta(:,q) = Delta(:,q) - dgemm_buffer1(:, q-jj+1) + read(iunit, rec=q) dgemm_buffer1(1:np, q-jj+1) enddo - !$OMP END PARALLEL DO +! !$OMP END DO + +print *, np, nq, jj, nqq, block_size + call dgemm('N', 'T', np, nqq, block_size, -1.d0, & + Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 1.d0, dgemm_buffer1, np) + + wait(iunit) + dgemm_buffer3 = dgemm_buffer1 + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(:, q-jj+1) + enddo +! !$OMP END PARALLEL DO enddo +print *, 'ok' deallocate(dgemm_buffer1, dgemm_buffer2) + wait(iunit) + deallocate(dgemm_buffer3) else @@ -427,11 +469,20 @@ END_PROVIDER enddo iblock = iblock+1 - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - !$OMP END PARALLEL DO + + if (delta_on_disk) then + + read(iunit,rec=dj) Ltmp_p(1:np,iblock) + + else + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO + + endif if (.not.computed(dj)) then m = dj @@ -463,12 +514,26 @@ END_PROVIDER !$OMP END PARALLEL DO endif - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - Delta(p,dj) = Ltmp_p(p,iblock) - enddo - !$OMP END PARALLEL DO + if (delta_on_disk) then + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + enddo + !$OMP END PARALLEL DO + + write(iunit, ASYNCHRONOUS='YES', rec=dj) Ltmp_p(1:np,iblock) + + else + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO + + endif computed(dj) = .True. endif @@ -512,7 +577,7 @@ END_PROVIDER deallocate(Ltmp_q) deallocate(computed) if (delta_on_disk) then - call munmap( (/ np*1_8, nq*1_8 /), 8, fd(2), c_pointer(2) ) + close(iunit, status='delete') else deallocate(Delta) endif diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 711a9c34..2dbe42b8 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -40,7 +40,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, exit(EXIT_FAILURE); } - result = write(fd, "", 1); + result = write(fd, " ", 1); if (result != 1) { close(fd); printf("%s:\n", filename); @@ -49,7 +49,10 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK, fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); + if (map == MAP_FAILED) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + } } else { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } From f671c669f8cae460911c1e016e9e44297e817d79 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Jun 2024 14:57:48 +0200 Subject: [PATCH 086/131] Use less memory in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 378 +++++++------------------- src/hartree_fock/fock_matrix_hf.irp.f | 21 +- 2 files changed, 119 insertions(+), 280 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d731ef04..a680e7ee 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,14 +31,14 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:) + double precision, pointer :: L(:,:), Delta(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) integer, allocatable :: addr1(:), addr2(:) - integer*8, allocatable :: Lset(:), Dset(:), addr3(:) + integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj @@ -64,11 +64,8 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) - logical :: delta_on_disk - integer :: dgemm_block_size, nqq - double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:), dgemm_buffer3(:,:) - PROVIDE nproc + PROVIDE nproc ao_cholesky_threshold do_direct_integrals qp_max_mem PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) @@ -88,19 +85,8 @@ END_PROVIDER else - PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) - call resident_memory(mem0) - - rank_max = min(ndim8,(qp_max_mem*1024_8*1024_8*1024_8/8_8)/ndim8) - call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) - call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) - ! Deleting the file while it is open makes the file invisible on the filesystem, - ! and automatically deleted, even if the program crashes - iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') - close(iunit,status='delete') - if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -113,8 +99,12 @@ END_PROVIDER tau = ao_cholesky_threshold tau2 = tau*tau - mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) - call check_mem(mem, irp_here) + rank = 0 + + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8) ) + + call resident_memory(mem0) call print_memory_usage() @@ -127,46 +117,35 @@ END_PROVIDER print *, '============ =============' - rank = 0 - - allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) - allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) -!print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) - ! 1. - k=0 + i8=0 do j=1,ao_num do i=1,ao_num - k = k+1 - addr1(k) = i - addr2(k) = j - addr3(k) = (i-1)*ao_num + j + i8 = i8+1 + addr1(i8) = i + addr2(i8) = j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) do i8=ndim8,1,-1 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) do i8=ndim8,1,-1 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & - addr2(i8), addr2(i8), & - ao_integrals_map) + addr2(i8), addr2(i8), ao_integrals_map) enddo !$OMP END PARALLEL DO endif + D_sorted(:) = -D(:) call dsort_noidx_big(D_sorted,ndim8) - D_sorted(:) = dabs(D_sorted(:)) - + D_sorted(:) = -D_sorted(:) Dmax = D_sorted(1) ! 2. @@ -174,12 +153,24 @@ END_PROVIDER dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then + if ( dscale_tmp*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif enddo np = np8 + if (np <= 0) stop 'np<=0' + if (np > ndim8) stop 'np>ndim8' + + rank_max = min(np,20*elec_num*elec_num) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + ! 3. N = 0 @@ -187,85 +178,59 @@ END_PROVIDER ! 4. i = 0 + mem = memory_of_double(np) & ! Delta(np,nq) + + (np+1)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + +! call check_mem(mem) + ! 5. - do while ( (Dmax > tau).and.(rank*1_8 < min(ndim8,rank_max)) ) + do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 - ! Inrease s until the arrays fit in memory - s = 0.01d0 block_size = max(N,24) + + ! Determine nq so that Delta fits in memory + + s = 0.1d0 + Dmin = max(s*Dmax,tau) + do nq=2,np-1 + if (D_sorted(nq) < Dmin) exit + enddo + do while (.True.) - ! b. - Dmin = max(s*Dmax,tau) + mem = mem0 & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (np+nq)*memory_of_double(block_size) & ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + + memory_of_int(nq) ! computed(nq) - ! c. - nq=0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - endif - enddo - - - mem = mem0 & - + np*memory_of_double(nq) - -!print *, 'mem = ', mem - if (mem > qp_max_mem/2) then - s = s*2.d0 + if (mem > qp_max_mem*0.5d0) then + nq = nq/2 else exit endif - if ((s > 1.d0).or.(nq == 0)) then - call print_memory_usage() - print *, 'Required peak memory: ', mem, 'Gb' - call resident_memory(mem) - print *, 'Already used memory: ', mem, 'Gb' - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif - - if (s > 0.3d0) then - exit - endif - enddo - ! d., e. - mem = mem0 & - + memory_of_int(nq) &! computed(nq) - + np*memory_of_int(nq) &! computed(nq) - + memory_of_double(np) &! Delta_col(np) - + 7*memory_of_double8(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] - + np*memory_of_double(nq) &! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - - if (1.1*mem > qp_max_mem) then -! call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) -! call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) - -! ! Deleting the file while it is open makes the file invisible on the filesystem, -! ! and automatically deleted, even if the program crashes - iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') - close(iunit,status='delete') - open(unit=iunit, access='DIRECT', form='UNFORMATTED', RECL=(np+1)*8, & - ASYNCHRONOUS='YES', file=trim(ezfio_work_dir)//'cholesky_delta') - delta_on_disk = .True. - else - allocate(Delta(np,nq)) - delta_on_disk = .False. + if (nq <= 0) then + print *, nq + stop 'bug in cholesky: nq <= 0' endif -!print *, delta_on_disk - allocate(Delta_col(np)) + Dmin = D_sorted(nq) + nq=0 + do p=1,np + if ( D(Lset(p)) >= Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + endif + enddo + + allocate(Delta(np,nq)) allocate(Ltmp_p(np,block_size), stat=ierr) -!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size if (ierr /= 0) then call print_memory_usage() @@ -274,7 +239,6 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) -!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8), nq, block_size if (ierr /= 0) then call print_memory_usage() @@ -287,7 +251,6 @@ END_PROVIDER computed(:) = .False. -!print *, 'N, rank, block_size', N, rank, block_size !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO @@ -305,81 +268,18 @@ END_PROVIDER !$OMP BARRIER !$OMP END PARALLEL - PROVIDE nproc - - if (delta_on_disk) then - - dgemm_block_size = nproc*4 - - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer3(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,N)) - - if (N>0) then - ! Blocking improves I/O performance - - - do jj=1,nq,dgemm_block_size - - nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) - do ii=1,N - do q=jj,jj+nqq-1 - dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) - enddo - enddo - !$OMP END PARALLEL DO - -print *, np, nq, jj, nqq, N - call dgemm('N', 'T', np, nqq, N, -1.d0, & - Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - - wait(iunit) - dgemm_buffer3(:,:) = dgemm_buffer1(:,:) -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=jj,jj+nqq-1 - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(1:np, q-jj+1) - enddo -! !$OMP END PARALLEL DO - - enddo -print *, 'ok1' - - else - - - dgemm_buffer1(1:np,1) = 0.d0 - -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=1,nq - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer1(1:np, 1) - enddo -! !$OMP END PARALLEL DO - - endif - - deallocate(dgemm_buffer1, dgemm_buffer2) - if (delta_on_disk) wait(iunit) - deallocate(dgemm_buffer3) - - - else - - if (N>0) then + if (N>0) then call dgemm('N', 'T', np, nq, N, -1.d0, & Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) - else + else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO endif @@ -395,64 +295,20 @@ print *, 'ok1' do j=1,nq if ( (Qmax <= Dmin).or.(N+j*1_8 > ndim8) ) exit + ! i. rank = N+j + if (rank == rank_max) then + print *, 'cholesky: rank_max reached' + exit + endif if (iblock == block_size) then - if (delta_on_disk) then - ! Blocking improves I/O performance - - dgemm_block_size = nproc*4 - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,block_size)) - - do jj=1,nq,dgemm_block_size - nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(q,ii) - !$OMP DO - do ii=1,block_size - do q=jj,jj+nqq-1 - dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - -! !$OMP DO - do q=jj,jj+nqq-1 - read(iunit, rec=q) dgemm_buffer1(1:np, q-jj+1) - enddo -! !$OMP END DO - -print *, np, nq, jj, nqq, block_size - call dgemm('N', 'T', np, nqq, block_size, -1.d0, & - Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 1.d0, dgemm_buffer1, np) - - wait(iunit) - dgemm_buffer3 = dgemm_buffer1 - -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=jj,jj+nqq-1 - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(:, q-jj+1) - enddo -! !$OMP END PARALLEL DO - - enddo -print *, 'ok' - deallocate(dgemm_buffer1, dgemm_buffer2) - wait(iunit) - deallocate(dgemm_buffer3) - - else - - call dgemm('N','T',np,nq,block_size,-1.d0, & + call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - endif - - iblock = 0 + iblock = 0 endif @@ -469,71 +325,47 @@ print *, 'ok' enddo iblock = iblock+1 - - if (delta_on_disk) then - - read(iunit,rec=dj) Ltmp_p(1:np,iblock) - - else - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO if (.not.computed(dj)) then m = dj if (do_direct_integrals) then !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) do k=1,np + Delta_col(k) = 0.d0 if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) - else - Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO else + PROVIDE ao_integrals_map !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) do k=1,np + Delta_col(k) = 0.d0 if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) - else - Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO endif - if (delta_on_disk) then - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - enddo - !$OMP END PARALLEL DO - - write(iunit, ASYNCHRONOUS='YES', rec=dj) Ltmp_p(1:np,iblock) - - else - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - Delta(p,dj) = Ltmp_p(p,iblock) - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO computed(dj) = .True. endif @@ -572,30 +404,26 @@ print *, 'ok' print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(Delta_col) deallocate(Ltmp_p) deallocate(Ltmp_q) deallocate(computed) - if (delta_on_disk) then - close(iunit, status='delete') - else - deallocate(Delta) - endif + deallocate(Delta) ! i. N = rank ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) + + Dmax = D_sorted(1) dscale = 1.d0 dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then + if ( dscale_tmp*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -609,7 +437,6 @@ print *, 'ok' print *, '' allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -621,7 +448,7 @@ print *, 'ok' !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num - cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) + cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) enddo enddo !$OMP END PARALLEL DO @@ -646,5 +473,6 @@ print *, 'ok' call wall_time(wall1) print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + END_PROVIDER diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 65b3d63c..6d917322 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -194,17 +194,28 @@ END_PROVIDER endif - double precision :: rss + double precision :: rss, mem0, mem double precision :: memory_of_double integer :: iblock - integer, parameter :: block_size = 32 + integer :: block_size + + call resident_memory(mem0) + + block_size = 1024 + + rss = memory_of_double(2.d0*ao_num*ao_num) + do + mem = mem0 + block_size*rss + if ( (block_size < 2).or.(mem < qp_max_mem) ) exit + block_size = block_size/2 + enddo + + call check_mem(block_size*rss, irp_here) - rss = memory_of_double(ao_num*ao_num) - call check_mem(2.d0*block_size*rss, irp_here) allocate(X2(ao_num,ao_num,block_size,2)) allocate(X3(ao_num,block_size,ao_num,2)) - + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) do iblock=1,cholesky_ao_num,block_size From 4b578d9849df7fa548a0b8627f714df6248a8440 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 20 Jun 2024 13:43:46 +0200 Subject: [PATCH 087/131] mmap is now shared in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 36 ++++++++++++++++++-------------- src/utils/fortran_mmap.c | 3 +++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index a680e7ee..063cc365 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,11 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), Delta(:,:) + double precision, pointer :: L(:,:) double precision :: s - double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) @@ -102,7 +101,7 @@ END_PROVIDER rank = 0 allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) - allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8) ) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8), computed(ndim8) ) call resident_memory(mem0) @@ -149,11 +148,9 @@ END_PROVIDER Dmax = D_sorted(1) ! 2. - dscale = 1.d0 - dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) >= tau2 ) then + if ( Dmax*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -203,16 +200,23 @@ END_PROVIDER mem = mem0 & + np*memory_of_double(nq) & ! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) & ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - + memory_of_int(nq) ! computed(nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem*0.5d0) then - nq = nq/2 + Dmin = D_sorted(nq/2) + do ii=nq/2,np-1 + if (D_sorted(ii) < Dmin) then + nq = ii + exit + endif + enddo else exit endif enddo +!call print_memory_usage +!print *, 'np, nq, Predicted memory: ', np, nq, mem if (nq <= 0) then print *, nq @@ -247,8 +251,7 @@ END_PROVIDER endif - allocate(computed(nq)) - computed(:) = .False. + computed(1:nq) = .False. !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) @@ -406,7 +409,6 @@ END_PROVIDER deallocate(Ltmp_p) deallocate(Ltmp_q) - deallocate(computed) deallocate(Delta) ! i. @@ -419,11 +421,9 @@ END_PROVIDER Dmax = D_sorted(1) - dscale = 1.d0 - dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) >= tau2 ) then + if ( Dmax*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -436,6 +436,10 @@ END_PROVIDER print *, '============ =============' print *, '' + deallocate( D, Lset, Dset, D_sorted ) + deallocate( addr1, addr2, Delta_col, computed ) + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 2dbe42b8..0306f64f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -49,10 +49,13 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); +/* map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); if (map == MAP_FAILED) { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); } +*/ } else { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } From b6b169c1cd5ad510364cf5b33800cad8a6b5272c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 20 Jun 2024 17:27:21 +0200 Subject: [PATCH 088/131] Updated documentation --- src/ao_two_e_ints/cholesky.irp.f | 3 +++ src/utils/linear_algebra.irp.f | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 41cdb80d..cdd64a8c 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -25,7 +25,10 @@ END_PROVIDER ! Last dimension of cholesky_ao is cholesky_ao_num ! ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html + ! ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 + ! + ! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf END_DOC integer*8 :: ndim8 diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 20386b30..4e7ca87d 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1856,7 +1856,7 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! ! matrix A is destroyed inside this subroutine ! Cholesky vectors are stored in U -! dimension of U: U(1:rank, 1:n) +! dimension of U: U(1:n, 1:rank) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! From 7e45c517d981adcd77883a39a361be38b470ff20 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 25 Jun 2024 18:32:44 +0200 Subject: [PATCH 089/131] Introducing gpu_x86 --- src/gpu_x86/NEED | 1 + src/gpu_x86/README.rst | 5 + src/gpu_x86/gpu.c | 506 +++++++++++++++++++++++++++++++++++++ src/gpu_x86/gpu.h | 41 +++ src/gpu_x86/gpu_module.F90 | 141 +++++++++++ 5 files changed, 694 insertions(+) create mode 100644 src/gpu_x86/NEED create mode 100644 src/gpu_x86/README.rst create mode 100644 src/gpu_x86/gpu.c create mode 100644 src/gpu_x86/gpu.h create mode 100644 src/gpu_x86/gpu_module.F90 diff --git a/src/gpu_x86/NEED b/src/gpu_x86/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/gpu_x86/NEED @@ -0,0 +1 @@ + diff --git a/src/gpu_x86/README.rst b/src/gpu_x86/README.rst new file mode 100644 index 00000000..f530bf29 --- /dev/null +++ b/src/gpu_x86/README.rst @@ -0,0 +1,5 @@ +======= +gpu_x86 +======= + +x86 implementation of GPU routines. For use when GPUs are not available. diff --git a/src/gpu_x86/gpu.c b/src/gpu_x86/gpu.c new file mode 100644 index 00000000..71505dbe --- /dev/null +++ b/src/gpu_x86/gpu.c @@ -0,0 +1,506 @@ +#include +#include +#include +#include +#include + + +/* Generic functions */ + +int gpu_ndevices() { + return 1; +} + +void gpu_set_device(int32_t i) { + return; +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, const int64_t n) { + *ptr = malloc((size_t) n); + if (*ptr == NULL) { + perror("Allocation failed"); + } +} + +void gpu_free(void** ptr) { + free(*ptr); + *ptr = NULL; +} + + +/* Memory transfer functions */ + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + memcpy(gpu_ptr, cpu_ptr, n); +} + +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + memcpy(cpu_ptr, gpu_ptr, n); +} + +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + memcpy(gpu_ptr_dest, gpu_ptr_src, n); +} + + +/* Streams */ + +void gpu_stream_create(void** ptr) { + *ptr = (void*) 2; +} + +void gpu_stream_destroy(void** ptr) { + *ptr = NULL; +} + +void gpu_set_stream(void* handle, void* stream) { + return; +} + +void gpu_synchronize() { + return; +} + + +/* BLAS functions */ + +void gpu_blas_create(void** handle) { + *handle = (void*) 1; +} + + +void gpu_blas_destroy(void** handle) { + *handle = NULL; +} + + +double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy); + +void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + *result = ddot_(&n_, x, &incx_, y, &incy_); +} + + +float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy); + +void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + *result = sdot_(&n_, x, &incx_, y, &incy_); +} + + +void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, + const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); + +void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + dgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); +} + + +void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, + const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); + +void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + sgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); +} + + +void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, + const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); + +void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + dgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); +} + + + +void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, + const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); + +void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + sgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); +} + + +void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + if (handle == NULL) { + perror("NULL handle"); + exit(-1); + } + + if ( (transa == 'N' && transb == 'N') || + (transa == 'n' && transb == 'N') || + (transa == 'N' && transb == 'n') || + (transa == 'n' && transb == 'n') ) { + + if (alpha == 0.) { + + for (int64_t j=0 ; j + +int gpu_ndevices(); +void gpu_set_device(int32_t i); + +void gpu_allocate(void** ptr, const int64_t n); +void gpu_free(void** ptr); + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n); +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n); +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n); + +void gpu_stream_create(void** ptr); +void gpu_stream_destroy(void** ptr); +void gpu_set_stream(void* handle, void* stream); +void gpu_synchronize(); + +void gpu_blas_create(void** handle); +void gpu_blas_destroy(void** handle); + +void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result); + +void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result); + +void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy); + +void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy); + +void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc); + +void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc); + +void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc); + +void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, + const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc); diff --git a/src/gpu_x86/gpu_module.F90 b/src/gpu_x86/gpu_module.F90 new file mode 100644 index 00000000..86ba3926 --- /dev/null +++ b/src/gpu_x86/gpu_module.F90 @@ -0,0 +1,141 @@ +module gpu + use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char + implicit none + + interface + integer function gpu_ndevices() bind(C) + end function + + subroutine gpu_set_device(id) bind(C) + import + integer(c_int32_t), value :: id + end subroutine + + subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate') + import + type(c_ptr) :: ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_free_c(ptr) bind(C, name='gpu_free') + import + type(c_ptr) :: ptr + end subroutine + + subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload') + import + type(c_ptr), value :: cpu_ptr + type(c_ptr), value :: gpu_ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download') + import + type(c_ptr), value :: gpu_ptr + type(c_ptr), value :: cpu_ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy') + import + type(c_ptr), value :: gpu_ptr_src + type(c_ptr), value :: gpu_ptr_dest + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_stream_create(stream) bind(C) + import + type(c_ptr) :: stream + end subroutine + + subroutine gpu_stream_destroy(stream) bind(C) + import + type(c_ptr) :: stream + end subroutine + + subroutine gpu_set_stream(handle, stream) bind(C) + import + type(c_ptr) :: handle, stream + end subroutine + + subroutine gpu_synchronize() + end subroutine + + subroutine gpu_blas_create(handle) bind(C) + import + type(c_ptr) :: handle + end subroutine + + subroutine gpu_blas_destroy(handle) bind(C) + import + type(c_ptr) :: handle + end subroutine + + subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) bind(C) + import + type(c_ptr), intent(in) :: handle + integer(c_int64_t), value :: n, incx, incy + real(c_double), intent(in) :: dx(*), dy(*) + real(c_double), intent(out) :: res + end subroutine + + subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) bind(C) + import + type(c_ptr), intent(in) :: handle + integer(c_int64_t), value :: n, incx, incy + real(c_float), intent(in) :: dx(*), dy(*) + real(c_float), intent(out) :: res + end subroutine + + end interface + +end module + +subroutine gpu_allocate_double(ptr, s) + use gpu + implicit none + double precision, pointer, intent(inout) :: ptr + integer*8, intent(in) :: s(*) + type(c_ptr) :: cptr + + call gpu_allocate_c(cptr, sum(s)*8_8) + call c_f_pointer(cptr, ptr, s) +end subroutine + +subroutine gpu_free_double(ptr) + use gpu + implicit none + double precision, pointer, intent(inout) :: ptr + type(c_ptr) :: cptr + cptr = cloc(ptr) + call gpu_free(cptr) + NULLIFY(ptr) +end subroutine + +subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n) + use gpu + implicit none + double precision, intent(in) :: cpu_ptr(*) + double precision, intent(out) :: gpu_ptr(*) + integer(c_int64_t), intent(in) :: n + call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n) +end subroutine + +subroutine gpu_download_double(gpu_ptr, cpu_ptr, n) + use gpu + implicit none + double precision, intent(in) :: gpu_ptr(*) + double precision, intent(out) :: cpu_ptr(*) + integer(c_int64_t), intent(in) :: n + call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n) +end subroutine + +subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n) + use gpu + implicit none + double precision, intent(in) :: gpu_ptr_src(*) + double precision, intent(out) :: gpu_ptr_dest(*) + integer(c_int64_t), intent(in) :: n + call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n) +end subroutine + From 646607ada4a0a58c9cd5e0593c04bce7bc9bd02e Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 26 Jun 2024 11:15:30 +0200 Subject: [PATCH 090/131] 1st commit --- plugins/local/tc_int/NEED | 5 + plugins/local/tc_int/README.rst | 4 + plugins/local/tc_int/int2_grad1_u12.irp.f | 134 ++++++++++++++++++++++ plugins/local/tc_int/jast_grad_2e.irp.f | 102 ++++++++++++++++ plugins/local/tc_int/jast_grad_full.irp.f | 51 ++++++++ plugins/local/tc_int/jast_utils_bh.irp.f | 35 ++++++ plugins/local/tc_int/write_tc_int.irp.f | 58 ++++++++++ 7 files changed, 389 insertions(+) create mode 100644 plugins/local/tc_int/NEED create mode 100644 plugins/local/tc_int/README.rst create mode 100644 plugins/local/tc_int/int2_grad1_u12.irp.f create mode 100644 plugins/local/tc_int/jast_grad_2e.irp.f create mode 100644 plugins/local/tc_int/jast_grad_full.irp.f create mode 100644 plugins/local/tc_int/jast_utils_bh.irp.f create mode 100644 plugins/local/tc_int/write_tc_int.irp.f diff --git a/plugins/local/tc_int/NEED b/plugins/local/tc_int/NEED new file mode 100644 index 00000000..8a4caf5b --- /dev/null +++ b/plugins/local/tc_int/NEED @@ -0,0 +1,5 @@ +tc_keywords +jastrow +qmckl +becke_numerical_grid +dft_utils_in_r diff --git a/plugins/local/tc_int/README.rst b/plugins/local/tc_int/README.rst new file mode 100644 index 00000000..bc9e8483 --- /dev/null +++ b/plugins/local/tc_int/README.rst @@ -0,0 +1,4 @@ +====== +tc_int +====== + diff --git a/plugins/local/tc_int/int2_grad1_u12.irp.f b/plugins/local/tc_int/int2_grad1_u12.irp.f new file mode 100644 index 00000000..0cf0d775 --- /dev/null +++ b/plugins/local/tc_int/int2_grad1_u12.irp.f @@ -0,0 +1,134 @@ + +! --- + +subroutine provide_int2_grad1_u12_ao() + + implicit none + integer :: ipoint, i, j, m, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: time0, time1 + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:) + double precision, allocatable :: tmp_grad1_u12(:,:,:) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + print*, ' start provide_int2_grad1_u12_ao ...' + call wall_time(time0) + + + ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,4) = -(1/2) \int dr2 [\grad1 u(r1,r2)]^2 \chi_i(r2) \chi_j(r2) + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + + + + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) + + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 4 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + + deallocate(tmp_grad1_u12) + + + if(n_rest .gt. 0) then + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4)) + + ii = n_pass*n_blocks + 1 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 4 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + + deallocate(tmp_grad1_u12) + endif + + deallocate(tmp) + + + ! --- + + print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao(:,:,:,1:3) + close(11) + + deallocate(int2_grad1_u12_ao) + + call wall_time(time1) + print*, ' wall time for provide_int2_grad1_u12_ao (min) = ', (time1-time0) / 60.d0 + call print_memory_usage() + +end + +! --- + + diff --git a/plugins/local/tc_int/jast_grad_2e.irp.f b/plugins/local/tc_int/jast_grad_2e.irp.f new file mode 100644 index 00000000..b18b9d62 --- /dev/null +++ b/plugins/local/tc_int/jast_grad_2e.irp.f @@ -0,0 +1,102 @@ + +! --- + +subroutine get_grad1_u12_r1_2e(r1, n_grid2, gradx, grady, gradz) + + BEGIN_DOC + ! + ! d/dx1 j_2e(1,2) + ! d/dy1 j_2e(1,2) + ! d/dz1 j_2e(1,2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint + integer :: i_nucl, p, mpA, npA, opA + integer :: powmax1, powmax, powmax2 + double precision :: r2(3) + double precision :: tmp, tmp1, tmp2 + double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) + double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) + + + powmax1 = max(maxval(jBH_m), maxval(jBH_n)) + powmax2 = maxval(jBH_o) + powmax = max(powmax1, powmax2) + + allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) + + do p = 0, powmax + double_p(p) = dble(p) + enddo + + f1A_power(-1) = 0.d0 + f2A_power(-1) = 0.d0 + g12_power(-1) = 0.d0 + + f1A_power(0) = 1.d0 + f2A_power(0) = 1.d0 + g12_power(0) = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + do i_nucl = 1, nucl_num + + rn(1) = nucl_coord(i_nucl,1) + rn(2) = nucl_coord(i_nucl,2) + rn(3) = nucl_coord(i_nucl,3) + + call jBH_elem_fct_grad(jBH_en(i_nucl), r1, rn, f1A, grad1_f1A) + call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) + call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) + + ! Compute powers of f1A and f2A + do p = 1, powmax1 + f1A_power(p) = f1A_power(p-1) * f1A + f2A_power(p) = f2A_power(p-1) * f2A + enddo + do p = 1, powmax2 + g12_power(p) = g12_power(p-1) * g12 + enddo + + do p = 1, jBH_size + mpA = jBH_m(p,i_nucl) + npA = jBH_n(p,i_nucl) + opA = jBH_o(p,i_nucl) + tmp = jBH_c(p,i_nucl) + if(mpA .eq. npA) then + tmp = tmp * 0.5d0 + endif + + tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp + + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) + enddo ! p + enddo ! i_nucl + enddo ! jpoint + + return +end + +! --- + diff --git a/plugins/local/tc_int/jast_grad_full.irp.f b/plugins/local/tc_int/jast_grad_full.irp.f new file mode 100644 index 00000000..f63ee3e4 --- /dev/null +++ b/plugins/local/tc_int/jast_grad_full.irp.f @@ -0,0 +1,51 @@ + +! --- + +subroutine get_grad1_u12_for_tc(ipoint, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! resx(ipoint) = [grad1 u(r1,r2)]_x1 + ! resy(ipoint) = [grad1 u(r1,r2)]_y1 + ! resz(ipoint) = [grad1 u(r1,r2)]_z1 + ! res (ipoint) = -0.5 [grad1 u(r1,r2)]^2 + ! + ! We use: + ! grid for r1 + ! extra_grid for r2 + ! + END_DOC + + implicit none + integer, intent(in) :: ipoint, n_grid2 + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint + double precision :: env_r1, tmp + double precision :: grad1_env(3), r1(3) + double precision, allocatable :: env_r2(:) + double precision, allocatable :: u2b_r12(:), gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, allocatable :: u2b_mu(:), gradx1_mu(:), grady1_mu(:), gradz1_mu(:) + double precision, allocatable :: u2b_nu(:), gradx1_nu(:), grady1_nu(:), gradz1_nu(:) + double precision, external :: env_nucl + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + + ! j2e_type .eq. "Boys_Handy" + ! env_type .eq. "None" + ! j1e_type .eq "None" + + call get_grad1_u12_r1_2e(r1, n_grid2, resx(1), resy(1), resz(1)) + + do jpoint = 1, n_points_extra_final_grid + res(jpoint) = -0.5d0 * (resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)) + enddo + + return +end + +! --- + diff --git a/plugins/local/tc_int/jast_utils_bh.irp.f b/plugins/local/tc_int/jast_utils_bh.irp.f new file mode 100644 index 00000000..750ce90b --- /dev/null +++ b/plugins/local/tc_int/jast_utils_bh.irp.f @@ -0,0 +1,35 @@ + +! --- + +subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) + + implicit none + double precision, intent(in) :: alpha, r1(3), r2(3) + double precision, intent(out) :: fct, grad1_fct(3) + double precision :: dist, tmp1, tmp2 + + dist = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + + + if(dist .ge. 1d-10) then + tmp1 = 1.d0 / (1.d0 + alpha * dist) + + fct = alpha * dist * tmp1 + tmp2 = alpha * tmp1 * tmp1 / dist + grad1_fct(1) = tmp2 * (r1(1) - r2(1)) + grad1_fct(2) = tmp2 * (r1(2) - r2(2)) + grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 + endif + + return +end + +! --- + diff --git a/plugins/local/tc_int/write_tc_int.irp.f b/plugins/local/tc_int/write_tc_int.irp.f new file mode 100644 index 00000000..ebdce6f2 --- /dev/null +++ b/plugins/local/tc_int/write_tc_int.irp.f @@ -0,0 +1,58 @@ +! --- + +program write_tc_int + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + call provide_int2_grad1_u12_ao() + + call ezfio_set_tc_keywords_io_tc_integ('Read') + +end + +! --- + From a2f4bc218d207d4c588b3d0d1c1d4c5f7448b334 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Jun 2024 13:44:45 +0200 Subject: [PATCH 091/131] GPU acceleration selection in configure --- configure | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 41c0123d..014275eb 100755 --- a/configure +++ b/configure @@ -40,14 +40,16 @@ Usage: $(basename $0) -c $(basename $0) -h $(basename $0) -i + $(basename $0) -g [nvidia|none] Options: - -c Define a COMPILATION configuration file, - in "${QP_ROOT}/config/". - -h Print the HELP message - -i INSTALL . Use at your OWN RISK: - no support will be provided for the installation of - dependencies. + -c Define a COMPILATION configuration file, + in "${QP_ROOT}/config/". + -h Print the HELP message + -i INSTALL . Use at your OWN RISK: + no support will be provided for the installation of + dependencies. + -g [nvidia|none] Choose GPU acceleration (experimental) Example: ./$(basename $0) -c config/gfortran.cfg @@ -83,7 +85,7 @@ function execute () { PACKAGES="" -while getopts "d:c:i:h" c ; do +while getopts "d:c:i:g:h" c ; do case "$c" in c) case "$OPTARG" in @@ -100,6 +102,9 @@ while getopts "d:c:i:h" c ; do "") help ; break;; *) PACKAGES="${PACKAGE} $OPTARG" esac;; + g) + GPU=$OPTARG; + break;; h) help exit 0;; @@ -109,6 +114,23 @@ while getopts "d:c:i:h" c ; do esac done +# Handle GPU acceleration +rm -f ${QP_ROOT}/src/gpu +case "$GPU" in + amd) # Nvidia + echo "Activating AMD GPU acceleration" + ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu + ;; + nvidia) # Nvidia + echo "Activating Nvidia GPU acceleration" + ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu + ;; + *) # No Acceleration + echo "Disabling GPU acceleration" + ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu + ;; +esac + # Trim leading and trailing spaces PACKAGES=$(echo $PACKAGES | xargs) From 1d0bac25d081d76177c6efeba88117f397c5de3c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 26 Jun 2024 15:31:44 +0200 Subject: [PATCH 092/131] v0 --- plugins/local/tc_int/compute_tc_int.irp.f | 295 ++++++++++++++++++++++ plugins/local/tc_int/int2_grad1_u12.irp.f | 134 ---------- plugins/local/tc_int/jast_grad_2e.irp.f | 102 -------- plugins/local/tc_int/jast_grad_full.irp.f | 113 +++++++-- plugins/local/tc_int/write_tc_int.irp.f | 18 +- 5 files changed, 401 insertions(+), 261 deletions(-) create mode 100644 plugins/local/tc_int/compute_tc_int.irp.f delete mode 100644 plugins/local/tc_int/int2_grad1_u12.irp.f delete mode 100644 plugins/local/tc_int/jast_grad_2e.irp.f diff --git a/plugins/local/tc_int/compute_tc_int.irp.f b/plugins/local/tc_int/compute_tc_int.irp.f new file mode 100644 index 00000000..02f21570 --- /dev/null +++ b/plugins/local/tc_int/compute_tc_int.irp.f @@ -0,0 +1,295 @@ + +! --- + +subroutine provide_int2_grad1_u12_ao() + + BEGIN_DOC + ! + ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,4) = \int dr2 [-(1/2) [\grad1 u(r1,r2)]^2] \chi_i(r2) \chi_j(r2) + ! + ! + ! tc_int_2e_ao(k,i,l,j) = (ki|V^TC(r_12)|lj) + ! = where V^TC(r_12) is the total TC operator + ! = tc_grad_and_lapl_ao(k,i,l,j) + tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + ! where: + ! + ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! tc_grad_square_ao(k,i,l,j) = -1/2 + ! + ! ao_two_e_coul(k,i,l,j) = < l k | 1/r12 | j i > = ( k i | 1/r12 | l j ) + ! + END_DOC + + implicit none + + integer :: i, j, k, l, m, ipoint, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision :: weight1, ao_k_r, ao_i_r + double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq + double precision :: time0, time1, time2, tc1, tc2, tc + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:), tc_int_2e_ao(:,:,:,:) + double precision, allocatable :: tmp(:,:,:), c_mat(:,:,:), tmp_grad1_u12(:,:,:) + + double precision, external :: get_ao_two_e_integral + + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE final_weight_at_r_vector aos_grad_in_r_array_transp_bis final_weight_at_r_vector aos_in_r_array_transp + + + + print*, ' start provide_int2_grad1_u12_ao ...' + call wall_time(time0) + + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + + ! --- + ! --- + ! --- + + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) + + tc = 0.d0 + + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + call wall_time(tc1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + do m = 1, 4 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + + deallocate(tmp_grad1_u12) + + + if(n_rest .gt. 0) then + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4)) + + ii = n_pass*n_blocks + 1 + + call wall_time(tc1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + do m = 1, 4 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + + deallocate(tmp_grad1_u12) + endif + + deallocate(tmp) + + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao (min) = ', (time1-time0) / 60.d0 + print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0 + call print_memory_usage() + + ! --- + ! --- + ! --- + + + allocate(tc_int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + call wall_time(time1) + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + deallocate(c_mat) + + call wall_time(time2) + print*, ' wall time of Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + enddo + deallocate(c_mat) + + call wall_time(time2) + print*, ' wall time of non-Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + + call sum_A_At(tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + + call wall_time(time2) + print*, ' lower- and upper-triangle of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + + PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, tc_int_2e_ao, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + tc_int_2e_ao(k,i,l,j) = tc_int_2e_ao(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(time2) + print*, ' wall time of Coulomb part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao(:,:,:,1:3) + close(11) + + print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") + call ezfio_set_work_empty(.False.) + do i = 1, ao_num + write(11) tc_int_2e_ao(:,:,:,i) + enddo + close(11) + + ! ---- + + deallocate(int2_grad1_u12_ao) + deallocate(tc_int_2e_ao) + + call wall_time(time2) + print*, ' wall time for tc_int_2e_ao (min) = ', (time2-time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + print*, ' wall time for TC-integrals (min) = ', (time1-time0) / 60.d0 + + return +end + +! --- + diff --git a/plugins/local/tc_int/int2_grad1_u12.irp.f b/plugins/local/tc_int/int2_grad1_u12.irp.f deleted file mode 100644 index 0cf0d775..00000000 --- a/plugins/local/tc_int/int2_grad1_u12.irp.f +++ /dev/null @@ -1,134 +0,0 @@ - -! --- - -subroutine provide_int2_grad1_u12_ao() - - implicit none - integer :: ipoint, i, j, m, jpoint - integer :: n_blocks, n_rest, n_pass - integer :: i_blocks, i_rest, i_pass, ii - double precision :: time0, time1 - double precision :: mem, n_double - double precision, allocatable :: tmp(:,:,:) - double precision, allocatable :: tmp_grad1_u12(:,:,:) - double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - - print*, ' start provide_int2_grad1_u12_ao ...' - call wall_time(time0) - - - ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,4) = -(1/2) \int dr2 [\grad1 u(r1,r2)]^2 \chi_i(r2) \chi_j(r2) - allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) - - - - call total_memory(mem) - mem = max(1.d0, qp_max_mem - mem) - n_double = mem * 1.d8 - n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) - n_rest = int(mod(n_points_final_grid, n_blocks)) - n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - call write_int(6, n_pass, 'Number of passes') - call write_int(6, n_blocks, 'Size of the blocks') - call write_int(6, n_rest, 'Size of the last block') - - - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) - - do i_pass = 1, n_pass - ii = (i_pass-1)*n_blocks + 1 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_blocks, ipoint) & - !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) - !$OMP DO - do i_blocks = 1, n_blocks - ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 4 - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - enddo - - deallocate(tmp_grad1_u12) - - - if(n_rest .gt. 0) then - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4)) - - ii = n_pass*n_blocks + 1 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_rest, ipoint) & - !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) - !$OMP DO - do i_rest = 1, n_rest - ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 4 - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - - deallocate(tmp_grad1_u12) - endif - - deallocate(tmp) - - - ! --- - - print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) - write(11) int2_grad1_u12_ao(:,:,:,1:3) - close(11) - - deallocate(int2_grad1_u12_ao) - - call wall_time(time1) - print*, ' wall time for provide_int2_grad1_u12_ao (min) = ', (time1-time0) / 60.d0 - call print_memory_usage() - -end - -! --- - - diff --git a/plugins/local/tc_int/jast_grad_2e.irp.f b/plugins/local/tc_int/jast_grad_2e.irp.f deleted file mode 100644 index b18b9d62..00000000 --- a/plugins/local/tc_int/jast_grad_2e.irp.f +++ /dev/null @@ -1,102 +0,0 @@ - -! --- - -subroutine get_grad1_u12_r1_2e(r1, n_grid2, gradx, grady, gradz) - - BEGIN_DOC - ! - ! d/dx1 j_2e(1,2) - ! d/dy1 j_2e(1,2) - ! d/dz1 j_2e(1,2) - ! - END_DOC - - include 'constants.include.F' - - implicit none - integer , intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: gradx(n_grid2) - double precision, intent(out) :: grady(n_grid2) - double precision, intent(out) :: gradz(n_grid2) - - integer :: jpoint - integer :: i_nucl, p, mpA, npA, opA - integer :: powmax1, powmax, powmax2 - double precision :: r2(3) - double precision :: tmp, tmp1, tmp2 - double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) - double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) - - - powmax1 = max(maxval(jBH_m), maxval(jBH_n)) - powmax2 = maxval(jBH_o) - powmax = max(powmax1, powmax2) - - allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) - - do p = 0, powmax - double_p(p) = dble(p) - enddo - - f1A_power(-1) = 0.d0 - f2A_power(-1) = 0.d0 - g12_power(-1) = 0.d0 - - f1A_power(0) = 1.d0 - f2A_power(0) = 1.d0 - g12_power(0) = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 - do i_nucl = 1, nucl_num - - rn(1) = nucl_coord(i_nucl,1) - rn(2) = nucl_coord(i_nucl,2) - rn(3) = nucl_coord(i_nucl,3) - - call jBH_elem_fct_grad(jBH_en(i_nucl), r1, rn, f1A, grad1_f1A) - call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) - call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) - - ! Compute powers of f1A and f2A - do p = 1, powmax1 - f1A_power(p) = f1A_power(p-1) * f1A - f2A_power(p) = f2A_power(p-1) * f2A - enddo - do p = 1, powmax2 - g12_power(p) = g12_power(p-1) * g12 - enddo - - do p = 1, jBH_size - mpA = jBH_m(p,i_nucl) - npA = jBH_n(p,i_nucl) - opA = jBH_o(p,i_nucl) - tmp = jBH_c(p,i_nucl) - if(mpA .eq. npA) then - tmp = tmp * 0.5d0 - endif - - tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) - tmp1 = tmp1 * g12_power(opA) * tmp - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp - - gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) - grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) - gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) - enddo ! p - enddo ! i_nucl - enddo ! jpoint - - return -end - -! --- - diff --git a/plugins/local/tc_int/jast_grad_full.irp.f b/plugins/local/tc_int/jast_grad_full.irp.f index f63ee3e4..78ed1edf 100644 --- a/plugins/local/tc_int/jast_grad_full.irp.f +++ b/plugins/local/tc_int/jast_grad_full.irp.f @@ -16,31 +16,26 @@ subroutine get_grad1_u12_for_tc(ipoint, n_grid2, resx, resy, resz, res) ! END_DOC + include 'constants.include.F' + implicit none integer, intent(in) :: ipoint, n_grid2 double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) - integer :: jpoint - double precision :: env_r1, tmp - double precision :: grad1_env(3), r1(3) - double precision, allocatable :: env_r2(:) - double precision, allocatable :: u2b_r12(:), gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) - double precision, allocatable :: u2b_mu(:), gradx1_mu(:), grady1_mu(:), gradz1_mu(:) - double precision, allocatable :: u2b_nu(:), gradx1_nu(:), grady1_nu(:), gradz1_nu(:) - double precision, external :: env_nucl + integer :: jpoint, i_nucl, p, mpA, npA, opA, pp + integer :: powmax1, powmax, powmax2 + double precision :: r1(3), r2(3) + double precision :: tmp, tmp1, tmp2, tmp11, tmp22 + double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) + double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) r1(3) = final_grid_points(3,ipoint) + call grad1_j12_r1_seq(r1, n_grid2, resx, resy, resz) - ! j2e_type .eq. "Boys_Handy" - ! env_type .eq. "None" - ! j1e_type .eq "None" - - call get_grad1_u12_r1_2e(r1, n_grid2, resx(1), resy(1), resz(1)) - - do jpoint = 1, n_points_extra_final_grid + do jpoint = 1, n_grid2 ! r2 res(jpoint) = -0.5d0 * (resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)) enddo @@ -49,3 +44,91 @@ end ! --- +subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint, i_nucl, p, mpA, npA, opA + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) + double precision :: tmp1, tmp2 + integer :: powmax1, powmax, powmax2 + double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) + + powmax1 = max(maxval(jBH_m), maxval(jBH_n)) + powmax2 = maxval(jBH_o) + powmax = max(powmax1, powmax2) + + allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) + + do p = 0, powmax + double_p(p) = dble(p) + enddo + + f1A_power(-1) = 0.d0 + f2A_power(-1) = 0.d0 + g12_power(-1) = 0.d0 + + f1A_power(0) = 1.d0 + f2A_power(0) = 1.d0 + g12_power(0) = 1.d0 + + do jpoint = 1, n_grid2 ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + do i_nucl = 1, nucl_num + + rn(1) = nucl_coord(i_nucl,1) + rn(2) = nucl_coord(i_nucl,2) + rn(3) = nucl_coord(i_nucl,3) + + call jBH_elem_fct_grad(jBH_en(i_nucl), r1, rn, f1A, grad1_f1A) + call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) + call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) + + ! Compute powers of f1A and f2A + do p = 1, powmax1 + f1A_power(p) = f1A_power(p-1) * f1A + f2A_power(p) = f2A_power(p-1) * f2A + enddo + do p = 1, powmax2 + g12_power(p) = g12_power(p-1) * g12 + enddo + + do p = 1, jBH_size + mpA = jBH_m(p,i_nucl) + npA = jBH_n(p,i_nucl) + opA = jBH_o(p,i_nucl) + tmp = jBH_c(p,i_nucl) + if(mpA .eq. npA) then + tmp = tmp * 0.5d0 + endif + + tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp + + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) + enddo ! p + enddo ! i_nucl + enddo ! jpoint + + return +end + diff --git a/plugins/local/tc_int/write_tc_int.irp.f b/plugins/local/tc_int/write_tc_int.irp.f index ebdce6f2..9f25a6fd 100644 --- a/plugins/local/tc_int/write_tc_int.irp.f +++ b/plugins/local/tc_int/write_tc_int.irp.f @@ -14,19 +14,17 @@ program write_tc_int my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + call write_int(6, my_n_pt_r_grid, 'radial external grid over') call write_int(6, my_n_pt_a_grid, 'angular external grid over') - if(tc_integ_type .eq. "numeric") then - my_extra_grid_becke = .True. - PROVIDE tc_grid2_a tc_grid2_r - my_n_pt_r_extra_grid = tc_grid2_r - my_n_pt_a_extra_grid = tc_grid2_a - touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid - - call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') - call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') - endif + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') call main() From a9d2f0e188cdc88e1cfe1387de4f1c118bb17a5d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Jun 2024 17:55:56 +0200 Subject: [PATCH 093/131] Working on TC ints. Not well tested --- plugins/local/tc_int/jast_grad_full.irp.f | 127 ++++++++++++++++++++-- plugins/local/tc_int/jast_utils_bh.irp.f | 22 ++-- 2 files changed, 134 insertions(+), 15 deletions(-) diff --git a/plugins/local/tc_int/jast_grad_full.irp.f b/plugins/local/tc_int/jast_grad_full.irp.f index 78ed1edf..599d3779 100644 --- a/plugins/local/tc_int/jast_grad_full.irp.f +++ b/plugins/local/tc_int/jast_grad_full.irp.f @@ -4,7 +4,7 @@ subroutine get_grad1_u12_for_tc(ipoint, n_grid2, resx, resy, resz, res) BEGIN_DOC - ! + ! ! resx(ipoint) = [grad1 u(r1,r2)]_x1 ! resy(ipoint) = [grad1 u(r1,r2)]_y1 ! resz(ipoint) = [grad1 u(r1,r2)]_z1 @@ -59,7 +59,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) double precision :: r2(3) double precision :: dx, dy, dz, r12, tmp double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) - double precision :: tmp1, tmp2 + double precision :: tmp1, tmp2, dist integer :: powmax1, powmax, powmax2 double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) @@ -90,30 +90,105 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) gradx(jpoint) = 0.d0 grady(jpoint) = 0.d0 gradz(jpoint) = 0.d0 + + call jBH_elem_fct_grad_alpha1(r1, r2, g12, grad1_g12) + +! dist = (r1(1) - r2(1)) * (r1(1) - r2(1)) & +! + (r1(2) - r2(2)) * (r1(2) - r2(2)) & +! + (r1(3) - r2(3)) * (r1(3) - r2(3)) +! +! if(dist .ge. 1d-15) then +! dist = dsqrt( dist ) +! +! tmp1 = 1.d0 / (1.d0 + dist) +! +! g12 = dist * tmp1 +! tmp2 = tmp1 * tmp1 / dist +! grad1_g12(1) = tmp2 * (r1(1) - r2(1)) +! grad1_g12(2) = tmp2 * (r1(2) - r2(2)) +! grad1_g12(3) = tmp2 * (r1(3) - r2(3)) +! +! else +! +! grad1_g12(1) = 0.d0 +! grad1_g12(2) = 0.d0 +! grad1_g12(3) = 0.d0 +! g12 = 0.d0 +! +! endif +! + do p = 1, powmax2 + g12_power(p) = g12_power(p-1) * g12 + enddo + do i_nucl = 1, nucl_num rn(1) = nucl_coord(i_nucl,1) rn(2) = nucl_coord(i_nucl,2) rn(3) = nucl_coord(i_nucl,3) - call jBH_elem_fct_grad(jBH_en(i_nucl), r1, rn, f1A, grad1_f1A) - call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) - call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) + call jBH_elem_fct_grad_alpha1(r1, rn, f1A, grad1_f1A) +! dist = (r1(1) - rn(1)) * (r1(1) - rn(1)) & +! + (r1(2) - rn(2)) * (r1(2) - rn(2)) & +! + (r1(3) - rn(3)) * (r1(3) - rn(3)) +! if (dist > 1.d-15) then +! dist = dsqrt( dist ) +! +! tmp1 = 1.d0 / (1.d0 + dist) +! +! f1A = dist * tmp1 +! tmp2 = tmp1 * tmp1 / dist +! grad1_f1A(1) = tmp2 * (r1(1) - rn(1)) +! grad1_f1A(2) = tmp2 * (r1(2) - rn(2)) +! grad1_f1A(3) = tmp2 * (r1(3) - rn(3)) +! +! else +! +! grad1_f1A(1) = 0.d0 +! grad1_f1A(2) = 0.d0 +! grad1_f1A(3) = 0.d0 +! f1A = 0.d0 +! +! endif + + call jBH_elem_fct_grad_alpha1(r2, rn, f2A, grad2_f2A) +! dist = (r2(1) - rn(1)) * (r2(1) - rn(1)) & +! + (r2(2) - rn(2)) * (r2(2) - rn(2)) & +! + (r2(3) - rn(3)) * (r2(3) - rn(3)) +! +! if (dist > 1.d-15) then +! dist = dsqrt( dist ) +! +! tmp1 = 1.d0 / (1.d0 + dist) +! +! f2A = dist * tmp1 +! tmp2 = tmp1 * tmp1 / dist +! grad2_f2A(1) = tmp2 * (r2(1) - rn(1)) +! grad2_f2A(2) = tmp2 * (r2(2) - rn(2)) +! grad2_f2A(3) = tmp2 * (r2(3) - rn(3)) +! +! else +! +! grad2_f2A(1) = 0.d0 +! grad2_f2A(2) = 0.d0 +! grad2_f2A(3) = 0.d0 +! f2A = 0.d0 +! +! endif ! Compute powers of f1A and f2A do p = 1, powmax1 f1A_power(p) = f1A_power(p-1) * f1A f2A_power(p) = f2A_power(p-1) * f2A enddo - do p = 1, powmax2 - g12_power(p) = g12_power(p-1) * g12 - enddo do p = 1, jBH_size mpA = jBH_m(p,i_nucl) npA = jBH_n(p,i_nucl) opA = jBH_o(p,i_nucl) tmp = jBH_c(p,i_nucl) +! if (dabs(tmp) <= 1.d-10) cycle +! if(mpA .eq. npA) then tmp = tmp * 0.5d0 endif @@ -132,3 +207,39 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) return end +subroutine jBH_elem_fct_grad_alpha1(r1, r2, fct, grad1_fct) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: fct, grad1_fct(3) + double precision :: dist, tmp1, tmp2 + + dist = (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) + + + if(dist .ge. 1d-15) then + dist = dsqrt( dist ) + + tmp1 = 1.d0 / (1.d0 + dist) + + fct = dist * tmp1 + tmp2 = tmp1 * tmp1 / dist + grad1_fct(1) = tmp2 * (r1(1) - r2(1)) + grad1_fct(2) = tmp2 * (r1(2) - r2(2)) + grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + + else + + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 + + endif + + return +end + +! --- diff --git a/plugins/local/tc_int/jast_utils_bh.irp.f b/plugins/local/tc_int/jast_utils_bh.irp.f index 750ce90b..200bc5ff 100644 --- a/plugins/local/tc_int/jast_utils_bh.irp.f +++ b/plugins/local/tc_int/jast_utils_bh.irp.f @@ -1,35 +1,43 @@ ! --- + + subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) implicit none double precision, intent(in) :: alpha, r1(3), r2(3) double precision, intent(out) :: fct, grad1_fct(3) - double precision :: dist, tmp1, tmp2 + double precision :: dist, tmp1, tmp2, dist_inv - dist = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + dist = (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) - if(dist .ge. 1d-10) then + if(dist .ge. 1d-15) then + dist_inv = 1.d0/dsqrt( dist ) + dist = dist_inv * dist + tmp1 = 1.d0 / (1.d0 + alpha * dist) fct = alpha * dist * tmp1 - tmp2 = alpha * tmp1 * tmp1 / dist + tmp2 = alpha * tmp1 * tmp1 * dist_inv grad1_fct(1) = tmp2 * (r1(1) - r2(1)) grad1_fct(2) = tmp2 * (r1(2) - r2(2)) grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 grad1_fct(2) = 0.d0 grad1_fct(3) = 0.d0 fct = 0.d0 + endif return -end +end ! --- From 5d80cb7b2dd53bdd9eb713e507912e6fce3cadd7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 27 Jun 2024 12:06:06 +0200 Subject: [PATCH 094/131] Separated gpu and gpu_arch --- configure | 8 ++-- src/ccsd/NEED | 1 + src/ccsd/ccsd_space_orb_sub.irp.f | 14 +++++-- src/gpu/NEED | 1 + src/gpu/README.rst | 6 +++ src/{gpu_x86 => gpu}/gpu.h | 0 src/{gpu_x86 => gpu}/gpu_module.F90 | 59 +++++++++++++++-------------- src/gpu_x86/gpu.c | 2 +- 8 files changed, 54 insertions(+), 37 deletions(-) create mode 100644 src/gpu/NEED create mode 100644 src/gpu/README.rst rename src/{gpu_x86 => gpu}/gpu.h (100%) rename src/{gpu_x86 => gpu}/gpu_module.F90 (74%) diff --git a/configure b/configure index 014275eb..db158966 100755 --- a/configure +++ b/configure @@ -115,19 +115,19 @@ while getopts "d:c:i:g:h" c ; do done # Handle GPU acceleration -rm -f ${QP_ROOT}/src/gpu +rm -f ${QP_ROOT}/src/gpu_arch case "$GPU" in amd) # Nvidia echo "Activating AMD GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu + ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu_arch ;; nvidia) # Nvidia echo "Activating Nvidia GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu + ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu_arch ;; *) # No Acceleration echo "Disabling GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu + ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu_arch ;; esac diff --git a/src/ccsd/NEED b/src/ccsd/NEED index e6e6bc59..8298f28e 100644 --- a/src/ccsd/NEED +++ b/src/ccsd/NEED @@ -1,2 +1,3 @@ +gpu hartree_fock utils_cc diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 9d4ae7f9..84aab08a 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,4 +1,5 @@ subroutine run_ccsd_space_orb + use gpu implicit none @@ -11,7 +12,7 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) - double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + double precision, pointer :: H_oo, H_vv, H_vo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -55,7 +56,10 @@ subroutine run_ccsd_space_orb allocate(tau(nO,nO,nV,nV)) allocate(tau_x(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) - allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + call gpu_allocate_double(H_oo, (/ nO, nO /) ) + call gpu_allocate_double(H_vv, (/ nV, nV /) ) + call gpu_allocate_double(H_vo, (/ nV, nO /) ) if (cc_update_method == 'diis') then double precision :: rss, diis_mem, extra_mem @@ -191,7 +195,11 @@ subroutine run_ccsd_space_orb deallocate(all_err,all_t) endif - deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + call gpu_deallocate_double(H_oo) + call gpu_deallocate_double(H_vv) + call gpu_deallocate_double(H_vo) + + deallocate(r1,r2,tau) ! CCSD(T) double precision :: e_t, e_t_err diff --git a/src/gpu/NEED b/src/gpu/NEED new file mode 100644 index 00000000..c2af78d2 --- /dev/null +++ b/src/gpu/NEED @@ -0,0 +1 @@ +gpu_arch diff --git a/src/gpu/README.rst b/src/gpu/README.rst new file mode 100644 index 00000000..17ee28a0 --- /dev/null +++ b/src/gpu/README.rst @@ -0,0 +1,6 @@ +=== +gpu +=== + +Bindings for GPU routines (architecture independent). +Architecture-dependent files are in gpu_arch. diff --git a/src/gpu_x86/gpu.h b/src/gpu/gpu.h similarity index 100% rename from src/gpu_x86/gpu.h rename to src/gpu/gpu.h diff --git a/src/gpu_x86/gpu_module.F90 b/src/gpu/gpu_module.F90 similarity index 74% rename from src/gpu_x86/gpu_module.F90 rename to src/gpu/gpu_module.F90 index 86ba3926..f35ebc97 100644 --- a/src/gpu_x86/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -1,5 +1,5 @@ module gpu - use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char + use, intrinsic :: iso_c_binding implicit none interface @@ -17,7 +17,7 @@ module gpu integer(c_int64_t), value :: n end subroutine - subroutine gpu_free_c(ptr) bind(C, name='gpu_free') + subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate') import type(c_ptr) :: ptr end subroutine @@ -89,53 +89,54 @@ module gpu end interface + contains + + + subroutine gpu_allocate_double(ptr, s) + implicit none + double precision, pointer, intent(inout) :: ptr + integer, intent(in) :: s(:) + type(c_ptr) :: cptr + + call gpu_allocate_c(cptr, sum(s*1_8)*8_8) + call c_f_pointer(cptr, ptr, s) + end subroutine + + subroutine gpu_deallocate_double(ptr) + implicit none + double precision, pointer, intent(inout) :: ptr + type(c_ptr) :: cptr + cptr = c_loc(ptr) + call gpu_deallocate(cptr) + NULLIFY(ptr) + end subroutine + end module -subroutine gpu_allocate_double(ptr, s) - use gpu - implicit none - double precision, pointer, intent(inout) :: ptr - integer*8, intent(in) :: s(*) - type(c_ptr) :: cptr - - call gpu_allocate_c(cptr, sum(s)*8_8) - call c_f_pointer(cptr, ptr, s) -end subroutine - -subroutine gpu_free_double(ptr) - use gpu - implicit none - double precision, pointer, intent(inout) :: ptr - type(c_ptr) :: cptr - cptr = cloc(ptr) - call gpu_free(cptr) - NULLIFY(ptr) -end subroutine - subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n) use gpu implicit none double precision, intent(in) :: cpu_ptr(*) - double precision, intent(out) :: gpu_ptr(*) + double precision, intent(in) :: gpu_ptr(*) integer(c_int64_t), intent(in) :: n - call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n) + call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 8_8*n) end subroutine subroutine gpu_download_double(gpu_ptr, cpu_ptr, n) use gpu implicit none double precision, intent(in) :: gpu_ptr(*) - double precision, intent(out) :: cpu_ptr(*) + double precision, intent(in) :: cpu_ptr(*) integer(c_int64_t), intent(in) :: n - call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n) + call gpu_download_c(c_loc(gpu_ptr), c_loc(cpu_ptr), 8_8*n) end subroutine subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n) use gpu implicit none double precision, intent(in) :: gpu_ptr_src(*) - double precision, intent(out) :: gpu_ptr_dest(*) + double precision, intent(in) :: gpu_ptr_dest(*) integer(c_int64_t), intent(in) :: n - call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n) + call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 8_8*n) end subroutine diff --git a/src/gpu_x86/gpu.c b/src/gpu_x86/gpu.c index 71505dbe..41ede396 100644 --- a/src/gpu_x86/gpu.c +++ b/src/gpu_x86/gpu.c @@ -25,7 +25,7 @@ void gpu_allocate(void** ptr, const int64_t n) { } } -void gpu_free(void** ptr) { +void gpu_deallocate(void** ptr) { free(*ptr); *ptr = NULL; } From 6c02ac0f0b05ea3cc16e0fde66e23c9a0de14246 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 27 Jun 2024 12:07:48 +0200 Subject: [PATCH 095/131] Separated gpu and gpu_arch --- src/gpu/gpu_module.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index f35ebc97..43754454 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -107,7 +107,7 @@ module gpu double precision, pointer, intent(inout) :: ptr type(c_ptr) :: cptr cptr = c_loc(ptr) - call gpu_deallocate(cptr) + call gpu_deallocate_c(cptr) NULLIFY(ptr) end subroutine From fa6d1419496d271a4715efc776790ce7fc152064 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 27 Jun 2024 15:45:52 +0200 Subject: [PATCH 096/131] Introducing GPU in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 224 +++++++----- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 12 +- src/gpu/gpu_module.F90 | 450 ++++++++++++++++++++++--- src/gpu_x86/gpu.c | 48 +-- 4 files changed, 570 insertions(+), 164 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 84aab08a..455d62f7 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -10,9 +10,9 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) - double precision, allocatable :: t1(:,:), r1(:,:) - double precision, pointer :: H_oo, H_vv, H_vo + type(gpu_double4) :: t2, r2, tau, tau_x + type(gpu_double2) :: t1, r1 + type(gpu_double2) :: H_oo, H_vv, H_vo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -52,14 +52,15 @@ subroutine run_ccsd_space_orb !print*,'occ',list_occ !print*,'vir',list_vir - allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) - allocate(tau(nO,nO,nV,nV)) - allocate(tau_x(nO,nO,nV,nV)) - allocate(t1(nO,nV), r1(nO,nV)) - - call gpu_allocate_double(H_oo, (/ nO, nO /) ) - call gpu_allocate_double(H_vv, (/ nV, nV /) ) - call gpu_allocate_double(H_vo, (/ nV, nO /) ) + call gpu_allocate(t2, nO,nO,nV,nV) + call gpu_allocate(r2, nO,nO,nV,nV) + call gpu_allocate(tau, nO,nO,nV,nV) + call gpu_allocate(tau_x, nO,nO,nV,nV) + call gpu_allocate(t1, nO,nV) + call gpu_allocate(r1, nO,nV) + call gpu_allocate(H_oo, nO, nO) + call gpu_allocate(H_vo, nV, nO) + call gpu_allocate(H_vv, nV, nV) if (cc_update_method == 'diis') then double precision :: rss, diis_mem, extra_mem @@ -101,14 +102,21 @@ subroutine run_ccsd_space_orb endif ! Init - call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) - call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) - call update_tau_space(nO,nV,t1,t2,tau) + double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:) + allocate(h_t1(nO,nV), h_t2(nO,nO,nV,nV)) + + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,h_t1) + call gpu_upload(h_t1, t1) + + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2) + call gpu_upload(h_t2, t2) + + call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x%f,t1%f,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -127,40 +135,38 @@ subroutine run_ccsd_space_orb if (do_ao_cholesky) then ! if (.False.) then call compute_H_oo_chol(nO,nV,tau_x,H_oo) - call compute_H_vv_chol(nO,nV,tau_x,H_vv) - call compute_H_vo_chol(nO,nV,t1,H_vo) + call compute_H_vv_chol(nO,nV,tau_x%f,H_vv%f) + call compute_H_vo_chol(nO,nV,t1%f,H_vo%f) - call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + call compute_r1_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r1%f,max_r1) + call compute_r2_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r2%f,max_r2) else - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) + call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) + call compute_H_vv(nO,nV,t1%f,t2%f,tau%f,H_vv%f) + call compute_H_vo(nO,nV,t1%f,t2%f,H_vo%f) - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + call compute_r1_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r1%f,max_r1) + call compute_r2_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r2%f,max_r2) endif max_r = max(max_r1,max_r2) ! Update if (cc_update_method == 'diis') then - !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) - call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1%f,t1%f) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2%f,t2%f) else print*,'Unkown cc_method_method: '//cc_update_method endif - call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_space(nO,nV,t1%f,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x%f,t1%f,energy) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 @@ -185,8 +191,8 @@ subroutine run_ccsd_space_orb print*,'' if (write_amplitudes) then - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + call write_t1(nO,nV,t1%f) + call write_t2(nO,nV,t2%f) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -195,11 +201,14 @@ subroutine run_ccsd_space_orb deallocate(all_err,all_t) endif - call gpu_deallocate_double(H_oo) - call gpu_deallocate_double(H_vv) - call gpu_deallocate_double(H_vo) + call gpu_deallocate(H_oo) + call gpu_deallocate(H_vv) + call gpu_deallocate(H_vo) - deallocate(r1,r2,tau) + call gpu_deallocate(r1) + call gpu_deallocate(r2) + call gpu_deallocate(tau) + call gpu_deallocate(tau_x) ! CCSD(T) double precision :: e_t, e_t_err @@ -207,28 +216,14 @@ subroutine run_ccsd_space_orb if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then - ! Dumb way - !call wall_time(ta) - !call ccsd_par_t_space(nO,nV,t1,t2,e_t) - !call wall_time(tb) - !print*,'Time: ',tb-ta, ' s' - - !print*,'' - !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' - !print*,'' - ! New e_t = uncorr_energy + energy ! For print in (T) call e_t_err = 0.d0 print*,'Computing (T) correction...' call wall_time(ta) -! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & -! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) - call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + call ccsd_par_t_space_stoch(nO,nV,t1%f,t2%f,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) call wall_time(tb) @@ -243,7 +238,9 @@ subroutine run_ccsd_space_orb call save_energy(uncorr_energy + energy, e_t) - deallocate(t1,t2) + deallocate(h_t1, h_t2) + call gpu_deallocate(t1) + call gpu_deallocate(t2) end @@ -341,70 +338,139 @@ end ! Tau -subroutine update_tau_space(nO,nV,t1,t2,tau) - +subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) + use gpu implicit none ! in integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: h_t1(nO,nV) + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2 ! out - double precision, intent(out) :: tau(nO,nO,nV,nV) + type(gpu_double4) :: tau ! internal integer :: i,j,a,b +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,tau,t2,t1,h_t1) & +! !$OMP PRIVATE(i,j,a,b) & +! !$OMP DEFAULT(NONE) +! !$OMP DO +! do b = 1, nV +! do a = 1, nV +! do j = 1, nO +! do i = 1, nO +! tau%f(i,j,a,b) = t2%f(i,j,a,b) + t1%f(i,a) * h_t1(j,b) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + + type(gpu_blas) :: blas + type(gpu_stream) :: stream(nV) + + call gpu_blas_create(blas) + do b=1,nV + call gpu_stream_create(stream(b)) + enddo + !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) - enddo - enddo + do j=1,nO + !$OMP DO + do b=1,nV + call gpu_set_stream(blas,stream(b)) + call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nV*1_8, & + 1.d0, c_loc(t2%f(1,j,1,b)), nO*nO*1_8, & + h_t1(j,b), t1%c, nO*1_8, & + c_loc(tau%f(1,j,1,b)), nO*nO*1_8) enddo + !$OMP END DO enddo - !$OMP END DO !$OMP END PARALLEL + call gpu_synchronize() + + do b=1,nV + call gpu_stream_destroy(stream(b)) + enddo + + call gpu_blas_destroy(blas) + end subroutine update_tau_x_space(nO,nV,tau,tau_x) - + use gpu implicit none ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) + integer, intent(in) :: nO, nV + type(gpu_double4), intent(in) :: tau ! out - double precision, intent(out) :: tau_x(nO,nO,nV,nV) + type(gpu_double4) :: tau_x ! internal integer :: i,j,a,b +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,tau,tau_x) & +! !$OMP PRIVATE(i,j,a,b) & +! !$OMP DEFAULT(NONE) +! !$OMP DO +! do b = 1, nV +! do a = 1, nV +! do j = 1, nO +! do i = 1, nO +! tau_x%f(i,j,a,b) = 2.d0*tau%f(i,j,a,b) - tau%f(i,j,b,a) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + type(gpu_blas) :: blas + type(gpu_stream) :: stream(nV) + + call gpu_blas_create(blas) + do a=1,nV + call gpu_stream_create(stream(a)) + enddo + !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,tau_x) & + !$OMP SHARED(nO,nV,tau,tau_x,stream,blas) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) - enddo - enddo + do b=1,nV + do a=1,nV + call gpu_set_stream(blas,stream(a)) + call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nO*1_8, & + 2.d0, c_loc(tau%f(1,1,a,b)), nO*1_8, & + -1.d0, c_loc(tau%f(1,1,b,a)), nO*1_8, & + c_loc(tau_x%f(1,1,a,b)), nO*1_8) enddo enddo !$OMP END DO !$OMP END PARALLEL + call gpu_synchronize() + + do b=1,nV + call gpu_stream_destroy(stream(b)) + enddo + + call gpu_blas_destroy(blas) + end ! R1 diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index b59dc0bb..9b161001 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -294,12 +294,12 @@ end ! H_oo subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) - + use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: tau_x(nO, nO, nV, nV) - double precision, intent(out) :: H_oo(nO, nO) + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_oo integer :: a,b,i,j,u,k @@ -315,7 +315,7 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) do b=1,nV do j=1,nO do a=1,nV - tmp_vov(a,j,b) = tau_x(u,j,a,b) + tmp_vov(a,j,b) = tau_x%f(u,j,a,b) enddo enddo enddo @@ -328,7 +328,7 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) !$omp do do i = 1, nO do u = 1, nO - H_oo(u,i) = cc_space_f_oo(u,i) + H_oo%f(u,i) = cc_space_f_oo(u,i) enddo enddo !$omp end do nowait @@ -336,7 +336,7 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) !$omp end parallel call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & - 1.d0, H_oo, nO) + 1.d0, H_oo%f, nO) end diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 43754454..51f80ac0 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -2,6 +2,52 @@ module gpu use, intrinsic :: iso_c_binding implicit none +! Data types +! ---------- + + type gpu_double1 + type(c_ptr) :: c + double precision, pointer :: f(:) + end type + + type gpu_double2 + type(c_ptr) :: c + double precision, pointer :: f(:,:) + end type + + type gpu_double3 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:) + end type + + type gpu_double4 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:) + end type + + type gpu_double5 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:,:) + end type + + type gpu_double6 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:,:,:) + end type + + + type gpu_blas + type(c_ptr) :: c + end type + + type gpu_stream + type(c_ptr) :: c + end type + + +! C interfaces +! ------------ + interface integer function gpu_ndevices() bind(C) end function @@ -43,100 +89,394 @@ module gpu integer(c_int64_t), value :: n end subroutine - subroutine gpu_stream_create(stream) bind(C) + subroutine gpu_stream_create_c(stream) bind(C, name='gpu_stream_create') import type(c_ptr) :: stream end subroutine - subroutine gpu_stream_destroy(stream) bind(C) + subroutine gpu_stream_destroy_c(stream) bind(C, name='gpu_stream_destroy') import type(c_ptr) :: stream end subroutine - subroutine gpu_set_stream(handle, stream) bind(C) + subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream') import type(c_ptr) :: handle, stream end subroutine - subroutine gpu_synchronize() + subroutine gpu_synchronize() bind(C) + import end subroutine - subroutine gpu_blas_create(handle) bind(C) + subroutine gpu_blas_create_c(handle) bind(C, name='gpu_blas_create') import type(c_ptr) :: handle end subroutine - subroutine gpu_blas_destroy(handle) bind(C) + subroutine gpu_blas_destroy_c(handle) bind(C, name='gpu_blas_destroy') import type(c_ptr) :: handle end subroutine - subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) bind(C) + subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot') import - type(c_ptr), intent(in) :: handle - integer(c_int64_t), value :: n, incx, incy - real(c_double), intent(in) :: dx(*), dy(*) - real(c_double), intent(out) :: res + type(c_ptr), intent(in), value :: handle + integer(c_int64_t), value :: n, incx, incy + type(c_ptr), intent(in), value :: dx, dy + real(c_double), intent(out) :: res end subroutine - subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) bind(C) + subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot') import - type(c_ptr), intent(in) :: handle - integer(c_int64_t), value :: n, incx, incy - real(c_float), intent(in) :: dx(*), dy(*) + type(c_ptr), intent(in), value :: handle + integer(c_int64_t), value :: n, incx, incy + type(c_ptr), intent(in), value :: dx, dy real(c_float), intent(out) :: res end subroutine + subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) bind(C, name='gpu_dgeam') + import + type(c_ptr), intent(in), value :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc + real(c_double), intent(in), value :: alpha, beta + type(c_ptr), value :: a, b, c + end subroutine + end interface + +! Polymorphic interfaces +! ---------------------- + + interface gpu_allocate + procedure gpu_allocate_double1 & + ,gpu_allocate_double2 & + ,gpu_allocate_double3 & + ,gpu_allocate_double4 & + ,gpu_allocate_double5 & + ,gpu_allocate_double6 + end interface gpu_allocate + + interface gpu_deallocate + procedure gpu_deallocate_double1 & + ,gpu_deallocate_double2 & + ,gpu_deallocate_double3 & + ,gpu_deallocate_double4 & + ,gpu_deallocate_double5 & + ,gpu_deallocate_double6 + end interface gpu_deallocate + + interface gpu_upload + procedure gpu_upload_double1 & + ,gpu_upload_double2 & + ,gpu_upload_double3 & + ,gpu_upload_double4 & + ,gpu_upload_double5 & + ,gpu_upload_double6 + end interface gpu_upload + + interface gpu_download + procedure gpu_download_double1 & + ,gpu_download_double2 & + ,gpu_download_double3 & + ,gpu_download_double4 & + ,gpu_download_double5 & + ,gpu_download_double6 + end interface gpu_download + + interface gpu_copy + procedure gpu_copy_double1 & + ,gpu_copy_double2 & + ,gpu_copy_double3 & + ,gpu_copy_double4 & + ,gpu_copy_double5 & + ,gpu_copy_double6 + end interface gpu_copy + + contains - subroutine gpu_allocate_double(ptr, s) - implicit none - double precision, pointer, intent(inout) :: ptr - integer, intent(in) :: s(:) - type(c_ptr) :: cptr +! gpu_allocate +! ------------ - call gpu_allocate_c(cptr, sum(s*1_8)*8_8) - call c_f_pointer(cptr, ptr, s) + subroutine gpu_allocate_double1(ptr, s) + implicit none + type(gpu_double1), intent(inout) :: ptr + integer, intent(in) :: s + + call gpu_allocate_c(ptr%c, s*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s /)) end subroutine - subroutine gpu_deallocate_double(ptr) + subroutine gpu_allocate_double2(ptr, s1, s2) implicit none - double precision, pointer, intent(inout) :: ptr - type(c_ptr) :: cptr - cptr = c_loc(ptr) - call gpu_deallocate_c(cptr) - NULLIFY(ptr) + type(gpu_double2), intent(inout) :: ptr + integer, intent(in) :: s1, s2 + + call gpu_allocate_c(ptr%c, s1*s2*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /)) + end subroutine + + subroutine gpu_allocate_double3(ptr, s1, s2, s3) + implicit none + type(gpu_double3), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3 + + call gpu_allocate_c(ptr%c, s1*s2*s3*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /)) + end subroutine + + subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4) + implicit none + type(gpu_double4), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /)) + end subroutine + + subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5) + implicit none + type(gpu_double5), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4, s5 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /)) + end subroutine + + subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6) + implicit none + type(gpu_double6), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4, s5, s6 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /)) + end subroutine + + +! gpu_deallocate +! -------------- + + subroutine gpu_deallocate_double1(ptr) + implicit none + type(gpu_double1), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double2(ptr) + implicit none + type(gpu_double2), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double3(ptr) + implicit none + type(gpu_double3), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double4(ptr) + implicit none + type(gpu_double4), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double5(ptr) + implicit none + type(gpu_double5), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double6(ptr) + implicit none + type(gpu_double6), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + +! gpu_upload +! ---------- + + subroutine gpu_upload_double1(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:) + type(gpu_double1), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*size(gpu_ptr%f)) + end subroutine + + subroutine gpu_upload_double2(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:,:) + type(gpu_double2), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double3(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:,:,:) + type(gpu_double3), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double4(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:,:,:,:) + type(gpu_double4), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double5(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:,:,:,:,:) + type(gpu_double5), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double6(cpu_ptr, gpu_ptr) + implicit none + double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:) + type(gpu_double6), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + +! gpu_download +! ------------ + + subroutine gpu_download_double1(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double1), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f)) + end subroutine + + subroutine gpu_download_double2(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double2), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double3(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double3), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double4(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double4), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double5(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double5), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:,:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double6(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double6), intent(in) :: gpu_ptr + double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + +! gpu_copy +! -------- + + subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double1), intent(in) :: gpu_ptr_src + type(gpu_double1), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*size(gpu_ptr_dest%f)) + end subroutine + + subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double2), intent(in) :: gpu_ptr_src + type(gpu_double2), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double3), intent(in) :: gpu_ptr_src + type(gpu_double3), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double4), intent(in) :: gpu_ptr_src + type(gpu_double4), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double5), intent(in) :: gpu_ptr_src + type(gpu_double5), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double6), intent(in) :: gpu_ptr_src + type(gpu_double6), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + +! gpu_stream +! ---------- + + subroutine gpu_stream_create(stream) + import + type(gpu_stream) :: stream + call gpu_stream_create_c(stream%c) + end subroutine + + subroutine gpu_stream_destroy(stream) + import + type(gpu_stream) :: stream + call gpu_stream_destroy_c(stream%c) + end subroutine + + subroutine gpu_set_stream(handle, stream) + import + type(gpu_blas) :: handle + type(gpu_stream) :: stream + call gpu_set_stream_c(handle%c, stream%c) + end subroutine + + +! gpu_blas +! -------- + + subroutine gpu_blas_create(handle) + import + type(gpu_blas) :: handle + call gpu_blas_create_c(handle%c) + end subroutine + + subroutine gpu_blas_destroy(handle) + import + type(gpu_blas) :: handle + call gpu_blas_destroy_c(handle%c) end subroutine end module -subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n) - use gpu - implicit none - double precision, intent(in) :: cpu_ptr(*) - double precision, intent(in) :: gpu_ptr(*) - integer(c_int64_t), intent(in) :: n - call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 8_8*n) -end subroutine - -subroutine gpu_download_double(gpu_ptr, cpu_ptr, n) - use gpu - implicit none - double precision, intent(in) :: gpu_ptr(*) - double precision, intent(in) :: cpu_ptr(*) - integer(c_int64_t), intent(in) :: n - call gpu_download_c(c_loc(gpu_ptr), c_loc(cpu_ptr), 8_8*n) -end subroutine - -subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n) - use gpu - implicit none - double precision, intent(in) :: gpu_ptr_src(*) - double precision, intent(in) :: gpu_ptr_dest(*) - integer(c_int64_t), intent(in) :: n - call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 8_8*n) -end subroutine - diff --git a/src/gpu_x86/gpu.c b/src/gpu_x86/gpu.c index 41ede396..5f42cb0d 100644 --- a/src/gpu_x86/gpu.c +++ b/src/gpu_x86/gpu.c @@ -251,7 +251,7 @@ void gpu_dgeam(const void* handle, const char transa, const char transb, const i if (alpha == 0.) { for (int64_t j=0 ; j Date: Fri, 28 Jun 2024 11:00:58 +0200 Subject: [PATCH 097/131] Added Nvidia module --- src/ccsd/ccsd_space_orb_sub.irp.f | 10 +- src/gpu/gpu_module.F90 | 6 +- src/gpu_nvidia/LIB | 1 + src/gpu_nvidia/NEED | 1 + src/gpu_nvidia/README.rst | 5 + src/gpu_nvidia/gpu.c | 327 ++++++++++++++++++++++++++++++ src/gpu_x86/gpu.c | 40 ++-- 7 files changed, 359 insertions(+), 31 deletions(-) create mode 100644 src/gpu_nvidia/LIB create mode 100644 src/gpu_nvidia/NEED create mode 100644 src/gpu_nvidia/README.rst create mode 100644 src/gpu_nvidia/gpu.c diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 455d62f7..e7c9b1ab 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -384,17 +384,17 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - do j=1,nO - !$OMP DO - do b=1,nV - call gpu_set_stream(blas,stream(b)) + !$OMP DO + do b=1,nV + call gpu_set_stream(blas,stream(b)) + do j=1,nO call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nV*1_8, & 1.d0, c_loc(t2%f(1,j,1,b)), nO*nO*1_8, & h_t1(j,b), t1%c, nO*1_8, & c_loc(tau%f(1,j,1,b)), nO*nO*1_8) enddo - !$OMP END DO enddo + !$OMP END DO !$OMP END PARALLEL call gpu_synchronize() diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 51f80ac0..d1ddad4c 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -120,7 +120,7 @@ module gpu subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot') import - type(c_ptr), intent(in), value :: handle + type(c_ptr), intent(in) :: handle integer(c_int64_t), value :: n, incx, incy type(c_ptr), intent(in), value :: dx, dy real(c_double), intent(out) :: res @@ -128,7 +128,7 @@ module gpu subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot') import - type(c_ptr), intent(in), value :: handle + type(c_ptr), intent(in) :: handle integer(c_int64_t), value :: n, incx, incy type(c_ptr), intent(in), value :: dx, dy real(c_float), intent(out) :: res @@ -137,7 +137,7 @@ module gpu subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) bind(C, name='gpu_dgeam') import - type(c_ptr), intent(in), value :: handle + type(c_ptr), intent(in) :: handle character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc real(c_double), intent(in), value :: alpha, beta diff --git a/src/gpu_nvidia/LIB b/src/gpu_nvidia/LIB new file mode 100644 index 00000000..91f54e91 --- /dev/null +++ b/src/gpu_nvidia/LIB @@ -0,0 +1 @@ +-lcudart -lcublas -lcublasLt diff --git a/src/gpu_nvidia/NEED b/src/gpu_nvidia/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/gpu_nvidia/NEED @@ -0,0 +1 @@ + diff --git a/src/gpu_nvidia/README.rst b/src/gpu_nvidia/README.rst new file mode 100644 index 00000000..5dcfca92 --- /dev/null +++ b/src/gpu_nvidia/README.rst @@ -0,0 +1,5 @@ +========== +gpu_nvidia +========== + +Nvidia implementation of GPU routines. Uses CUDA and CUBLAS libraries. diff --git a/src/gpu_nvidia/gpu.c b/src/gpu_nvidia/gpu.c new file mode 100644 index 00000000..f0bd247a --- /dev/null +++ b/src/gpu_nvidia/gpu.c @@ -0,0 +1,327 @@ +#include +#include +#include +#include +#include + +#include +#include + + +/* Generic functions */ + +int gpu_ndevices() { + int ngpus; + cudaGetDeviceCount(&ngpus); + return ngpus; +} + +void gpu_set_device(int32_t igpu) { + cudaSetDevice(igpu); +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, const int64_t size) { + size_t free, total; + cudaError_t rc = cudaMemGetInfo( &free, &total ); + if (rc != cudaSuccess) { + free = INT64_MAX; + } + + /* Use managed memory if it does not fit on the GPU */ + if (size < free && size < total/2) { +// rc= cudaMalloc(ptr, size); + rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); + } else { + rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); + } + assert (rc == cudaSuccess); +} + +void gpu_deallocate(void** ptr) { + assert (*ptr != NULL); + cudaFree(*ptr); + *ptr = NULL; +} + + +/* Memory transfer functions */ + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice); +} + +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost); +} + +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice); +} + + +/* Streams */ + +void gpu_stream_create(void** ptr) { + cudaStream_t stream; + cudaError_t rc = cudaStreamCreate(&stream); + assert (rc == cudaSuccess); + *ptr = (void*) stream; +} + +void gpu_stream_destroy(void** ptr) { + assert (*ptr != NULL); + cudaError_t rc = cudaStreamDestroy( (cudaStream_t) *ptr); + assert (rc == cudaSuccess); + *ptr = NULL; +} + +void gpu_set_stream(void** handle, void** stream) { + cublasSetStream( (cublasHandle_t) *handle, (cudaStream_t) *stream); +} + +void gpu_synchronize() { + cudaDeviceSynchronize(); +} + + +/* BLAS functions */ + +void gpu_blas_create(void** handle) { + cublasHandle_t cublas_handle; + cublasStatus_t rc = cublasCreate(&cublas_handle); + assert (rc == CUBLAS_STATUS_SUCCESS); + *handle = (void*) cublas_handle; +} + + +void gpu_blas_destroy(void** handle) { + assert (*handle != NULL); + cublasStatus_t rc = cublasDestroy( (cublasHandle_t) *handle); + assert (rc == CUBLAS_STATUS_SUCCESS); + *handle = NULL; +} + + +void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasDdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result); +} + + + +void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasSdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result); +} + + + +void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { + + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasOperation_t transa_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + + cublasDgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); +} + + + +void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { + + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasOperation_t transa_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + + cublasSgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); +} + + +void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { + + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + + cublasDgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); +} + + + +void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { + + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + + cublasSgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); +} + + +void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, + const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + + cublasDgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + +} + + +void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, + const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { + assert (*handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + + cublasSgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + +} diff --git a/src/gpu_x86/gpu.c b/src/gpu_x86/gpu.c index 5f42cb0d..ac7c3620 100644 --- a/src/gpu_x86/gpu.c +++ b/src/gpu_x86/gpu.c @@ -56,7 +56,7 @@ void gpu_stream_destroy(void** ptr) { *ptr = NULL; } -void gpu_set_stream(void* handle, void* stream) { +void gpu_set_stream(void** handle, void** stream) { return; } @@ -79,8 +79,8 @@ void gpu_blas_destroy(void** handle) { double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy); -void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { - assert (handle != NULL); +void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (*handle != NULL); /* Convert to int32_t */ int32_t n_, incx_, incy_; @@ -100,8 +100,8 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_ float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy); -void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { - assert (handle != NULL); +void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (*handle != NULL); /* Convert to int32_t */ int32_t n_, incx_, incy_; @@ -122,10 +122,10 @@ void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); -void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, +void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { - assert (handle != NULL); + assert (*handle != NULL); /* Convert to int32_t */ int32_t m_, n_, lda_, incx_, incy_; @@ -150,10 +150,10 @@ void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); -void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, +void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { - assert (handle != NULL); + assert (*handle != NULL); /* Convert to int32_t */ int32_t m_, n_, lda_, incx_, incy_; @@ -178,10 +178,10 @@ void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); -void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, +void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { - assert (handle != NULL); + assert (*handle != NULL); /* Convert to int32_t */ int32_t m_, n_, k_, lda_, ldb_, ldc_; @@ -209,10 +209,10 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); -void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, +void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { - assert (handle != NULL); + assert (*handle != NULL); /* Convert to int32_t */ int32_t m_, n_, k_, lda_, ldb_, ldc_; @@ -236,12 +236,9 @@ void gpu_sgemm(const void* handle, const char transa, const char transb, const i } -void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, +void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { - if (handle == NULL) { - perror("NULL handle"); - exit(-1); - } + assert (*handle != NULL); if ( (transa == 'N' && transb == 'N') || (transa == 'n' && transb == 'N') || @@ -371,12 +368,9 @@ void gpu_dgeam(const void* handle, const char transa, const char transb, const i } -void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, +void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { - if (handle == NULL) { - perror("NULL handle"); - exit(-1); - } + assert (*handle != NULL); if ( (transa == 'N' && transb == 'N') || (transa == 'n' && transb == 'N') || From d3d89022bc8092ab0c6131904f85475f160dfa53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Jun 2024 16:50:52 +0200 Subject: [PATCH 098/131] Move GPU modules to plugins --- configure | 6 +++--- {src => plugins/local}/gpu_nvidia/LIB | 0 {src => plugins/local}/gpu_nvidia/NEED | 0 {src => plugins/local}/gpu_nvidia/README.rst | 0 {src => plugins/local}/gpu_nvidia/gpu.c | 0 {src => plugins/local}/gpu_x86/NEED | 0 {src => plugins/local}/gpu_x86/README.rst | 0 {src => plugins/local}/gpu_x86/gpu.c | 0 8 files changed, 3 insertions(+), 3 deletions(-) rename {src => plugins/local}/gpu_nvidia/LIB (100%) rename {src => plugins/local}/gpu_nvidia/NEED (100%) rename {src => plugins/local}/gpu_nvidia/README.rst (100%) rename {src => plugins/local}/gpu_nvidia/gpu.c (100%) rename {src => plugins/local}/gpu_x86/NEED (100%) rename {src => plugins/local}/gpu_x86/README.rst (100%) rename {src => plugins/local}/gpu_x86/gpu.c (100%) diff --git a/configure b/configure index db158966..08dac444 100755 --- a/configure +++ b/configure @@ -119,15 +119,15 @@ rm -f ${QP_ROOT}/src/gpu_arch case "$GPU" in amd) # Nvidia echo "Activating AMD GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu_arch + ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch ;; nvidia) # Nvidia echo "Activating Nvidia GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu_arch + ln -s ${QP_ROOT}/plugins/local/gpu_nvidia ${QP_ROOT}/src/gpu_arch ;; *) # No Acceleration echo "Disabling GPU acceleration" - ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu_arch + ln -s ${QP_ROOT}/plugins/local/gpu_x86 ${QP_ROOT}/src/gpu_arch ;; esac diff --git a/src/gpu_nvidia/LIB b/plugins/local/gpu_nvidia/LIB similarity index 100% rename from src/gpu_nvidia/LIB rename to plugins/local/gpu_nvidia/LIB diff --git a/src/gpu_nvidia/NEED b/plugins/local/gpu_nvidia/NEED similarity index 100% rename from src/gpu_nvidia/NEED rename to plugins/local/gpu_nvidia/NEED diff --git a/src/gpu_nvidia/README.rst b/plugins/local/gpu_nvidia/README.rst similarity index 100% rename from src/gpu_nvidia/README.rst rename to plugins/local/gpu_nvidia/README.rst diff --git a/src/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c similarity index 100% rename from src/gpu_nvidia/gpu.c rename to plugins/local/gpu_nvidia/gpu.c diff --git a/src/gpu_x86/NEED b/plugins/local/gpu_x86/NEED similarity index 100% rename from src/gpu_x86/NEED rename to plugins/local/gpu_x86/NEED diff --git a/src/gpu_x86/README.rst b/plugins/local/gpu_x86/README.rst similarity index 100% rename from src/gpu_x86/README.rst rename to plugins/local/gpu_x86/README.rst diff --git a/src/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c similarity index 100% rename from src/gpu_x86/gpu.c rename to plugins/local/gpu_x86/gpu.c From 85b1035cfba778559e629045961cb542631841bd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Jun 2024 17:33:08 +0200 Subject: [PATCH 099/131] Working on CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 117 +++++++++++------------------- src/gpu/gpu_module.F90 | 62 ++++++++++++++++ 2 files changed, 103 insertions(+), 76 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index e7c9b1ab..1329f172 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -14,6 +14,9 @@ subroutine run_ccsd_space_orb type(gpu_double2) :: t1, r1 type(gpu_double2) :: H_oo, H_vv, H_vo + type(gpu_double2) :: d_cc_space_f_vo + type(gpu_double4) :: d_cc_space_v_oovv + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) @@ -52,6 +55,12 @@ subroutine run_ccsd_space_orb !print*,'occ',list_occ !print*,'vir',list_vir + call gpu_allocate(d_cc_space_f_vo, nV, nO) + call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) + call gpu_upload(cc_space_f_vo, d_cc_space_f_vo) + call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) + + call gpu_allocate(t2, nO,nO,nV,nV) call gpu_allocate(r2, nO,nO,nV,nV) call gpu_allocate(tau, nO,nO,nV,nV) @@ -116,7 +125,8 @@ subroutine run_ccsd_space_orb !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space_x(nO,nV,tau_x%f,t1%f,energy) + + call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -166,7 +176,7 @@ subroutine run_ccsd_space_orb call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space_x(nO,nV,tau_x%f,t1%f,energy) + call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 @@ -239,6 +249,8 @@ subroutine run_ccsd_space_orb call save_energy(uncorr_energy + energy, e_t) deallocate(h_t1, h_t2) + call gpu_deallocate(d_cc_space_f_vo) + call gpu_deallocate(d_cc_space_v_oovv) call gpu_deallocate(t1) call gpu_deallocate(t2) @@ -246,59 +258,14 @@ end ! Energy -subroutine ccsd_energy_space(nO,nV,tau,t1,energy) - +subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) + use gpu implicit none - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy - - ! internal - integer :: i,j,a,b - double precision :: e - - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau,t1,& - !$omp cc_space_f_vo,cc_space_w_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel - -end - -subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) - - implicit none - - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau_x(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy + integer, intent(in) :: nO, nV + type(gpu_double4), intent(in) :: tau_x, d_cc_space_v_oovv + type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo + double precision, intent(out) :: energy ! internal integer :: i,j,a,b @@ -307,14 +274,14 @@ subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) energy = 0d0 !$omp parallel & !$omp shared(nO,nV,energy,tau_x,t1,& - !$omp cc_space_f_vo,cc_space_v_oovv) & + !$omp d_cc_space_f_vo,d_cc_space_v_oovv) & !$omp private(i,j,a,b,e) & !$omp default(none) e = 0d0 !$omp do do a = 1, nV do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + e = e + 2d0 * d_cc_space_f_vo%f(a,i) * t1%f(i,a) enddo enddo !$omp end do nowait @@ -323,7 +290,7 @@ subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) do a = 1, nV do j = 1, nO do i = 1, nO - e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) + e = e + tau_x%f(i,j,a,b) * d_cc_space_v_oovv%f(i,j,a,b) enddo enddo enddo @@ -333,6 +300,12 @@ subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) energy = energy + e !$omp end critical !$omp end parallel +! +! +! call gpu_ddot(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1, d_cc_space_v_oovv, 1, energy) +! call gpu_ddot(blas_handle, nO*nV*1_8, d_cc_space_f_vo, 1, t1, 1, e) +! energy = energy + 2.d0*e + end @@ -372,26 +345,24 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) ! !$OMP END PARALLEL - type(gpu_blas) :: blas type(gpu_stream) :: stream(nV) - call gpu_blas_create(blas) do b=1,nV call gpu_stream_create(stream(b)) enddo !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas) & + !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) !$OMP DO do b=1,nV - call gpu_set_stream(blas,stream(b)) + call gpu_set_stream(blas_handle,stream(b)) do j=1,nO - call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nV*1_8, & - 1.d0, c_loc(t2%f(1,j,1,b)), nO*nO*1_8, & - h_t1(j,b), t1%c, nO*1_8, & - c_loc(tau%f(1,j,1,b)), nO*nO*1_8) + call gpu_dgeam(blas_handle, 'N', 'N', nO*1_8, nV*1_8, & + 1.d0, t2%f(1,j,1,b), nO*nO*1_8, & + h_t1(j,b), t1%f, nO*1_8, & + tau%f(1,j,1,b), nO*nO*1_8) enddo enddo !$OMP END DO @@ -403,8 +374,6 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) call gpu_stream_destroy(stream(b)) enddo - call gpu_blas_destroy(blas) - end subroutine update_tau_x_space(nO,nV,tau,tau_x) @@ -438,26 +407,24 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) ! !$OMP END DO ! !$OMP END PARALLEL - type(gpu_blas) :: blas type(gpu_stream) :: stream(nV) - call gpu_blas_create(blas) do a=1,nV call gpu_stream_create(stream(a)) enddo !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,tau_x,stream,blas) & + !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) !$OMP DO do b=1,nV do a=1,nV - call gpu_set_stream(blas,stream(a)) - call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nO*1_8, & - 2.d0, c_loc(tau%f(1,1,a,b)), nO*1_8, & - -1.d0, c_loc(tau%f(1,1,b,a)), nO*1_8, & - c_loc(tau_x%f(1,1,a,b)), nO*1_8) + call gpu_set_stream(blas_handle,stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nO*1_8, nO*1_8, & + 2.d0, tau%f(1,1,a,b), nO*1_8, & + -1.d0, tau%f(1,1,b,a), nO*1_8, & + tau_x%f(1,1,a,b), nO*1_8) enddo enddo !$OMP END DO @@ -469,8 +436,6 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) call gpu_stream_destroy(stream(b)) enddo - call gpu_blas_destroy(blas) - end ! R1 diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index d1ddad4c..2057d1eb 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -144,6 +144,16 @@ module gpu type(c_ptr), value :: a, b, c end subroutine + subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) bind(C, name='gpu_sgeam') + import + type(c_ptr), intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc + real(c_float), intent(in), value :: alpha, beta + type(c_ptr), value :: a, b, c + end subroutine + end interface @@ -478,5 +488,57 @@ module gpu call gpu_blas_destroy_c(handle%c) end subroutine + end module + + +! dot +! --- + +subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) + use gpu + type(gpu_blas), intent(in) :: handle + integer*8 :: n, incx, incy + double precision, intent(in) :: dx(*), dy(*) + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) +end subroutine + +subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) + use gpu + type(gpu_blas), intent(in) :: handle + integer*8 :: n, incx, incy + real, intent(in) :: dx(*), dy(*) + real, intent(out) :: res + call gpu_sdot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) +end subroutine + + +! geam +! ---- + +subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision :: a(lda,*), b(ldb,*), c(ldc,*) + call gpu_dgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & + c_loc(b), ldb, c_loc(c), ldc) +end subroutine + +subroutine gpu_sgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, lda, ldb, ldc + real, intent(in) :: alpha, beta + real :: a(lda,*), b(ldb,*), c(ldc,*) + call gpu_sgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & + c_loc(b), ldb, c_loc(c), ldc) +end subroutine + From a5f4f0516eec9f17438474529616368a6f9e5de4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Jun 2024 17:39:43 +0200 Subject: [PATCH 100/131] Fixing compile --- src/ccsd/ccsd_space_orb_sub.irp.f | 1 - src/gpu/gpu_module.F90 | 23 +++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 1329f172..4e06e31d 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -306,7 +306,6 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1, ! call gpu_ddot(blas_handle, nO*nV*1_8, d_cc_space_f_vo, 1, t1, 1, e) ! energy = energy + 2.d0*e - end ! Tau diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 2057d1eb..d7c26ba6 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -365,42 +365,42 @@ module gpu subroutine gpu_download_double1(gpu_ptr, cpu_ptr) implicit none type(gpu_double1), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:) + double precision, target, intent(in) :: cpu_ptr(:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f)) end subroutine subroutine gpu_download_double2(gpu_ptr, cpu_ptr) implicit none type(gpu_double2), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:,:) + double precision, target, intent(in) :: cpu_ptr(:,:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) end subroutine subroutine gpu_download_double3(gpu_ptr, cpu_ptr) implicit none type(gpu_double3), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) end subroutine subroutine gpu_download_double4(gpu_ptr, cpu_ptr) implicit none type(gpu_double4), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) end subroutine subroutine gpu_download_double5(gpu_ptr, cpu_ptr) implicit none type(gpu_double5), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:,:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) end subroutine subroutine gpu_download_double6(gpu_ptr, cpu_ptr) implicit none type(gpu_double6), intent(in) :: gpu_ptr - double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:) call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) end subroutine @@ -454,19 +454,16 @@ module gpu ! ---------- subroutine gpu_stream_create(stream) - import type(gpu_stream) :: stream call gpu_stream_create_c(stream%c) end subroutine subroutine gpu_stream_destroy(stream) - import type(gpu_stream) :: stream call gpu_stream_destroy_c(stream%c) end subroutine subroutine gpu_set_stream(handle, stream) - import type(gpu_blas) :: handle type(gpu_stream) :: stream call gpu_set_stream_c(handle%c, stream%c) @@ -477,13 +474,11 @@ module gpu ! -------- subroutine gpu_blas_create(handle) - import type(gpu_blas) :: handle call gpu_blas_create_c(handle%c) end subroutine subroutine gpu_blas_destroy(handle) - import type(gpu_blas) :: handle call gpu_blas_destroy_c(handle%c) end subroutine @@ -500,7 +495,7 @@ subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) use gpu type(gpu_blas), intent(in) :: handle integer*8 :: n, incx, incy - double precision, intent(in) :: dx(*), dy(*) + double precision, target, intent(in) :: dx(*), dy(*) double precision, intent(out) :: res call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) end subroutine @@ -525,7 +520,7 @@ subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & character, intent(in) :: transa, transb integer*8, intent(in) :: m, n, lda, ldb, ldc double precision, intent(in) :: alpha, beta - double precision :: a(lda,*), b(ldb,*), c(ldc,*) + double precision, target :: a(lda,*), b(ldb,*), c(ldc,*) call gpu_dgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & c_loc(b), ldb, c_loc(c), ldc) end subroutine @@ -537,7 +532,7 @@ subroutine gpu_sgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & character, intent(in) :: transa, transb integer*8, intent(in) :: m, n, lda, ldb, ldc real, intent(in) :: alpha, beta - real :: a(lda,*), b(ldb,*), c(ldc,*) + real, target :: a(lda,*), b(ldb,*), c(ldc,*) call gpu_sgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & c_loc(b), ldb, c_loc(c), ldc) end subroutine From c7df9a72cc68a7f5dfded36aa94ac50d5188a5a1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Jun 2024 21:32:04 +0200 Subject: [PATCH 101/131] Fixing again actions --- src/gpu/gpu_module.F90 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index d7c26ba6..ecf79c83 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -318,42 +318,42 @@ module gpu subroutine gpu_upload_double1(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:) + double precision, target, intent(in) :: cpu_ptr(*) type(gpu_double1), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*size(gpu_ptr%f)) end subroutine subroutine gpu_upload_double2(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:,:) + double precision, target, intent(in) :: cpu_ptr(:,:) type(gpu_double2), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) end subroutine subroutine gpu_upload_double3(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:) type(gpu_double3), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) end subroutine subroutine gpu_upload_double4(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:) type(gpu_double4), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) end subroutine subroutine gpu_upload_double5(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:,:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:) type(gpu_double5), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) end subroutine subroutine gpu_upload_double6(cpu_ptr, gpu_ptr) implicit none - double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:) + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:) type(gpu_double6), intent(in) :: gpu_ptr call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) end subroutine @@ -504,7 +504,7 @@ subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) use gpu type(gpu_blas), intent(in) :: handle integer*8 :: n, incx, incy - real, intent(in) :: dx(*), dy(*) + real, target, intent(in) :: dx(*), dy(*) real, intent(out) :: res call gpu_sdot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) end subroutine From b467bef6dd1e14c5914cc6508aa898d5f1665e3a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Jun 2024 21:37:14 +0200 Subject: [PATCH 102/131] Forgot file --- src/ccsd/ccsd_space_orb_sub.irp.f | 68 +++++++++++++++---------------- src/gpu/gpu.irp.f | 11 +++++ 2 files changed, 45 insertions(+), 34 deletions(-) create mode 100644 src/gpu/gpu.irp.f diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 4e06e31d..5c2daa05 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -271,40 +271,40 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1, integer :: i,j,a,b double precision :: e - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau_x,t1,& - !$omp d_cc_space_f_vo,d_cc_space_v_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * d_cc_space_f_vo%f(a,i) * t1%f(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau_x%f(i,j,a,b) * d_cc_space_v_oovv%f(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel -! -! -! call gpu_ddot(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1, d_cc_space_v_oovv, 1, energy) -! call gpu_ddot(blas_handle, nO*nV*1_8, d_cc_space_f_vo, 1, t1, 1, e) -! energy = energy + 2.d0*e +! energy = 0d0 +! !$omp parallel & +! !$omp shared(nO,nV,energy,tau_x,t1,& +! !$omp d_cc_space_f_vo,d_cc_space_v_oovv) & +! !$omp private(i,j,a,b,e) & +! !$omp default(none) +! e = 0d0 +! !$omp do +! do a = 1, nV +! do i = 1, nO +! e = e + 2d0 * d_cc_space_f_vo%f(a,i) * t1%f(i,a) +! enddo +! enddo +! !$omp end do nowait +! !$omp do +! do b = 1, nV +! do a = 1, nV +! do j = 1, nO +! do i = 1, nO +! e = e + tau_x%f(i,j,a,b) * d_cc_space_v_oovv%f(i,j,a,b) +! enddo +! enddo +! enddo +! enddo +! !$omp end do nowait +! !$omp critical +! energy = energy + e +! !$omp end critical +! !$omp end parallel + + + call gpu_ddot(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1, d_cc_space_v_oovv, 1, energy) + call gpu_ddot(blas_handle, nO*nV*1_8, d_cc_space_f_vo, 1, t1, 1, e) + energy = energy + 2.d0*e end diff --git a/src/gpu/gpu.irp.f b/src/gpu/gpu.irp.f new file mode 100644 index 00000000..e91d66f5 --- /dev/null +++ b/src/gpu/gpu.irp.f @@ -0,0 +1,11 @@ +use gpu + +BEGIN_PROVIDER [ type(gpu_blas), blas_handle ] + implicit none + BEGIN_DOC + ! Handle for cuBLAS or RocBLAS + END_DOC + call gpu_blas_create(blas_handle) +END_PROVIDER + + From 860121d404f7ae255790cd12136139103bdc48d0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 29 Jun 2024 02:27:50 +0200 Subject: [PATCH 103/131] H_oo on GPU --- plugins/local/gpu_nvidia/gpu.c | 224 +++++++++++---------- plugins/local/gpu_x86/gpu.c | 38 ++-- src/ccsd/ccsd_space_orb_sub.irp.f | 112 ++++++++--- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 127 ++++++++---- src/gpu/gpu.irp.f | 7 + src/gpu/gpu_module.F90 | 260 +++++++++++++++++++++---- 6 files changed, 540 insertions(+), 228 deletions(-) diff --git a/plugins/local/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c index f0bd247a..189de64c 100644 --- a/plugins/local/gpu_nvidia/gpu.c +++ b/plugins/local/gpu_nvidia/gpu.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -10,6 +11,10 @@ /* Generic functions */ +bool no_gpu() { + return false; +} + int gpu_ndevices() { int ngpus; cudaGetDeviceCount(&ngpus); @@ -17,7 +22,7 @@ int gpu_ndevices() { } void gpu_set_device(int32_t igpu) { - cudaSetDevice(igpu); + cudaSetDevice((int) igpu); } @@ -64,22 +69,20 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { /* Streams */ -void gpu_stream_create(void** ptr) { - cudaStream_t stream; - cudaError_t rc = cudaStreamCreate(&stream); +void gpu_stream_create(cudaStream_t* ptr) { + cudaError_t rc = cudaStreamCreate(ptr); assert (rc == cudaSuccess); - *ptr = (void*) stream; } -void gpu_stream_destroy(void** ptr) { - assert (*ptr != NULL); - cudaError_t rc = cudaStreamDestroy( (cudaStream_t) *ptr); +void gpu_stream_destroy(cudaStream_t* ptr) { + assert (ptr != NULL); + cudaError_t rc = cudaStreamDestroy(*ptr); assert (rc == cudaSuccess); *ptr = NULL; } -void gpu_set_stream(void** handle, void** stream) { - cublasSetStream( (cublasHandle_t) *handle, (cudaStream_t) *stream); +void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) { + cublasSetStream(handle, stream); } void gpu_synchronize() { @@ -89,75 +92,80 @@ void gpu_synchronize() { /* BLAS functions */ -void gpu_blas_create(void** handle) { - cublasHandle_t cublas_handle; - cublasStatus_t rc = cublasCreate(&cublas_handle); +void gpu_blas_create(cublasHandle_t* ptr) { + cublasStatus_t rc = cublasCreate(ptr); assert (rc == CUBLAS_STATUS_SUCCESS); - *handle = (void*) cublas_handle; } -void gpu_blas_destroy(void** handle) { - assert (*handle != NULL); - cublasStatus_t rc = cublasDestroy( (cublasHandle_t) *handle); +void gpu_blas_destroy(cublasHandle_t* ptr) { + assert (ptr != NULL); + cublasStatus_t rc = cublasDestroy(*ptr); assert (rc == CUBLAS_STATUS_SUCCESS); - *handle = NULL; + ptr = NULL; } -void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { - assert (*handle != NULL); +void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (handle != NULL); + /* Convert to int */ + int n_, incx_, incy_; - /* Convert to int32_t */ - int32_t n_, incx_, incy_; + n_ = (int) n; + incx_ = (int) incx; + incy_ = (int) incy; - n_ = (int32_t) n; - incx_ = (int32_t) incx; - incy_ = (int32_t) incy; + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasStatus_t rc = cublasDdot(handle, n_, x, incx_, y, incy_, result); +/* + double alpha = 1.0; + double beta = 0.0; + cublasStatus_t rc = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, 1, 1, n_, &alpha, x, 1, y, n_, &beta, &result_, 1); +*/ + assert (rc == CUBLAS_STATUS_SUCCESS); +} + + + +void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (handle != NULL); + + /* Convert to int */ + int n_, incx_, incy_; + + n_ = (int) n; + incx_ = (int) incx; + incy_ = (int) incy; /* Check for integer overflows */ assert ( (int64_t) n_ == n ); assert ( (int64_t) incx_ == incx); assert ( (int64_t) incy_ == incy); - cublasDdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result); + float result_ = 0.; + cublasStatus_t rc = cublasSdot(handle, n_, x, incx_, y, incy_, &result_); + assert (rc == CUBLAS_STATUS_SUCCESS); + *result = result_; } -void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { - assert (*handle != NULL); - - /* Convert to int32_t */ - int32_t n_, incx_, incy_; - - n_ = (int32_t) n; - incx_ = (int32_t) incx; - incy_ = (int32_t) incy; - - /* Check for integer overflows */ - assert ( (int64_t) n_ == n ); - assert ( (int64_t) incx_ == incx); - assert ( (int64_t) incy_ == incy); - - cublasSdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result); -} - - - -void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha, +void gpu_dgemv(cublasHandle_t handle, const char transa, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, lda_, incx_, incy_; + /* Convert to int */ + int m_, n_, lda_, incx_, incy_; - m_ = (int32_t) m; - n_ = (int32_t) n; - lda_ = (int32_t) lda; - incx_ = (int32_t) incx; - incy_ = (int32_t) incy; + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + incx_ = (int) incx; + incy_ = (int) incy; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -169,24 +177,24 @@ void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t cublasOperation_t transa_ = CUBLAS_OP_N; if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - cublasDgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); + cublasDgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); } -void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha, +void gpu_sgemv(cublasHandle_t handle, const char transa, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, lda_, incx_, incy_; + /* Convert to int */ + int m_, n_, lda_, incx_, incy_; - m_ = (int32_t) m; - n_ = (int32_t) n; - lda_ = (int32_t) lda; - incx_ = (int32_t) incx; - incy_ = (int32_t) incy; + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + incx_ = (int) incx; + incy_ = (int) incy; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -198,24 +206,24 @@ void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t cublasOperation_t transa_ = CUBLAS_OP_N; if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - cublasSgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); + cublasSgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); } -void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, +void gpu_dgemm(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, k_, lda_, ldb_, ldc_; + /* Convert to int */ + int m_, n_, k_, lda_, ldb_, ldc_; - m_ = (int32_t) m; - n_ = (int32_t) n; - k_ = (int32_t) k; - lda_ = (int32_t) lda; - ldb_ = (int32_t) ldb; - ldc_ = (int32_t) ldc; + m_ = (int) m; + n_ = (int) n; + k_ = (int) k; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -230,25 +238,25 @@ void gpu_dgemm(void** handle, const char transa, const char transb, const int64_ if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; - cublasDgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); + cublasDgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); } -void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, +void gpu_sgemm(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, k_, lda_, ldb_, ldc_; + /* Convert to int */ + int m_, n_, k_, lda_, ldb_, ldc_; - m_ = (int32_t) m; - n_ = (int32_t) n; - k_ = (int32_t) k; - lda_ = (int32_t) lda; - ldb_ = (int32_t) ldb; - ldc_ = (int32_t) ldc; + m_ = (int) m; + n_ = (int) n; + k_ = (int) k; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -263,22 +271,22 @@ void gpu_sgemm(void** handle, const char transa, const char transb, const int64_ if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; - cublasSgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); + cublasSgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); } -void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, +void gpu_dgeam(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, lda_, ldb_, ldc_; + /* Convert to int */ + int m_, n_, lda_, ldb_, ldc_; - m_ = (int32_t) m; - n_ = (int32_t) n; - lda_ = (int32_t) lda; - ldb_ = (int32_t) ldb; - ldc_ = (int32_t) ldc; + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -292,23 +300,23 @@ void gpu_dgeam(void** handle, const char transa, const char transb, const int64_ if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; - cublasDgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + cublasDgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); } -void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, +void gpu_sgeam(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); - /* Convert to int32_t */ - int32_t m_, n_, lda_, ldb_, ldc_; + /* Convert to int */ + int m_, n_, lda_, ldb_, ldc_; - m_ = (int32_t) m; - n_ = (int32_t) n; - lda_ = (int32_t) lda; - ldb_ = (int32_t) ldb; - ldc_ = (int32_t) ldc; + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; /* Check for integer overflows */ assert ( (int64_t) m_ == m ); @@ -322,6 +330,6 @@ void gpu_sgeam(void** handle, const char transa, const char transb, const int64_ if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; - cublasSgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + cublasSgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); } diff --git a/plugins/local/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c index ac7c3620..53267a7c 100644 --- a/plugins/local/gpu_x86/gpu.c +++ b/plugins/local/gpu_x86/gpu.c @@ -2,8 +2,12 @@ #include #include #include +#include #include +bool no_gpu() { + return true; +} /* Generic functions */ @@ -56,7 +60,7 @@ void gpu_stream_destroy(void** ptr) { *ptr = NULL; } -void gpu_set_stream(void** handle, void** stream) { +void gpu_set_stream(void* handle, void* stream) { return; } @@ -79,8 +83,8 @@ void gpu_blas_destroy(void** handle) { double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy); -void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { - assert (*handle != NULL); +void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (handle != NULL); /* Convert to int32_t */ int32_t n_, incx_, incy_; @@ -100,8 +104,8 @@ void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t inc float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy); -void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { - assert (*handle != NULL); +void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (handle != NULL); /* Convert to int32_t */ int32_t n_, incx_, incy_; @@ -122,10 +126,10 @@ void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); -void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha, +void gpu_dgemv(void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { - assert (*handle != NULL); + assert (handle != NULL); /* Convert to int32_t */ int32_t m_, n_, lda_, incx_, incy_; @@ -150,10 +154,10 @@ void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); -void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha, +void gpu_sgemv(void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { - assert (*handle != NULL); + assert (handle != NULL); /* Convert to int32_t */ int32_t m_, n_, lda_, incx_, incy_; @@ -178,10 +182,10 @@ void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); -void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, +void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); /* Convert to int32_t */ int32_t m_, n_, k_, lda_, ldb_, ldc_; @@ -209,10 +213,10 @@ void gpu_dgemm(void** handle, const char transa, const char transb, const int64_ void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); -void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, +void gpu_sgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); /* Convert to int32_t */ int32_t m_, n_, k_, lda_, ldb_, ldc_; @@ -236,9 +240,9 @@ void gpu_sgemm(void** handle, const char transa, const char transb, const int64_ } -void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, +void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); if ( (transa == 'N' && transb == 'N') || (transa == 'n' && transb == 'N') || @@ -368,9 +372,9 @@ void gpu_dgeam(void** handle, const char transa, const char transb, const int64_ } -void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, +void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { - assert (*handle != NULL); + assert (handle != NULL); if ( (transa == 'N' && transb == 'N') || (transa == 'n' && transb == 'N') || diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 5c2daa05..5ee7366e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -14,9 +14,15 @@ subroutine run_ccsd_space_orb type(gpu_double2) :: t1, r1 type(gpu_double2) :: H_oo, H_vv, H_vo - type(gpu_double2) :: d_cc_space_f_vo + type(gpu_double2) :: d_cc_space_f_oo, d_cc_space_f_vo + type(gpu_double2) :: d_cc_space_f_ov, d_cc_space_f_vv + + type(gpu_double3) :: d_cc_space_v_oo_chol, d_cc_space_v_vo_chol + type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double4) :: d_cc_space_v_oovv + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) @@ -24,7 +30,7 @@ subroutine run_ccsd_space_orb call set_multiple_levels_omp(.False.) - if (do_ao_cholesky) then + if (do_mo_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao else @@ -55,11 +61,36 @@ subroutine run_ccsd_space_orb !print*,'occ',list_occ !print*,'vir',list_vir + ! GPU arrays + call gpu_allocate(d_cc_space_f_oo, nO, nO) call gpu_allocate(d_cc_space_f_vo, nV, nO) - call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) - call gpu_upload(cc_space_f_vo, d_cc_space_f_vo) - call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) + call gpu_allocate(d_cc_space_f_ov, nO, nV) + call gpu_allocate(d_cc_space_f_vv, nV, nV) + call gpu_upload(cc_space_f_oo, d_cc_space_f_oo) + call gpu_upload(cc_space_f_vo, d_cc_space_f_vo) + call gpu_upload(cc_space_f_vv, d_cc_space_f_vv) + +! FREE cc_space_f_oo +! FREE cc_space_f_vo +! FREE cc_space_f_vv + + if (do_mo_cholesky) then + call gpu_allocate(d_cc_space_v_oo_chol, cholesky_mo_num, nO, nO) + call gpu_allocate(d_cc_space_v_ov_chol, cholesky_mo_num, nO, nV) + call gpu_allocate(d_cc_space_v_vo_chol, cholesky_mo_num, nV, nO) + call gpu_allocate(d_cc_space_v_vv_chol, cholesky_mo_num, nV, nV) + + call gpu_upload(cc_space_v_oo_chol, d_cc_space_v_oo_chol) + call gpu_upload(cc_space_v_ov_chol, d_cc_space_v_ov_chol) + call gpu_upload(cc_space_v_vo_chol, d_cc_space_v_vo_chol) + call gpu_upload(cc_space_v_vv_chol, d_cc_space_v_vv_chol) + +! FREE cc_space_v_oo_chol +! FREE cc_space_v_ov_chol +! FREE cc_space_v_vo_chol +! FREE cc_space_v_vv_chol + endif call gpu_allocate(t2, nO,nO,nV,nV) call gpu_allocate(r2, nO,nO,nV,nV) @@ -120,6 +151,13 @@ subroutine run_ccsd_space_orb call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2) call gpu_upload(h_t2, t2) + + call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) + call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) + +! FREE cc_space_v_oovv + + call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy @@ -142,10 +180,10 @@ subroutine run_ccsd_space_orb do while (not_converged) ! Residue - if (do_ao_cholesky) then -! if (.False.) then - call compute_H_oo_chol(nO,nV,tau_x,H_oo) - call compute_H_vv_chol(nO,nV,tau_x%f,H_vv%f) + if (do_mo_cholesky) then + call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1%f,H_vo%f) call compute_r1_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r1%f,max_r1) @@ -249,6 +287,12 @@ subroutine run_ccsd_space_orb call save_energy(uncorr_energy + energy, e_t) deallocate(h_t1, h_t2) + if (do_mo_cholesky) then + call gpu_deallocate(d_cc_space_v_oo_chol) + call gpu_deallocate(d_cc_space_v_ov_chol) + call gpu_deallocate(d_cc_space_v_vo_chol) + call gpu_deallocate(d_cc_space_v_vv_chol) + endif call gpu_deallocate(d_cc_space_f_vo) call gpu_deallocate(d_cc_space_v_oovv) call gpu_deallocate(t1) @@ -302,8 +346,21 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1, ! !$omp end parallel - call gpu_ddot(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1, d_cc_space_v_oovv, 1, energy) - call gpu_ddot(blas_handle, nO*nV*1_8, d_cc_space_f_vo, 1, t1, 1, e) + type(gpu_stream) :: s1, s2 + call gpu_stream_create(s1) + call gpu_stream_create(s2) + + call gpu_set_stream(blas_handle,s1) + call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo, 1, t1, 1, e) + + call gpu_set_stream(blas_handle,s2) + call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1_8, d_cc_space_v_oovv, 1_8, energy) + call gpu_synchronize() + call gpu_set_stream(blas_handle,gpu_default_stream) + + call gpu_stream_destroy(s1) + call gpu_stream_destroy(s2) + energy = energy + 2.d0*e end @@ -346,32 +403,29 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) type(gpu_stream) :: stream(nV) - do b=1,nV - call gpu_stream_create(stream(b)) - enddo - - !$OMP PARALLEL & + !$OMP PARALLEL if (no_gpu()) & !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) !$OMP DO do b=1,nV + call gpu_stream_create(stream(b)) call gpu_set_stream(blas_handle,stream(b)) do j=1,nO - call gpu_dgeam(blas_handle, 'N', 'N', nO*1_8, nV*1_8, & - 1.d0, t2%f(1,j,1,b), nO*nO*1_8, & - h_t1(j,b), t1%f, nO*1_8, & - tau%f(1,j,1,b), nO*nO*1_8) + call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nV, & + 1.d0, t2%f(1,j,1,b), nO*nO, & + h_t1(j,b), t1%f, nO, & + tau%f(1,j,1,b), nO*nO) enddo enddo !$OMP END DO !$OMP END PARALLEL - call gpu_synchronize() - do b=1,nV call gpu_stream_destroy(stream(b)) enddo + call gpu_set_stream(blas_handle,gpu_default_stream) + end @@ -412,7 +466,7 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) call gpu_stream_create(stream(a)) enddo - !$OMP PARALLEL & + !$OMP PARALLEL if (no_gpu()) & !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) @@ -420,20 +474,20 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) do b=1,nV do a=1,nV call gpu_set_stream(blas_handle,stream(a)) - call gpu_dgeam(blas_handle, 'N', 'N', nO*1_8, nO*1_8, & - 2.d0, tau%f(1,1,a,b), nO*1_8, & - -1.d0, tau%f(1,1,b,a), nO*1_8, & - tau_x%f(1,1,a,b), nO*1_8) + call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nO, & + 2.d0, tau%f(1,1,a,b), nO, & + -1.d0, tau%f(1,1,b,a), nO, & + tau_x%f(1,1,a,b), nO) enddo enddo !$OMP END DO !$OMP END PARALLEL - call gpu_synchronize() - do b=1,nV call gpu_stream_destroy(stream(b)) enddo + call gpu_set_stream(blas_handle,gpu_default_stream) + end diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 9b161001..288724f3 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -293,62 +293,115 @@ end ! H_oo -subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) +subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) use gpu implicit none integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_oo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol type(gpu_double4), intent(in) :: tau_x type(gpu_double2), intent(out) :: H_oo integer :: a,b,i,j,u,k - double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + type(gpu_double3) :: tau_kau, tmp_vov, tmp_ovv - allocate(tau_kau(cholesky_mo_num,nV,nO)) - !$omp parallel & - !$omp default(shared) & - !$omp private(i,u,j,k,a,b,tmp_vov) - allocate(tmp_vov(nV,nO,nV) ) - !$omp do - do u = 1, nO + call gpu_allocate(tau_kau, cholesky_mo_num, nV, nO) + +! !$omp parallel & +! !$omp default(shared) & +! !$omp private(i,u,j,k,a,b,tmp_vov) +! call gpu_allocate(tmp_vov, nV, nO, nV) +! !$omp do +! do u = 1, nO +! do b=1,nV +! do j=1,nO +! do a=1,nV +! tmp_vov%f(a,j,b) = tau_x%f(u,j,a,b) +! enddo +! enddo +! enddo +! call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & +! d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_vov%f, nV, & +! 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) +! enddo +! !$omp end do nowait +! call gpu_deallocate(tmp_vov) +! !$omp do +! do i = 1, nO +! do u = 1, nO +! H_oo%f(u,i) = d_cc_space_f_oo%f(u,i) +! enddo +! enddo +! !$omp end do nowait +! +! !$omp barrier +! !$omp end parallel +! call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & +! tau_kau%f(1,1,1), cholesky_mo_num*nV, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, & +! 1.d0, H_oo%f(1,1), nO) +! + + type(gpu_stream) :: stream(nV) + + do b=1,nV + call gpu_stream_create(stream(b)) + enddo + + !$OMP PARALLEL if (no_gpu()) & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(u,b,tmp_vov,tmp_ovv) + + call gpu_allocate(tmp_vov, nV, nO, nV) + call gpu_allocate(tmp_ovv, nO, nV, nV) + + !$OMP DO + do u=1,nO + call gpu_dgeam_f(blas_handle, 'N', 'N', 1, nO*nV*nV, 1.d0, & + tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) do b=1,nV - do j=1,nO - do a=1,nV - tmp_vov(a,j,b) = tau_x%f(u,j,a,b) - enddo - enddo + call gpu_set_stream(blas_handle,stream(b)) + call gpu_dgeam_f(blas_handle, 'T', 'T', nV, nO, 1.d0, & + tmp_ovv%f(1,1,b), nO, 0.d0, & + tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) enddo - call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & - 0.d0, tau_kau(1,1,u), cholesky_mo_num) + call gpu_dgemm_f(blas_handle, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & + 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) + call gpu_synchronize() enddo - !$omp end do nowait - deallocate(tmp_vov) - !$omp do - do i = 1, nO - do u = 1, nO - H_oo%f(u,i) = cc_space_f_oo(u,i) - enddo - enddo - !$omp end do nowait - !$omp barrier - !$omp end parallel - call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & - tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & - 1.d0, H_oo%f, nO) + !$OMP END DO + call gpu_deallocate(tmp_vov) + call gpu_deallocate(tmp_ovv) + !$OMP END PARALLEL + + do b=1,nV + call gpu_stream_destroy(stream(b)) + enddo + + call gpu_set_stream(blas_handle,gpu_default_stream) + + call gpu_copy(d_cc_space_f_oo, H_oo) + + call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & + 1.d0, H_oo, nO) + + call gpu_deallocate(tau_kau) end ! H_vv subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) - + use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: tau_x(nO, nO, nV, nV) - double precision, intent(out) :: H_vv(nV, nV) + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_vv integer :: a,b,i,j,u,k, beta @@ -364,7 +417,7 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) do b=1,nV do j=1,nO do i=1,nO - tmp_oov(i,j,b) = tau_x(i,j,a,b) + tmp_oov(i,j,b) = tau_x%f(i,j,a,b) enddo enddo enddo @@ -378,7 +431,7 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) !$omp do do beta = 1, nV do a = 1, nV - H_vv(a,beta) = cc_space_f_vv(a,beta) + H_vv%f(a,beta) = cc_space_f_vv(a,beta) enddo enddo !$omp end do nowait @@ -386,7 +439,7 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) !$omp end parallel call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & - 1.d0, H_vv, nV) + 1.d0, H_vv%f, nV) end diff --git a/src/gpu/gpu.irp.f b/src/gpu/gpu.irp.f index e91d66f5..6ad0a075 100644 --- a/src/gpu/gpu.irp.f +++ b/src/gpu/gpu.irp.f @@ -8,4 +8,11 @@ BEGIN_PROVIDER [ type(gpu_blas), blas_handle ] call gpu_blas_create(blas_handle) END_PROVIDER +BEGIN_PROVIDER [ type(gpu_stream), gpu_default_stream ] + implicit none + BEGIN_DOC + ! Default stream + END_DOC + gpu_default_stream%c = C_NULL_PTR +END_PROVIDER diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index ecf79c83..2676b339 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -49,7 +49,12 @@ module gpu ! ------------ interface + logical(c_bool) function no_gpu() bind(C) + import + end function + integer function gpu_ndevices() bind(C) + import end function subroutine gpu_set_device(id) bind(C) @@ -101,7 +106,7 @@ module gpu subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream') import - type(c_ptr) :: handle, stream + type(c_ptr), value :: handle, stream end subroutine subroutine gpu_synchronize() bind(C) @@ -120,15 +125,15 @@ module gpu subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot') import - type(c_ptr), intent(in) :: handle + type(c_ptr), value, intent(in) :: handle integer(c_int64_t), value :: n, incx, incy - type(c_ptr), intent(in), value :: dx, dy + type(c_ptr), value :: dx, dy real(c_double), intent(out) :: res end subroutine subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot') import - type(c_ptr), intent(in) :: handle + type(c_ptr), value, intent(in) :: handle integer(c_int64_t), value :: n, incx, incy type(c_ptr), intent(in), value :: dx, dy real(c_float), intent(out) :: res @@ -137,8 +142,8 @@ module gpu subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) bind(C, name='gpu_dgeam') import - type(c_ptr), intent(in) :: handle - character(c_char), intent(in), value :: transa, transb + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc real(c_double), intent(in), value :: alpha, beta type(c_ptr), value :: a, b, c @@ -147,13 +152,33 @@ module gpu subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) bind(C, name='gpu_sgeam') import - type(c_ptr), intent(in) :: handle - character(c_char), intent(in), value :: transa, transb + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc real(c_float), intent(in), value :: alpha, beta type(c_ptr), value :: a, b, c end subroutine + subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc + real(c_double), intent(in), value :: alpha, beta + type(c_ptr), value :: a, b, c + end subroutine + + subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) bind(C, name='gpu_sgemm') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc + real(c_float), intent(in), value :: alpha, beta + type(c_ptr), value :: a, b, c + end subroutine + end interface @@ -161,20 +186,26 @@ module gpu ! ---------------------- interface gpu_allocate - procedure gpu_allocate_double1 & - ,gpu_allocate_double2 & - ,gpu_allocate_double3 & - ,gpu_allocate_double4 & - ,gpu_allocate_double5 & - ,gpu_allocate_double6 + procedure gpu_allocate_double1 & + ,gpu_allocate_double2 & + ,gpu_allocate_double3 & + ,gpu_allocate_double4 & + ,gpu_allocate_double5 & + ,gpu_allocate_double6 & + ,gpu_allocate_double1_64 & + ,gpu_allocate_double2_64 & + ,gpu_allocate_double3_64 & + ,gpu_allocate_double4_64 & + ,gpu_allocate_double5_64 & + ,gpu_allocate_double6_64 end interface gpu_allocate interface gpu_deallocate - procedure gpu_deallocate_double1 & - ,gpu_deallocate_double2 & - ,gpu_deallocate_double3 & - ,gpu_deallocate_double4 & - ,gpu_deallocate_double5 & + procedure gpu_deallocate_double1 & + ,gpu_deallocate_double2 & + ,gpu_deallocate_double3 & + ,gpu_deallocate_double4 & + ,gpu_deallocate_double5 & ,gpu_deallocate_double6 end interface gpu_deallocate @@ -267,6 +298,61 @@ module gpu end subroutine + subroutine gpu_allocate_double1_64(ptr, s) + implicit none + type(gpu_double1), intent(inout) :: ptr + integer*8, intent(in) :: s + + call gpu_allocate_c(ptr%c, s) + call c_f_pointer(ptr%c, ptr%f, (/ s /)) + end subroutine + + subroutine gpu_allocate_double2_64(ptr, s1, s2) + implicit none + type(gpu_double2), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2 + + call gpu_allocate_c(ptr%c, s1*s2*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /)) + end subroutine + + subroutine gpu_allocate_double3_64(ptr, s1, s2, s3) + implicit none + type(gpu_double3), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3 + + call gpu_allocate_c(ptr%c, s1*s2*s3*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /)) + end subroutine + + subroutine gpu_allocate_double4_64(ptr, s1, s2, s3, s4) + implicit none + type(gpu_double4), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /)) + end subroutine + + subroutine gpu_allocate_double5_64(ptr, s1, s2, s3, s4, s5) + implicit none + type(gpu_double5), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4, s5 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /)) + end subroutine + + subroutine gpu_allocate_double6_64(ptr, s1, s2, s3, s4, s5, s6) + implicit none + type(gpu_double6), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4, s5, s6 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /)) + end subroutine + + ! gpu_deallocate ! -------------- @@ -494,19 +580,38 @@ end module subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) use gpu type(gpu_blas), intent(in) :: handle - integer*8 :: n, incx, incy - double precision, target, intent(in) :: dx(*), dy(*) - double precision, intent(out) :: res - call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) + integer*4 :: n, incx, incy + type(gpu_double1), intent(in) :: dx, dy + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, int(n,c_int64_t), dx%c, int(incx,c_int64_t), dy%c, int(incy,c_int64_t), res) end subroutine -subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) +subroutine gpu_ddot_f(handle, n, dx, incx, dy, incy, res) + use gpu + type(gpu_blas), intent(in) :: handle + integer*4 :: n, incx, incy + double precision, target :: dx(*), dy(*) + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res) +end subroutine + + +subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res) use gpu type(gpu_blas), intent(in) :: handle integer*8 :: n, incx, incy - real, target, intent(in) :: dx(*), dy(*) - real, intent(out) :: res - call gpu_sdot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) + type(gpu_double1), intent(in) :: dx, dy + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, n, dx%c, incx, dy%c, incy, res) +end subroutine + +subroutine gpu_ddot_f_64(handle, n, dx, incx, dy, incy, res) + use gpu + type(gpu_blas), intent(in) :: handle + integer*8 :: n, incx, incy + double precision, target :: dx(*), dy(*) + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) end subroutine @@ -518,22 +623,103 @@ subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb - integer*8, intent(in) :: m, n, lda, ldb, ldc + integer*4, intent(in) :: m, n, lda, ldb, ldc double precision, intent(in) :: alpha, beta - double precision, target :: a(lda,*), b(ldb,*), c(ldc,*) - call gpu_dgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & - c_loc(b), ldb, c_loc(c), ldc) + type(gpu_double2) :: a, b, c + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, & + b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t)) end subroutine -subroutine gpu_sgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & + +subroutine gpu_dgeam_f(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) - use gpu + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*4, intent(in) :: m, n, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a(*), b(*), c(*) + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & + c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) +end subroutine + + +subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) + use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb integer*8, intent(in) :: m, n, lda, ldb, ldc - real, intent(in) :: alpha, beta - real, target :: a(lda,*), b(ldb,*), c(ldc,*) - call gpu_sgeam_c(handle%c, transa, transb, m, n, alpha, c_loc(a), lda, beta, & - c_loc(b), ldb, c_loc(c), ldc) + double precision, intent(in) :: alpha, beta + type(gpu_double2) :: a, b, c + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, & + b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t)) +end subroutine + + +subroutine gpu_dgeam_f_64(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a(*), b(*), c(*) + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & + c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) +end subroutine + + +! gemm +! ---- + +subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*4, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + type(gpu_double2) :: a, b, c + call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & + alpha, a%c, int(lda,c_int64_t), & + b%c, int(ldb,c_int64_t), beta, c%c, int(ldc,c_int64_t)) +end subroutine + +subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + type(gpu_double2) :: a, b, c + call gpu_dgemm_c(handle%c, transa, transb, m, n, k, & + alpha, a%c, lda, b%c, ldb, beta, c%c, ldc) +end subroutine + +subroutine gpu_dgemm_f(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*4, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a(*), b(*), c(*) + call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & + alpha, c_loc(a), int(lda,c_int64_t), & + c_loc(b), int(ldb,c_int64_t), beta, c_loc(c), int(ldc,c_int64_t)) +end subroutine + +subroutine gpu_dgemm_f_64(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) + use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a(*), b(*), c(*) + call gpu_dgemm_c(handle%c, transa, transb, m, n, k, & + alpha, c_loc(a), lda, c_loc(b), ldb, beta, c_loc(c), ldc) end subroutine From d3c1994c64ed9ae9914ce605a6b7c364ac518d9b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Jul 2024 18:04:48 +0200 Subject: [PATCH 104/131] H_vv --- plugins/local/gpu_nvidia/gpu.c | 16 +-- plugins/local/gpu_x86/gpu.c | 6 +- src/ccsd/ccsd_space_orb_sub.irp.f | 7 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 146 ++++++++++++++++--------- src/gpu/gpu.irp.f | 8 ++ 5 files changed, 114 insertions(+), 69 deletions(-) diff --git a/plugins/local/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c index 189de64c..39a82984 100644 --- a/plugins/local/gpu_nvidia/gpu.c +++ b/plugins/local/gpu_nvidia/gpu.c @@ -11,10 +11,6 @@ /* Generic functions */ -bool no_gpu() { - return false; -} - int gpu_ndevices() { int ngpus; cudaGetDeviceCount(&ngpus); @@ -35,13 +31,13 @@ void gpu_allocate(void** ptr, const int64_t size) { free = INT64_MAX; } - /* Use managed memory if it does not fit on the GPU */ - if (size < free && size < total/2) { + rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); +// /* Use managed memory if it does not fit on the GPU */ +// if (size < free && size < total/2) { // rc= cudaMalloc(ptr, size); - rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); - } else { - rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); - } +// } else { +// rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); +// } assert (rc == cudaSuccess); } diff --git a/plugins/local/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c index 53267a7c..dab23a25 100644 --- a/plugins/local/gpu_x86/gpu.c +++ b/plugins/local/gpu_x86/gpu.c @@ -5,14 +5,10 @@ #include #include -bool no_gpu() { - return true; -} - /* Generic functions */ int gpu_ndevices() { - return 1; + return 0; } void gpu_set_device(int32_t i) { diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 5ee7366e..0b3636ac 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -183,7 +183,8 @@ subroutine run_ccsd_space_orb if (do_mo_cholesky) then call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) - call compute_H_vv_chol(nO,nV,tau_x,H_vv) + call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & + d_cc_space_v_ov_chol,H_vv) call compute_H_vo_chol(nO,nV,t1%f,H_vo%f) call compute_r1_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r1%f,max_r1) @@ -403,7 +404,7 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) type(gpu_stream) :: stream(nV) - !$OMP PARALLEL if (no_gpu()) & + !$OMP PARALLEL if (gpu_num == 0) & !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) @@ -466,7 +467,7 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) call gpu_stream_create(stream(a)) enddo - !$OMP PARALLEL if (no_gpu()) & + !$OMP PARALLEL if (gpu_num == 0) & !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 288724f3..458016fb 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -344,48 +344,47 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & ! 1.d0, H_oo%f(1,1), nO) ! - type(gpu_stream) :: stream(nV) + type(gpu_blas) :: blas - do b=1,nV - call gpu_stream_create(stream(b)) - enddo - !$OMP PARALLEL if (no_gpu()) & + !$OMP PARALLEL & !$OMP DEFAULT(SHARED) & - !$OMP PRIVATE(u,b,tmp_vov,tmp_ovv) + !$OMP PRIVATE(blas,u,b,tmp_vov,tmp_ovv) + + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_oo, H_oo) + !$OMP END TASK + !$OMP END SINGLE - call gpu_allocate(tmp_vov, nV, nO, nV) call gpu_allocate(tmp_ovv, nO, nV, nV) + call gpu_allocate(tmp_vov, nV, nO, nV) + + call gpu_blas_create(blas) !$OMP DO do u=1,nO - call gpu_dgeam_f(blas_handle, 'N', 'N', 1, nO*nV*nV, 1.d0, & + call gpu_dgeam_f(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) do b=1,nV - call gpu_set_stream(blas_handle,stream(b)) - call gpu_dgeam_f(blas_handle, 'T', 'T', nV, nO, 1.d0, & + call gpu_dgeam_f(blas, 'T', 'T', nV, nO, 1.d0, & tmp_ovv%f(1,1,b), nO, 0.d0, & tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) enddo - call gpu_dgemm_f(blas_handle, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) - call gpu_synchronize() enddo !$OMP END DO + call gpu_blas_destroy(blas) + call gpu_deallocate(tmp_vov) call gpu_deallocate(tmp_ovv) + + !$OMP TASKWAIT !$OMP END PARALLEL - do b=1,nV - call gpu_stream_destroy(stream(b)) - enddo - - call gpu_set_stream(blas_handle,gpu_default_stream) - - call gpu_copy(d_cc_space_f_oo, H_oo) - call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & 1.d0, H_oo, nO) @@ -395,52 +394,97 @@ end ! H_vv -subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) +subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & + d_cc_space_v_ov_chol,H_vv) use gpu implicit none - integer, intent(in) :: nO,nV + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_vv + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol type(gpu_double4), intent(in) :: tau_x type(gpu_double2), intent(out) :: H_vv integer :: a,b,i,j,u,k, beta - double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) + type(gpu_double3) :: tau_kia, tmp_oov - allocate(tau_kia(cholesky_mo_num,nO,nV)) - !$omp parallel & - !$omp default(shared) & - !$omp private(i,beta,j,k,a,b,tmp_oov) - allocate(tmp_oov(nO,nO,nV) ) - !$omp do + call gpu_allocate(tau_kia, cholesky_mo_num, nO, nV) + +! !$omp parallel & +! !$omp default(shared) & +! !$omp private(i,beta,j,k,a,b,tmp_oov) +! allocate(tmp_oov(nO,nO,nV) ) +! !$omp do +! do a = 1, nV +! do b=1,nV +! do j=1,nO +! do i=1,nO +! tmp_oov(i,j,b) = tau_x%f(i,j,a,b) +! enddo +! enddo +! enddo +! call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & +! d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov, nO, & +! 0.d0, tau_kia(1,1,a), cholesky_mo_num) +! enddo +! !$omp end do nowait +! deallocate(tmp_oov) + +! !$omp do +! do beta = 1, nV +! do a = 1, nV +! H_vv%f(a,beta) = cc_space_f_vv(a,beta) +! enddo +! enddo +! !$omp end do nowait +! !$omp barrier +! !$omp end parallel +! call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & +! tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol%f, cholesky_mo_num*nO, & +! 1.d0, H_vv%f, nV) + + type(gpu_blas) :: blas + + + PROVIDE gpu_num + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(a,b,tmp_oov,blas) + + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_vv, H_vv) + !$OMP END TASK + !$OMP END SINGLE + + call gpu_blas_create(blas) + call gpu_allocate(tmp_oov, nO, nO, nV) + + !$OMP DO do a = 1, nV do b=1,nV - do j=1,nO - do i=1,nO - tmp_oov(i,j,b) = tau_x%f(i,j,a,b) - enddo - enddo + call gpu_dgeam_f(blas, 'N', 'N', nO, nO, 1.d0, & + tau_x%f(1,1,a,b), nO, 0.d0, & + tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) enddo - call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & - 0.d0, tau_kia(1,1,a), cholesky_mo_num) + call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov%f, nO, & + 0.d0, tau_kia%f(1,1,a), cholesky_mo_num) enddo - !$omp end do nowait - deallocate(tmp_oov) + !$OMP END DO - !$omp do - do beta = 1, nV - do a = 1, nV - H_vv%f(a,beta) = cc_space_f_vv(a,beta) - enddo - enddo - !$omp end do nowait - !$omp barrier - !$omp end parallel - call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & - tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & - 1.d0, H_vv%f, nV) + call gpu_blas_destroy(blas) + call gpu_deallocate(tmp_oov) + !$OMP TASKWAIT + !$OMP END PARALLEL + + call gpu_dgemm(blas_handle,'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol, cholesky_mo_num*nO, & + 1.d0, H_vv, nV) + + call gpu_deallocate(tau_kia) end ! H_vo diff --git a/src/gpu/gpu.irp.f b/src/gpu/gpu.irp.f index 6ad0a075..3b2feeb6 100644 --- a/src/gpu/gpu.irp.f +++ b/src/gpu/gpu.irp.f @@ -16,3 +16,11 @@ BEGIN_PROVIDER [ type(gpu_stream), gpu_default_stream ] gpu_default_stream%c = C_NULL_PTR END_PROVIDER +BEGIN_PROVIDER [ integer, gpu_num ] + implicit none + BEGIN_DOC + ! Number of usable GPUs + END_DOC + gpu_num = gpu_ndevices() +END_PROVIDER + From 44a7729f65a37cc3a7c35ae55f462bb1d61e411b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Jul 2024 19:00:27 +0200 Subject: [PATCH 105/131] H_ finished in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 108 ++---- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 482 +++++++++---------------- 2 files changed, 200 insertions(+), 390 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 0b3636ac..13b974be 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -181,11 +181,9 @@ subroutine run_ccsd_space_orb ! Residue if (do_mo_cholesky) then - call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & - d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) - call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & - d_cc_space_v_ov_chol,H_vv) - call compute_H_vo_chol(nO,nV,t1%f,H_vo%f) + call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, d_cc_space_v_ov_chol,H_vv) + call compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo) call compute_r1_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r1%f,max_r1) call compute_r2_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r2%f,max_r2) @@ -316,51 +314,20 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1, integer :: i,j,a,b double precision :: e -! energy = 0d0 -! !$omp parallel & -! !$omp shared(nO,nV,energy,tau_x,t1,& -! !$omp d_cc_space_f_vo,d_cc_space_v_oovv) & -! !$omp private(i,j,a,b,e) & -! !$omp default(none) -! e = 0d0 -! !$omp do -! do a = 1, nV -! do i = 1, nO -! e = e + 2d0 * d_cc_space_f_vo%f(a,i) * t1%f(i,a) -! enddo -! enddo -! !$omp end do nowait -! !$omp do -! do b = 1, nV -! do a = 1, nV -! do j = 1, nO -! do i = 1, nO -! e = e + tau_x%f(i,j,a,b) * d_cc_space_v_oovv%f(i,j,a,b) -! enddo -! enddo -! enddo -! enddo -! !$omp end do nowait -! !$omp critical -! energy = energy + e -! !$omp end critical -! !$omp end parallel + type(gpu_stream) :: s1, s2 + call gpu_stream_create(s1) + call gpu_stream_create(s2) + call gpu_set_stream(blas_handle,s1) + call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo, 1, t1, 1, e) - type(gpu_stream) :: s1, s2 - call gpu_stream_create(s1) - call gpu_stream_create(s2) + call gpu_set_stream(blas_handle,s2) + call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1_8, d_cc_space_v_oovv, 1_8, energy) + call gpu_set_stream(blas_handle,gpu_default_stream) - call gpu_set_stream(blas_handle,s1) - call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo, 1, t1, 1, e) - - call gpu_set_stream(blas_handle,s2) - call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1_8, d_cc_space_v_oovv, 1_8, energy) - call gpu_synchronize() - call gpu_set_stream(blas_handle,gpu_default_stream) - - call gpu_stream_destroy(s1) - call gpu_stream_destroy(s2) + call gpu_synchronize() + call gpu_stream_destroy(s1) + call gpu_stream_destroy(s2) energy = energy + 2.d0*e @@ -384,27 +351,9 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) ! internal integer :: i,j,a,b -! !$OMP PARALLEL & -! !$OMP SHARED(nO,nV,tau,t2,t1,h_t1) & -! !$OMP PRIVATE(i,j,a,b) & -! !$OMP DEFAULT(NONE) -! !$OMP DO -! do b = 1, nV -! do a = 1, nV -! do j = 1, nO -! do i = 1, nO -! tau%f(i,j,a,b) = t2%f(i,j,a,b) + t1%f(i,a) * h_t1(j,b) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - - type(gpu_stream) :: stream(nV) - !$OMP PARALLEL if (gpu_num == 0) & + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) @@ -422,6 +371,8 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) !$OMP END DO !$OMP END PARALLEL + call gpu_synchronize() + do b=1,nV call gpu_stream_destroy(stream(b)) enddo @@ -444,32 +395,15 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) ! internal integer :: i,j,a,b -! !$OMP PARALLEL & -! !$OMP SHARED(nO,nV,tau,tau_x) & -! !$OMP PRIVATE(i,j,a,b) & -! !$OMP DEFAULT(NONE) -! !$OMP DO -! do b = 1, nV -! do a = 1, nV -! do j = 1, nO -! do i = 1, nO -! tau_x%f(i,j,a,b) = 2.d0*tau%f(i,j,a,b) - tau%f(i,j,b,a) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - type(gpu_stream) :: stream(nV) do a=1,nV call gpu_stream_create(stream(a)) enddo - !$OMP PARALLEL if (gpu_num == 0) & + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) & - !$OMP PRIVATE(i,j,a,b) & + !$OMP PRIVATE(a,b) & !$OMP DEFAULT(NONE) !$OMP DO do b=1,nV @@ -484,10 +418,12 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) !$OMP END DO !$OMP END PARALLEL + call gpu_set_stream(blas_handle,gpu_default_stream) + call gpu_synchronize() + do b=1,nV call gpu_stream_destroy(stream(b)) enddo - call gpu_set_stream(blas_handle,gpu_default_stream) end diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 458016fb..5eb95a06 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1,81 +1,200 @@ -subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) +! H_oo +subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) + use gpu implicit none - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_oo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_oo - ! internal - integer :: i,j,a,b - double precision :: e + integer :: a,b,i,j,u,k - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau,t1,& - !$omp cc_space_f_vo,cc_space_w_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel + type(gpu_double3) :: tau_kau, tmp_vov, tmp_ovv -end + call gpu_allocate(tau_kau, cholesky_mo_num, nV, nO) -! Tau + type(gpu_blas) :: blas -subroutine update_tau_space_chol(nO,nV,t1,t2,tau) - implicit none + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(blas,u,b,tmp_vov,tmp_ovv) - ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_oo, H_oo) + !$OMP END TASK + !$OMP END SINGLE - ! out - double precision, intent(out) :: tau(nO,nO,nV,nV) + call gpu_allocate(tmp_ovv, nO, nV, nV) + call gpu_allocate(tmp_vov, nV, nO, nV) - ! internal - integer :: i,j,a,b + call gpu_blas_create(blas) - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,t2,t1) & - !$OMP PRIVATE(i,j,a,b) & - !$OMP DEFAULT(NONE) !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) - enddo - enddo + do u=1,nO + call gpu_dgeam_f(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & + tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) + do b=1,nV + call gpu_dgeam_f(blas, 'T', 'T', nV, nO, 1.d0, & + tmp_ovv%f(1,1,b), nO, 0.d0, & + tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) enddo + call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & + 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) enddo !$OMP END DO + + call gpu_blas_destroy(blas) + + call gpu_deallocate(tmp_vov) + call gpu_deallocate(tmp_ovv) + + !$OMP TASKWAIT !$OMP END PARALLEL + call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & + 1.d0, H_oo, nO) + + call gpu_synchronize() + call gpu_deallocate(tau_kau) +end + +! H_vv + +subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & + d_cc_space_v_ov_chol,H_vv) + use gpu + implicit none + + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_vv + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_vv + + integer :: a,b,i,j,u,k, beta + + type(gpu_double3) :: tau_kia, tmp_oov + + call gpu_allocate(tau_kia, cholesky_mo_num, nO, nV) + + type(gpu_blas) :: blas + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(a,b,tmp_oov,blas) + + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_vv, H_vv) + !$OMP END TASK + !$OMP END SINGLE + + call gpu_blas_create(blas) + call gpu_allocate(tmp_oov, nO, nO, nV) + + !$OMP DO + do a = 1, nV + do b=1,nV + call gpu_dgeam_f(blas, 'N', 'N', nO, nO, 1.d0, & + tau_x%f(1,1,a,b), nO, 0.d0, & + tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) + enddo + call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov%f, nO, & + 0.d0, tau_kia%f(1,1,a), cholesky_mo_num) + enddo + !$OMP END DO + + call gpu_blas_destroy(blas) + + call gpu_deallocate(tmp_oov) + !$OMP TASKWAIT + !$OMP END PARALLEL + + call gpu_dgemm(blas_handle,'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol, cholesky_mo_num*nO, & + 1.d0, H_vv, nV) + + call gpu_synchronize() + call gpu_deallocate(tau_kia) +end + +! H_vo +subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo) + use gpu + implicit none + + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol + type(gpu_double2), intent(out) :: H_vo + + integer :: a,b,i,j,u,k + + type(gpu_double1) :: tmp_k + type(gpu_double3) :: tmp, tmp2 + + call gpu_copy(d_cc_space_f_vo, H_vo) + + call gpu_allocate(tmp_k, cholesky_mo_num) + + call gpu_dgemm(blas_handle, 'N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + d_cc_space_v_ov_chol, cholesky_mo_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + + call gpu_dgemm(blas_handle, 'T','N',nV*nO,1,cholesky_mo_num,1.d0, & + d_cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & + H_vo, nV*nO) + + call gpu_deallocate(tmp_k) + + + call gpu_allocate(tmp, cholesky_mo_num, nO, nO) + + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + d_cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) + + call gpu_allocate(tmp2, cholesky_mo_num, nO, nO) + + type(gpu_stream) :: stream(nO) + do i=1,nO + call gpu_stream_create(stream(i)) + enddo + + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j) + do i=1,nO + do j=1,nO + call gpu_set_stream(blas_handle,stream(j)) + call gpu_dgeam_f(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, & + tmp%f(1,i,j), cholesky_mo_num, 0.d0, & + tmp%f(1,i,j), cholesky_mo_num, tmp2%f(1,j,i), cholesky_mo_num) + enddo + enddo + !$OMP END PARALLEL DO + + call gpu_set_stream(blas_handle,gpu_default_stream) + call gpu_synchronize() + + do i=1,nO + call gpu_stream_destroy(stream(i)) + enddo + call gpu_deallocate(tmp) + + call gpu_dgemm(blas_handle, 'T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + d_cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & + 1.d0, H_vo, nV) + + call gpu_synchronize() + call gpu_deallocate(tmp2) end ! R1 @@ -291,251 +410,6 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) end -! H_oo - -subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & - d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) - use gpu - implicit none - - integer, intent(in) :: nO,nV - type(gpu_double2), intent(in) :: d_cc_space_f_oo - type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol - type(gpu_double4), intent(in) :: tau_x - type(gpu_double2), intent(out) :: H_oo - - integer :: a,b,i,j,u,k - - type(gpu_double3) :: tau_kau, tmp_vov, tmp_ovv - - call gpu_allocate(tau_kau, cholesky_mo_num, nV, nO) - -! !$omp parallel & -! !$omp default(shared) & -! !$omp private(i,u,j,k,a,b,tmp_vov) -! call gpu_allocate(tmp_vov, nV, nO, nV) -! !$omp do -! do u = 1, nO -! do b=1,nV -! do j=1,nO -! do a=1,nV -! tmp_vov%f(a,j,b) = tau_x%f(u,j,a,b) -! enddo -! enddo -! enddo -! call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & -! d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_vov%f, nV, & -! 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) -! enddo -! !$omp end do nowait -! call gpu_deallocate(tmp_vov) -! !$omp do -! do i = 1, nO -! do u = 1, nO -! H_oo%f(u,i) = d_cc_space_f_oo%f(u,i) -! enddo -! enddo -! !$omp end do nowait -! -! !$omp barrier -! !$omp end parallel -! call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & -! tau_kau%f(1,1,1), cholesky_mo_num*nV, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, & -! 1.d0, H_oo%f(1,1), nO) -! - - type(gpu_blas) :: blas - - - !$OMP PARALLEL & - !$OMP DEFAULT(SHARED) & - !$OMP PRIVATE(blas,u,b,tmp_vov,tmp_ovv) - - !$OMP SINGLE - !$OMP TASK - call gpu_copy(d_cc_space_f_oo, H_oo) - !$OMP END TASK - !$OMP END SINGLE - - call gpu_allocate(tmp_ovv, nO, nV, nV) - call gpu_allocate(tmp_vov, nV, nO, nV) - - call gpu_blas_create(blas) - - !$OMP DO - do u=1,nO - call gpu_dgeam_f(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & - tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) - do b=1,nV - call gpu_dgeam_f(blas, 'T', 'T', nV, nO, 1.d0, & - tmp_ovv%f(1,1,b), nO, 0.d0, & - tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) - enddo - call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & - d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & - 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) - enddo - !$OMP END DO - - call gpu_blas_destroy(blas) - - call gpu_deallocate(tmp_vov) - call gpu_deallocate(tmp_ovv) - - !$OMP TASKWAIT - !$OMP END PARALLEL - - call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & - tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & - 1.d0, H_oo, nO) - - call gpu_deallocate(tau_kau) -end - -! H_vv - -subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & - d_cc_space_v_ov_chol,H_vv) - use gpu - implicit none - - integer, intent(in) :: nO,nV - type(gpu_double2), intent(in) :: d_cc_space_f_vv - type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol - type(gpu_double4), intent(in) :: tau_x - type(gpu_double2), intent(out) :: H_vv - - integer :: a,b,i,j,u,k, beta - - type(gpu_double3) :: tau_kia, tmp_oov - - call gpu_allocate(tau_kia, cholesky_mo_num, nO, nV) - -! !$omp parallel & -! !$omp default(shared) & -! !$omp private(i,beta,j,k,a,b,tmp_oov) -! allocate(tmp_oov(nO,nO,nV) ) -! !$omp do -! do a = 1, nV -! do b=1,nV -! do j=1,nO -! do i=1,nO -! tmp_oov(i,j,b) = tau_x%f(i,j,a,b) -! enddo -! enddo -! enddo -! call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & -! d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov, nO, & -! 0.d0, tau_kia(1,1,a), cholesky_mo_num) -! enddo -! !$omp end do nowait -! deallocate(tmp_oov) - -! !$omp do -! do beta = 1, nV -! do a = 1, nV -! H_vv%f(a,beta) = cc_space_f_vv(a,beta) -! enddo -! enddo -! !$omp end do nowait -! !$omp barrier -! !$omp end parallel -! call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & -! tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol%f, cholesky_mo_num*nO, & -! 1.d0, H_vv%f, nV) - - type(gpu_blas) :: blas - - - PROVIDE gpu_num - !$OMP PARALLEL & - !$OMP DEFAULT(SHARED) & - !$OMP PRIVATE(a,b,tmp_oov,blas) - - !$OMP SINGLE - !$OMP TASK - call gpu_copy(d_cc_space_f_vv, H_vv) - !$OMP END TASK - !$OMP END SINGLE - - call gpu_blas_create(blas) - call gpu_allocate(tmp_oov, nO, nO, nV) - - !$OMP DO - do a = 1, nV - do b=1,nV - call gpu_dgeam_f(blas, 'N', 'N', nO, nO, 1.d0, & - tau_x%f(1,1,a,b), nO, 0.d0, & - tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) - enddo - call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nO,nO*nV,1.d0, & - d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov%f, nO, & - 0.d0, tau_kia%f(1,1,a), cholesky_mo_num) - enddo - !$OMP END DO - - call gpu_blas_destroy(blas) - - call gpu_deallocate(tmp_oov) - !$OMP TASKWAIT - !$OMP END PARALLEL - - call gpu_dgemm(blas_handle,'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & - tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol, cholesky_mo_num*nO, & - 1.d0, H_vv, nV) - - call gpu_deallocate(tau_kia) -end - -! H_vo -subroutine compute_H_vo_chol(nO,nV,t1,H_vo) - - implicit none - - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(out) :: H_vo(nV, nO) - - integer :: a,b,i,j,u,k - - double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) - do i=1,nO - do a=1,nV - H_vo(a,i) = cc_space_f_vo(a,i) - enddo - enddo - - allocate(tmp_k(cholesky_mo_num)) - call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & - cc_space_v_ov_chol, cholesky_mo_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - - call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & - cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - H_vo, nV*nO) - deallocate(tmp_k) - - allocate(tmp(cholesky_mo_num,nO,nO)) - allocate(tmp2(cholesky_mo_num,nO,nO)) - - call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) - - do i=1,nO - do j=1,nO - do k=1,cholesky_mo_num - tmp2(k,j,i) = tmp(k,i,j) - enddo - enddo - enddo - deallocate(tmp) - - call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & - 1.d0, H_vo, nV) - -end - ! R2 From 2bead959d0eee7790162df656e3781e4dcdedb7d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Jul 2024 13:58:19 +0200 Subject: [PATCH 106/131] Fxied GPU interface for gfortran --- plugins/local/gpu_x86/gpu.c | 90 ++++++++++---------- src/ccsd/ccsd_space_orb_sub.irp.f | 10 +-- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 46 +++++------ src/gpu/gpu_module.F90 | 109 +++++-------------------- 4 files changed, 94 insertions(+), 161 deletions(-) diff --git a/plugins/local/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c index dab23a25..fe3cadc5 100644 --- a/plugins/local/gpu_x86/gpu.c +++ b/plugins/local/gpu_x86/gpu.c @@ -49,10 +49,11 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { /* Streams */ void gpu_stream_create(void** ptr) { - *ptr = (void*) 2; + *ptr = (void*) malloc(sizeof(char)); } void gpu_stream_destroy(void** ptr) { + free(*ptr); *ptr = NULL; } @@ -68,11 +69,12 @@ void gpu_synchronize() { /* BLAS functions */ void gpu_blas_create(void** handle) { - *handle = (void*) 1; + *handle = (void*) malloc(sizeof(char)); } void gpu_blas_destroy(void** handle) { + free(*handle); *handle = NULL; } @@ -122,7 +124,7 @@ void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); -void gpu_dgemv(void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, +void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { assert (handle != NULL); @@ -143,14 +145,14 @@ void gpu_dgemv(void* handle, const char transa, const int64_t m, const int64_t n assert ( (int64_t) incx_ == incx); assert ( (int64_t) incy_ == incy); - dgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); + dgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); } void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); -void gpu_sgemv(void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, +void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { assert (handle != NULL); @@ -171,14 +173,14 @@ void gpu_sgemv(void* handle, const char transa, const int64_t m, const int64_t n assert ( (int64_t) incx_ == incx); assert ( (int64_t) incy_ == incy); - sgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); + sgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); } void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); -void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, +void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { assert (handle != NULL); @@ -201,7 +203,7 @@ void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldc_ == ldc); - dgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); + dgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); } @@ -209,7 +211,7 @@ void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); -void gpu_sgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, +void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { assert (handle != NULL); @@ -232,18 +234,18 @@ void gpu_sgemm(void* handle, const char transa, const char transb, const int64_t assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldc_ == ldc); - sgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); + sgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); } -void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, +void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { assert (handle != NULL); - if ( (transa == 'N' && transb == 'N') || - (transa == 'n' && transb == 'N') || - (transa == 'N' && transb == 'n') || - (transa == 'n' && transb == 'n') ) { + if ( (*transa == 'N' && *transb == 'N') || + (*transa == 'n' && *transb == 'N') || + (*transa == 'N' && *transb == 'n') || + (*transa == 'n' && *transb == 'n') ) { if (alpha == 0.) { @@ -271,10 +273,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'N' && transb == 'T') || - (transa == 'n' && transb == 'T') || - (transa == 'N' && transb == 't') || - (transa == 'n' && transb == 't') ) { + } else if ( (*transa == 'N' && *transb == 'T') || + (*transa == 'n' && *transb == 'T') || + (*transa == 'N' && *transb == 't') || + (*transa == 'n' && *transb == 't') ) { if (alpha == 0.) { @@ -302,10 +304,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'T' && transb == 'N') || - (transa == 't' && transb == 'N') || - (transa == 'T' && transb == 'n') || - (transa == 't' && transb == 'n') ) { + } else if ( (*transa == 'T' && *transb == 'N') || + (*transa == 't' && *transb == 'N') || + (*transa == 'T' && *transb == 'n') || + (*transa == 't' && *transb == 'n') ) { if (alpha == 0.) { @@ -333,10 +335,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'T' && transb == 'T') || - (transa == 't' && transb == 'T') || - (transa == 'T' && transb == 't') || - (transa == 't' && transb == 't') ) { + } else if ( (*transa == 'T' && *transb == 'T') || + (*transa == 't' && *transb == 'T') || + (*transa == 'T' && *transb == 't') || + (*transa == 't' && *transb == 't') ) { if (alpha == 0.) { @@ -368,14 +370,14 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t } -void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, +void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { assert (handle != NULL); - if ( (transa == 'N' && transb == 'N') || - (transa == 'n' && transb == 'N') || - (transa == 'N' && transb == 'n') || - (transa == 'n' && transb == 'n') ) { + if ( (*transa == 'N' && *transb == 'N') || + (*transa == 'n' && *transb == 'N') || + (*transa == 'N' && *transb == 'n') || + (*transa == 'n' && *transb == 'n') ) { if (alpha == 0.) { @@ -403,10 +405,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'N' && transb == 'T') || - (transa == 'n' && transb == 'T') || - (transa == 'N' && transb == 't') || - (transa == 'n' && transb == 't') ) { + } else if ( (*transa == 'N' && *transb == 'T') || + (*transa == 'n' && *transb == 'T') || + (*transa == 'N' && *transb == 't') || + (*transa == 'n' && *transb == 't') ) { if (alpha == 0.) { @@ -434,10 +436,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'T' && transb == 'N') || - (transa == 't' && transb == 'N') || - (transa == 'T' && transb == 'n') || - (transa == 't' && transb == 'n') ) { + } else if ( (*transa == 'T' && *transb == 'N') || + (*transa == 't' && *transb == 'N') || + (*transa == 'T' && *transb == 'n') || + (*transa == 't' && *transb == 'n') ) { if (alpha == 0.) { @@ -465,10 +467,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t } - } else if ( (transa == 'T' && transb == 'T') || - (transa == 't' && transb == 'T') || - (transa == 'T' && transb == 't') || - (transa == 't' && transb == 't') ) { + } else if ( (*transa == 'T' && *transb == 'T') || + (*transa == 't' && *transb == 'T') || + (*transa == 'T' && *transb == 't') || + (*transa == 't' && *transb == 't') ) { if (alpha == 0.) { diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 13b974be..de109cea 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -319,10 +319,10 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1, call gpu_stream_create(s2) call gpu_set_stream(blas_handle,s1) - call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo, 1, t1, 1, e) + call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo%f(1,1), 1, t1%f(1,1), 1, e) call gpu_set_stream(blas_handle,s2) - call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1_8, d_cc_space_v_oovv, 1_8, energy) + call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x%f(1,1,1,1), 1_8, d_cc_space_v_oovv%f(1,1,1,1), 1_8, energy) call gpu_set_stream(blas_handle,gpu_default_stream) call gpu_synchronize() @@ -362,9 +362,9 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) call gpu_stream_create(stream(b)) call gpu_set_stream(blas_handle,stream(b)) do j=1,nO - call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nV, & + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, & 1.d0, t2%f(1,j,1,b), nO*nO, & - h_t1(j,b), t1%f, nO, & + h_t1(j,b), t1%f(1,1), nO, & tau%f(1,j,1,b), nO*nO) enddo enddo @@ -409,7 +409,7 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x) do b=1,nV do a=1,nV call gpu_set_stream(blas_handle,stream(a)) - call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nO, & + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, & 2.d0, tau%f(1,1,a,b), nO, & -1.d0, tau%f(1,1,b,a), nO, & tau_x%f(1,1,a,b), nO) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 5eb95a06..a3490589 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -37,15 +37,15 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & !$OMP DO do u=1,nO - call gpu_dgeam_f(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & - tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) + call gpu_dgeam(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & + tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f(1,1,1,1), nO, tmp_ovv%f(1,1,1), 1) do b=1,nV - call gpu_dgeam_f(blas, 'T', 'T', nV, nO, 1.d0, & + call gpu_dgeam(blas, 'T', 'T', nV, nO, 1.d0, & tmp_ovv%f(1,1,b), nO, 0.d0, & tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) enddo - call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & - d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & + call gpu_dgemm(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_vov%f(1,1,1), nV, & 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) enddo !$OMP END DO @@ -59,8 +59,8 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & !$OMP END PARALLEL call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & - tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & - 1.d0, H_oo, nO) + tau_kau%f(1,1,1), cholesky_mo_num*nV, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, & + 1.d0, H_oo%f(1,1), nO) call gpu_synchronize() call gpu_deallocate(tau_kau) @@ -103,12 +103,12 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & !$OMP DO do a = 1, nV do b=1,nV - call gpu_dgeam_f(blas, 'N', 'N', nO, nO, 1.d0, & + call gpu_dgeam(blas, 'N', 'N', nO, nO, 1.d0, & tau_x%f(1,1,a,b), nO, 0.d0, & tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) enddo - call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nO,nO*nV,1.d0, & - d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov%f, nO, & + call gpu_dgemm(blas, 'N', 'T', cholesky_mo_num, nO, nO*nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_oov%f(1,1,1), nO, & 0.d0, tau_kia%f(1,1,a), cholesky_mo_num) enddo !$OMP END DO @@ -119,9 +119,9 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & !$OMP TASKWAIT !$OMP END PARALLEL - call gpu_dgemm(blas_handle,'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & - tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol, cholesky_mo_num*nO, & - 1.d0, H_vv, nV) + call gpu_dgemm(blas_handle, 'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia%f(1,1,1), cholesky_mo_num*nO, d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, & + 1.d0, H_vv%f(1,1), nV) call gpu_synchronize() call gpu_deallocate(tau_kia) @@ -148,20 +148,20 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, & call gpu_allocate(tmp_k, cholesky_mo_num) call gpu_dgemm(blas_handle, 'N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & - d_cc_space_v_ov_chol, cholesky_mo_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, & + t1%f(1,1), nO*nV, 0.d0, tmp_k%f(1), cholesky_mo_num) - call gpu_dgemm(blas_handle, 'T','N',nV*nO,1,cholesky_mo_num,1.d0, & - d_cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - H_vo, nV*nO) + call gpu_dgemm(blas_handle, 'T', 'N', nV*nO, 1, cholesky_mo_num, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num, tmp_k%f(1), cholesky_mo_num, 1.d0, & + H_vo%f(1,1), nV*nO) call gpu_deallocate(tmp_k) call gpu_allocate(tmp, cholesky_mo_num, nO, nO) - call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - d_cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) + call gpu_dgemm(blas_handle, 'N', 'T', cholesky_mo_num*nO, nO, nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, 0.d0, tmp%f(1,1,1), cholesky_mo_num*nO) call gpu_allocate(tmp2, cholesky_mo_num, nO, nO) @@ -174,7 +174,7 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, & do i=1,nO do j=1,nO call gpu_set_stream(blas_handle,stream(j)) - call gpu_dgeam_f(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, & + call gpu_dgeam(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, & tmp%f(1,i,j), cholesky_mo_num, 0.d0, & tmp%f(1,i,j), cholesky_mo_num, tmp2%f(1,j,i), cholesky_mo_num) enddo @@ -190,8 +190,8 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, & call gpu_deallocate(tmp) call gpu_dgemm(blas_handle, 'T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & - d_cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & - 1.d0, H_vo, nV) + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, tmp2%f(1,1,1), cholesky_mo_num*nO, & + 1.d0, H_vo%f(1,1), nV) call gpu_synchronize() call gpu_deallocate(tmp2) diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 2676b339..20d99ede 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -156,17 +156,17 @@ module gpu character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc real(c_float), intent(in), value :: alpha, beta - type(c_ptr), value :: a, b, c + real(c_float) :: a, b, c end subroutine subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm') import type(c_ptr), value, intent(in) :: handle - character(c_char), intent(in), value :: transa, transb + character(c_char), intent(in) :: transa, transb integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc real(c_double), intent(in), value :: alpha, beta - type(c_ptr), value :: a, b, c + real(c_double) :: a, b, c end subroutine subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & @@ -176,7 +176,7 @@ module gpu character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc real(c_float), intent(in), value :: alpha, beta - type(c_ptr), value :: a, b, c + real(c_float) :: a, b, c end subroutine end interface @@ -570,7 +570,6 @@ module gpu end subroutine -end module @@ -578,38 +577,20 @@ end module ! --- subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle integer*4 :: n, incx, incy - type(gpu_double1), intent(in) :: dx, dy - double precision, intent(out) :: res - call gpu_ddot_c(handle%c, int(n,c_int64_t), dx%c, int(incx,c_int64_t), dy%c, int(incy,c_int64_t), res) -end subroutine - -subroutine gpu_ddot_f(handle, n, dx, incx, dy, incy, res) - use gpu - type(gpu_blas), intent(in) :: handle - integer*4 :: n, incx, incy - double precision, target :: dx(*), dy(*) + double precision, target :: dx, dy double precision, intent(out) :: res call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res) end subroutine subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle integer*8 :: n, incx, incy - type(gpu_double1), intent(in) :: dx, dy - double precision, intent(out) :: res - call gpu_ddot_c(handle%c, n, dx%c, incx, dy%c, incy, res) -end subroutine - -subroutine gpu_ddot_f_64(handle, n, dx, incx, dy, incy, res) - use gpu - type(gpu_blas), intent(in) :: handle - integer*8 :: n, incx, incy - double precision, target :: dx(*), dy(*) + double precision, target :: dx, dy double precision, intent(out) :: res call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) end subroutine @@ -620,25 +601,12 @@ end subroutine subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb integer*4, intent(in) :: m, n, lda, ldb, ldc double precision, intent(in) :: alpha, beta - type(gpu_double2) :: a, b, c - call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, & - b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t)) -end subroutine - - -subroutine gpu_dgeam_f(handle, transa, transb, m, n, alpha, a, lda, beta, & - b, ldb, c, ldc) - use gpu - type(gpu_blas), intent(in) :: handle - character, intent(in) :: transa, transb - integer*4, intent(in) :: m, n, lda, ldb, ldc - double precision, intent(in) :: alpha, beta - double precision, target :: a(*), b(*), c(*) + double precision, target :: a, b, c call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) end subroutine @@ -646,25 +614,12 @@ end subroutine subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, & b, ldb, c, ldc) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb integer*8, intent(in) :: m, n, lda, ldb, ldc double precision, intent(in) :: alpha, beta - type(gpu_double2) :: a, b, c - call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, & - b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t)) -end subroutine - - -subroutine gpu_dgeam_f_64(handle, transa, transb, m, n, alpha, a, lda, beta, & - b, ldb, c, ldc) - use gpu - type(gpu_blas), intent(in) :: handle - character, intent(in) :: transa, transb - integer*8, intent(in) :: m, n, lda, ldb, ldc - double precision, intent(in) :: alpha, beta - double precision, target :: a(*), b(*), c(*) + double precision, target :: a, b, c call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) end subroutine @@ -675,51 +630,27 @@ end subroutine subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, & b, ldb, beta, c, ldc) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb integer*4, intent(in) :: m, n, k, lda, ldb, ldc double precision, intent(in) :: alpha, beta - type(gpu_double2) :: a, b, c + double precision :: a, b, c call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & - alpha, a%c, int(lda,c_int64_t), & - b%c, int(ldb,c_int64_t), beta, c%c, int(ldc,c_int64_t)) + alpha, a, int(lda,c_int64_t), & + b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t)) end subroutine subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, & b, ldb, beta, c, ldc) - use gpu +! use gpu type(gpu_blas), intent(in) :: handle character, intent(in) :: transa, transb integer*8, intent(in) :: m, n, k, lda, ldb, ldc double precision, intent(in) :: alpha, beta - type(gpu_double2) :: a, b, c - call gpu_dgemm_c(handle%c, transa, transb, m, n, k, & - alpha, a%c, lda, b%c, ldb, beta, c%c, ldc) -end subroutine - -subroutine gpu_dgemm_f(handle, transa, transb, m, n, k, alpha, a, lda, & - b, ldb, beta, c, ldc) - use gpu - type(gpu_blas), intent(in) :: handle - character, intent(in) :: transa, transb - integer*4, intent(in) :: m, n, k, lda, ldb, ldc - double precision, intent(in) :: alpha, beta - double precision, target :: a(*), b(*), c(*) + double precision :: a, b, c call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & - alpha, c_loc(a), int(lda,c_int64_t), & - c_loc(b), int(ldb,c_int64_t), beta, c_loc(c), int(ldc,c_int64_t)) -end subroutine - -subroutine gpu_dgemm_f_64(handle, transa, transb, m, n, k, alpha, a, lda, & - b, ldb, beta, c, ldc) - use gpu - type(gpu_blas), intent(in) :: handle - character, intent(in) :: transa, transb - integer*8, intent(in) :: m, n, k, lda, ldb, ldc - double precision, intent(in) :: alpha, beta - double precision, target :: a(*), b(*), c(*) - call gpu_dgemm_c(handle%c, transa, transb, m, n, k, & - alpha, c_loc(a), lda, c_loc(b), ldb, beta, c_loc(c), ldc) + alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t)) end subroutine +end module From 447cdcd907dd864252777423763ed6947efc32d8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Jul 2024 17:22:41 +0200 Subject: [PATCH 107/131] Working on r1 --- plugins/local/gpu_nvidia/gpu.c | 32 ++--- src/ccsd/ccsd_space_orb_sub.irp.f | 31 ++++- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 176 ++++++++++++------------- src/gpu/gpu_module.F90 | 57 +++++++- 4 files changed, 182 insertions(+), 114 deletions(-) diff --git a/plugins/local/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c index 39a82984..e77847a6 100644 --- a/plugins/local/gpu_nvidia/gpu.c +++ b/plugins/local/gpu_nvidia/gpu.c @@ -149,7 +149,7 @@ void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int6 -void gpu_dgemv(cublasHandle_t handle, const char transa, const int64_t m, const int64_t n, const double alpha, +void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { assert (handle != NULL); @@ -171,14 +171,14 @@ void gpu_dgemv(cublasHandle_t handle, const char transa, const int64_t m, const assert ( (int64_t) incy_ == incy); cublasOperation_t transa_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; cublasDgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); } -void gpu_sgemv(cublasHandle_t handle, const char transa, const int64_t m, const int64_t n, const float alpha, +void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { assert (handle != NULL); @@ -200,13 +200,13 @@ void gpu_sgemv(cublasHandle_t handle, const char transa, const int64_t m, const assert ( (int64_t) incy_ == incy); cublasOperation_t transa_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; cublasSgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); } -void gpu_dgemm(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, +void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { assert (handle != NULL); @@ -231,15 +231,15 @@ void gpu_dgemm(cublasHandle_t handle, const char transa, const char transb, cons cublasOperation_t transa_ = CUBLAS_OP_N; cublasOperation_t transb_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; cublasDgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); } -void gpu_sgemm(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, +void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { assert (handle != NULL); @@ -264,14 +264,14 @@ void gpu_sgemm(cublasHandle_t handle, const char transa, const char transb, cons cublasOperation_t transa_ = CUBLAS_OP_N; cublasOperation_t transb_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; cublasSgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); } -void gpu_dgeam(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, +void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double alpha, const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { assert (handle != NULL); @@ -293,15 +293,15 @@ void gpu_dgeam(cublasHandle_t handle, const char transa, const char transb, cons cublasOperation_t transa_ = CUBLAS_OP_N; cublasOperation_t transb_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; cublasDgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); } -void gpu_sgeam(cublasHandle_t handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, +void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float alpha, const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { assert (handle != NULL); @@ -323,8 +323,8 @@ void gpu_sgeam(cublasHandle_t handle, const char transa, const char transb, cons cublasOperation_t transa_ = CUBLAS_OP_N; cublasOperation_t transb_ = CUBLAS_OP_N; - if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T; - if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; cublasSgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index de109cea..256f743b 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -20,8 +20,8 @@ subroutine run_ccsd_space_orb type(gpu_double3) :: d_cc_space_v_oo_chol, d_cc_space_v_vo_chol type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol - type(gpu_double4) :: d_cc_space_v_oovv - + type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov + type(gpu_double4) :: d_cc_space_v_oovo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -69,6 +69,7 @@ subroutine run_ccsd_space_orb call gpu_upload(cc_space_f_oo, d_cc_space_f_oo) call gpu_upload(cc_space_f_vo, d_cc_space_f_vo) + call gpu_upload(cc_space_f_ov, d_cc_space_f_ov) call gpu_upload(cc_space_f_vv, d_cc_space_f_vv) ! FREE cc_space_f_oo @@ -92,6 +93,18 @@ subroutine run_ccsd_space_orb ! FREE cc_space_v_vv_chol endif + call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV) + call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV) + call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO) + + call gpu_upload(cc_space_v_voov, d_cc_space_v_voov) + call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov) + call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo) + +! FREE cc_space_v_voov +! FREE cc_space_v_ovov +! FREE cc_space_v_oovo + call gpu_allocate(t2, nO,nO,nV,nV) call gpu_allocate(r2, nO,nO,nV,nV) call gpu_allocate(tau, nO,nO,nV,nV) @@ -185,7 +198,8 @@ subroutine run_ccsd_space_orb call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, d_cc_space_v_ov_chol,H_vv) call compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo) - call compute_r1_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r1%f,max_r1) + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & + d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) call compute_r2_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r2%f,max_r2) else call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) @@ -292,8 +306,17 @@ subroutine run_ccsd_space_orb call gpu_deallocate(d_cc_space_v_vo_chol) call gpu_deallocate(d_cc_space_v_vv_chol) endif - call gpu_deallocate(d_cc_space_f_vo) + call gpu_deallocate(d_cc_space_v_oovv) + call gpu_deallocate(d_cc_space_v_voov) + call gpu_deallocate(d_cc_space_v_ovov) + call gpu_deallocate(d_cc_space_v_oovo) + + call gpu_deallocate(d_cc_space_f_oo) + call gpu_deallocate(d_cc_space_f_vo) + call gpu_deallocate(d_cc_space_f_ov) + call gpu_deallocate(d_cc_space_f_vv) + call gpu_deallocate(t1) call gpu_deallocate(t2) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index a3490589..6190e985 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -199,59 +199,52 @@ end ! R1 -subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & + d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) + use gpu implicit none ! in integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) - double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + type(gpu_double2), intent(in) :: t1, H_oo, H_vo, H_vv, d_cc_space_f_ov,d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo ! out - double precision, intent(out) :: r1(nO,nV), max_r1 + type(gpu_double2), intent(out) :: r1 + double precision, intent(out) :: max_r1 ! internal integer :: u,i,j,beta,a,b - !$omp parallel & - !$omp shared(nO,nV,r1,cc_space_f_ov) & - !$omp private(u,beta) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - r1(u,beta) = cc_space_f_ov(u,beta) - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_copy(d_cc_space_f_ov, r1) - double precision, allocatable :: X_oo(:,:) - allocate(X_oo(nO,nO)) - call dgemm('N','N', nO, nO, nV, & - -2d0, t1 , size(t1,1), & - cc_space_f_vo, size(cc_space_f_vo,1), & - 0d0, X_oo , size(X_oo,1)) + type(gpu_double2) :: X_oo + call gpu_allocate(X_oo,nO,nO) - call dgemm('T','N', nO, nV, nO, & - 1d0, X_oo, size(X_oo,2), & - t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) - deallocate(X_oo) + call gpu_dgemm(blas_handle, 'N','N', nO, nO, nV, & + -2d0, t1%f(1,1), size(t1%f,1), & + d_cc_space_f_vo%f(1,1), size(d_cc_space_f_vo%f,1), & + 0d0, X_oo%f(1,1), size(X_oo%f,1)) - call dgemm('N','N', nO, nV, nV, & - 1d0, t1 , size(t1,1), & - H_vv, size(H_vv,1), & - 1d0, r1 , size(r1,1)) + call gpu_dgemm(blas_handle, 'T','N', nO, nV, nO, & + 1d0, X_oo%f(1,1), size(X_oo%f,2), & + t1%f(1,1) , size(t1%f,1), & + 1d0, r1%f(1,1) , size(r1%f,1)) - call dgemm('N','N', nO, nV, nO, & - -1d0, H_oo, size(H_oo,1), & - t1 , size(t1,1), & - 1d0, r1, size(r1,1)) + call gpu_dgemm(blas_handle, 'N','N', nO, nV, nV, & + 1d0, t1%f(1,1) , size(t1%f,1), & + H_vv%f(1,1), size(H_vv%f,1), & + 1d0, r1%f(1,1) , size(r1%f,1)) + + call gpu_dgemm(blas_handle, 'N','N', nO, nV, nO, & + -1d0, H_oo%f(1,1), size(H_oo%f,1), & + t1%f(1,1) , size(t1%f,1), & + 1d0, r1%f(1,1), size(r1%f,1)) + + type(gpu_double4) :: X_voov + call gpu_allocate(X_voov, nV, nO, nO, nV) - double precision, allocatable :: X_voov(:,:,:,:) - allocate(X_voov(nV, nO, nO, nV)) !$omp parallel & !$omp shared(nO,nV,X_voov,t2,t1) & @@ -262,7 +255,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do u = 1, nO do i = 1, nO do a = 1, nV - X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + X_voov%f(a,i,u,beta) = 2d0 * t2%f(i,u,a,beta) - t2%f(u,i,a,beta) + t1%f(u,a) * t1%f(i,beta) enddo enddo enddo @@ -270,18 +263,20 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do !$omp end parallel - call dgemv('T', nV*nO, nO*nV, & - 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & - H_vo , 1, & - 1d0, r1 , 1) + call gpu_dgemv(blas_handle, 'T', nV*nO, nO*nV, & + 1d0, X_voov%f(1,1,1,1), size(X_voov%f,1) * size(X_voov%f,2), & + H_vo%f(1,1) , 1, & + 1d0, r1%f(1,1) , 1) - deallocate(X_voov) + call gpu_synchronize() + call gpu_deallocate(X_oo) + call gpu_deallocate(X_voov) - double precision, allocatable :: X_ovov(:,:,:,:) - allocate(X_ovov(nO, nV, nO, nV)) + type(gpu_double4) :: X_ovov + call gpu_allocate(X_ovov, nO, nV, nO, nV) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp shared(nO,nV,d_cc_space_v_ovov,d_cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) !$omp do @@ -289,7 +284,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do u = 1, nO do a = 1, nv do i = 1, nO - X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + X_ovov%f(i,a,u,beta) = 2d0 * d_cc_space_v_voov%f(a,u,i,beta) - d_cc_space_v_ovov%f(u,a,i,beta) enddo enddo enddo @@ -297,17 +292,25 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do !$omp end parallel - call dgemv('T', nO*nV, nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - t1 , 1, & - 1d0, r1 , 1) - - deallocate(X_ovov) +! call dgemv('T', nO*nV, nO*nV, & +! 1d0, X_ovov%f, size(X_ovov%f,1) * size(X_ovov%f,2), & +! t1%f, 1, & +! 1d0, r1%f, 1) + call gpu_dgemv(blas_handle, 'T', nO*nV, nO*nV, & + 1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + t1%f(1,1), 1, & + 1d0, r1%f(1,1), 1) integer :: iblock, block_size, nVmax - double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) + type(gpu_double4) :: W_vvov, W_vvov_tmp, T_vvoo + block_size = 16 - allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) + call gpu_allocate(W_vvov,nV, nV,nO,block_size) + call gpu_allocate(W_vvov_tmp, nV,nO,nV,block_size) + call gpu_allocate(T_vvoo, nV,nV,nO,nO) + + call gpu_synchronize() + call gpu_deallocate(X_ovov) !$omp parallel & !$omp private(u,i,b,a) & @@ -317,7 +320,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do i = 1, nO do b = 1, nV do a = 1, nV - T_vvoo(a,b,i,u) = tau(i,u,a,b) + T_vvoo%f(a,b,i,u) = tau%f(i,u,a,b) enddo enddo enddo @@ -328,11 +331,12 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do iblock = 1, nV, block_size nVmax = min(block_size,nV-iblock+1) - call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & - cc_space_v_vo_chol , cholesky_mo_num, & - cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - 0.d0, W_vvov_tmp, nV*nO) + call gpu_dgemm(blas_handle, 'T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1) , cholesky_mo_num, & + d_cc_space_v_vv_chol%f(1,1,iblock), cholesky_mo_num, & + 0.d0, W_vvov_tmp%f(1,1,1,1), nV*nO) + call gpu_synchronize() !$omp parallel & !$omp private(b,i,a,beta) & !$omp default(shared) @@ -341,7 +345,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp do do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) + W_vvov%f(a,b,i,beta) = 2d0 * W_vvov_tmp%f(a,i,b,beta) - W_vvov_tmp%f(b,i,a,beta) enddo enddo !$omp end do nowait @@ -350,20 +354,22 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp barrier !$omp end parallel - call dgemm('T','N',nO,nVmax,nO*nV*nV, & - 1d0, T_vvoo, nV*nV*nO, & - W_vvov, nO*nV*nV, & - 1d0, r1(1,iblock), nO) + call gpu_dgemm(blas_handle, 'T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo%f(1,1,1,1), nV*nV*nO, & + W_vvov%f(1,1,1,1), nO*nV*nV, & + 1d0, r1%f(1,iblock), nO) enddo - deallocate(W_vvov,T_vvoo) + call gpu_synchronize() + call gpu_deallocate(W_vvov) + call gpu_deallocate(T_vvoo) - double precision, allocatable :: W_oovo(:,:,:,:) - allocate(W_oovo(nO,nO,nV,nO)) + type(gpu_double4) :: W_oovo + call gpu_allocate(W_oovo, nO,nO,nV,nO) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & + !$omp shared(nO,nV,d_cc_space_v_oovo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) do u = 1, nO @@ -371,8 +377,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do a = 1, nV do j = 1, nO do i = 1, nO -! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) - W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) + W_oovo%f(i,j,a,u) = 2d0 * d_cc_space_v_oovo%f(i,j,a,u) - d_cc_space_v_oovo%f(j,i,a,u) enddo enddo enddo @@ -380,33 +385,22 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end parallel - call dgemm('T','N', nO, nV, nO*nO*nV, & - -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & - tau , size(tau,1) * size(tau,2) * size(tau,3), & - 1d0, r1 , size(r1,1)) + ! Change the sign for consistency with the code in spin orbitals + call gpu_dgemm(blas_handle, 'T','N', nO, nV, nO*nO*nV, & + 1d0, W_oovo%f(1,1,1,1), size(W_oovo%f,1) * size(W_oovo%f,2) * size(W_oovo%f,3), & + tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2) * size(tau%f,3), & + -1d0, r1%f(1,1), size(r1%f,1)) - deallocate(W_oovo) + call gpu_synchronize() + call gpu_deallocate(W_oovo) max_r1 = 0d0 do a = 1, nV do i = 1, nO - max_r1 = max(dabs(r1(i,a)), max_r1) + max_r1 = max(dabs(r1%f(i,a)), max_r1) enddo enddo - ! Change the sign for consistency with the code in spin orbitals - !$omp parallel & - !$omp shared(nO,nV,r1) & - !$omp private(a,i) & - !$omp default(none) - !$omp do - do a = 1, nV - do i = 1, nO - r1(i,a) = -r1(i,a) - enddo - enddo - !$omp end do - !$omp end parallel end diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 20d99ede..949ae4fc 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -136,7 +136,7 @@ module gpu type(c_ptr), value, intent(in) :: handle integer(c_int64_t), value :: n, incx, incy type(c_ptr), intent(in), value :: dx, dy - real(c_float), intent(out) :: res + real(c_float), intent(out) :: res end subroutine subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & @@ -145,7 +145,7 @@ module gpu type(c_ptr), value, intent(in) :: handle character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc - real(c_double), intent(in), value :: alpha, beta + real(c_double), intent(in), value :: alpha, beta type(c_ptr), value :: a, b, c end subroutine @@ -155,10 +155,31 @@ module gpu type(c_ptr), value, intent(in) :: handle character(c_char), intent(in), value :: transa, transb integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc - real(c_float), intent(in), value :: alpha, beta + real(c_float), intent(in), value :: alpha, beta real(c_float) :: a, b, c end subroutine + subroutine gpu_dgemv_c(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) bind(C, name='gpu_dgemv') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in) :: transa + integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy + real(c_double), intent(in), value :: alpha, beta + real(c_double) :: a, x, y + end subroutine + + subroutine gpu_sgemv_c(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) bind(C, name='gpu_sgemv') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in) :: transa + integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy + real(c_float), intent(in), value :: alpha, beta + real(c_float) :: a, x, y + end subroutine + + subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm') import @@ -625,6 +646,36 @@ subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, & end subroutine +! gemv +! ---- + +subroutine gpu_dgemv(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa + integer*4, intent(in) :: m, n, lda, incx, incy + double precision, intent(in) :: alpha, beta + double precision :: a, x, y + call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), & + alpha, a, int(lda,c_int64_t), & + x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t)) +end subroutine + +subroutine gpu_dgemv_64(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa + integer*8, intent(in) :: m, n, lda, incx, incy + double precision, intent(in) :: alpha, beta + double precision :: a, x, y + call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), & + alpha, a, int(lda,c_int64_t), & + x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t)) +end subroutine + + ! gemm ! ---- From 92fe3a6f84b0af99bf554602528969699c206cde Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Jul 2024 18:36:19 +0200 Subject: [PATCH 108/131] Working on r1 --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 37 ++++++++++++++++++++------ 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 6190e985..e0048637 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -245,23 +245,44 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_s type(gpu_double4) :: X_voov call gpu_allocate(X_voov, nV, nO, nO, nV) + type(gpu_stream) :: stream(nV) + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo + + call gpu_synchronize() + +! do i=1,nO +! do beta=1,nV +! call gpu_set_stream(blas_handle, stream(beta)) +! call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, -1.d0, t2%f(1,i,1,beta), & +! nO*nO, t1%f(i,beta), t1%f(1,1), nO, X_voov%f(1,i,1,beta), nV) +! enddo +! enddo - !$omp parallel & - !$omp shared(nO,nV,X_voov,t2,t1) & - !$omp private(u,beta,i,a) & - !$omp default(none) - !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO do a = 1, nV - X_voov%f(a,i,u,beta) = 2d0 * t2%f(i,u,a,beta) - t2%f(u,i,a,beta) + t1%f(u,a) * t1%f(i,beta) + X_voov%f(a,i,u,beta) = - t2%f(u,i,a,beta) + t1%f(u,a) * t1%f(i,beta) enddo enddo enddo enddo - !$omp end do - !$omp end parallel + call gpu_synchronize() + + do beta=1,nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'T', nV, nO*nO, 1.d0, X_voov%f(1,1,1,beta), & + nV, 2.d0, t2%f(1,1,1,beta), nO*nO, X_voov%f(1,1,1,beta), nV) + enddo + + call gpu_synchronize() + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo + call gpu_set_stream(blas_handle, gpu_default_stream) call gpu_dgemv(blas_handle, 'T', nV*nO, nO*nV, & 1d0, X_voov%f(1,1,1,1), size(X_voov%f,1) * size(X_voov%f,2), & From cc09f8c61a0e8a29e7d2a2933d9659c1ad70a7b5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 3 Jul 2024 14:52:11 +0200 Subject: [PATCH 109/131] Minor changes in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 6 +++--- src/mo_two_e_ints/map_integrals.irp.f | 2 +- src/tools/four_idx_transform.irp.f | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index acb0872b..a2d9d043 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -158,9 +158,9 @@ END_PROVIDER Lset(np8) = p8 endif enddo - np = np8 + if (np8 > ndim8) stop 'np>ndim8' + np = int(np8,4) if (np <= 0) stop 'np<=0' - if (np > ndim8) stop 'np>ndim8' rank_max = min(np,20*elec_num*elec_num) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) @@ -431,7 +431,7 @@ END_PROVIDER Lset(np8) = p8 endif enddo - np = np8 + np = int(np8,4) enddo diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 168c34b4..eeb4279f 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -40,7 +40,7 @@ end ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_size = 2_8**mo_integrals_cache_shift + mo_integrals_cache_size = 2**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) diff --git a/src/tools/four_idx_transform.irp.f b/src/tools/four_idx_transform.irp.f index 92e87cad..fc6bface 100644 --- a/src/tools/four_idx_transform.irp.f +++ b/src/tools/four_idx_transform.irp.f @@ -12,6 +12,9 @@ program four_idx_transform ! END_DOC + if (do_mo_cholesky) then + stop 'Not implemented with Cholesky integrals' + endif io_mo_two_e_integrals = 'Write' SOFT_TOUCH io_mo_two_e_integrals if (.true.) then From 2f8e7bd4f79108476bcc1b04165912d629d7d924 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 3 Jul 2024 15:32:38 +0200 Subject: [PATCH 110/131] Updated to read CHolesky MO integrals from TREXIO --- src/trexio/export_trexio_routines.irp.f | 2 +- src/trexio/import_trexio_integrals.irp.f | 226 ++++++++++++++++------- 2 files changed, 159 insertions(+), 69 deletions(-) diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index 63630243..0eec68bd 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -557,7 +557,7 @@ subroutine export_trexio(update,full_path) do k=1,cholesky_ao_num do j=1,mo_num do i=1,mo_num - integral = cholesky_mo(i,j,k) + integral = cholesky_mo_transp(k,i,j) if (integral == 0.d0) cycle icount += 1_8 chol_buffer(icount) = integral diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 5a6b3c03..556ed7bc 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -28,7 +28,7 @@ subroutine run(f) integer(trexio_t), intent(in) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc - integer ::i,j,k,l + integer :: i,j,k,l, iunit integer(8) :: m, n_integrals double precision :: integral @@ -41,10 +41,12 @@ subroutine run(f) integer , allocatable :: Vi(:,:) double precision :: s -! TODO: -! - If Cholesky AO in trexio file, read cholesky ao vectors -! - If Cholesky MO in trexio file, read cholesky mo vectors -! - If Cholesky MO not in trexio file, force do_cholesky_mo to False + integer*4 :: BUFSIZE + integer :: rank + double precision, allocatable :: tmp(:,:,:) + integer*8 :: offset, icount + + integer, external :: getUnitAndOpen if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) @@ -120,45 +122,88 @@ subroutine run(f) rc = trexio_has_ao_2e_int(f) PROVIDE ao_num if (rc /= TREXIO_HAS_NOT) then - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int_eri_cholesky(f) + if (rc /= TREXIO_HAS_NOT) then - integer*8 :: offset, icount + rc = trexio_read_ao_2e_int_eri_cholesky_num(f, rank) + call trexio_assert(rc, TREXIO_SUCCESS) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount - if (rc /= TREXIO_SUCCESS) then - exit - endif - end do - n_integrals = offset + allocate(tmp(ao_num,ao_num,rank)) + tmp(:,:,:) = 0.d0 - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + BUFSIZE=ao_num**2 + allocate(Vi(3,BUFSIZE), V(BUFSIZE)) - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') - deallocate(buffer_i, buffer_values, Vi, V) - print *, 'AO integrals read from TREXIO file' + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri_cholesky(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + integral = V(m) + tmp(i,j,k) = integral + enddo + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + + print *, 'Writing Cholesky AO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') + write(iunit) rank + write(iunit) tmp(:,:,:) + close(iunit) + call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read') + + deallocate(Vi, V, tmp) + print *, 'Cholesky AO integrals read from TREXIO file' + endif + + rc = trexio_has_ao_2e_int_eri(f) + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map + + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + endif else print *, 'AO integrals not found in TREXIO file' endif @@ -186,40 +231,85 @@ subroutine run(f) rc = trexio_has_mo_2e_int(f) if (rc /= TREXIO_HAS_NOT) then - BUFSIZE=mo_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_mo_2e_int_eri_cholesky(f) + if (rc /= TREXIO_HAS_NOT) then + + rc = trexio_read_mo_2e_int_eri_cholesky_num(f, rank) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(tmp(rank,mo_num,mo_num)) + tmp(:,:,:) = 0.d0 + + BUFSIZE=mo_num**2 + allocate(Vi(3,BUFSIZE), V(BUFSIZE)) + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri_cholesky(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + integral = V(m) + tmp(k,i,j) = integral + enddo + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + + print *, 'Writing Cholesky MO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') + write(iunit) rank + write(iunit) tmp(:,:,:) + close(iunit) + call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') + + deallocate(Vi, V, tmp) + print *, 'Cholesky MO integrals read from TREXIO file' + endif + + rc = trexio_has_mo_2e_int_eri(f) + if (rc /= TREXIO_HAS_NOT) then + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) - offset = offset + icount - if (rc /= TREXIO_SUCCESS) then - exit - endif - end do - n_integrals = offset + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset - call map_sort(mo_integrals_map) - call map_unique(mo_integrals_map) + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + endif - call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) - call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') - deallocate(buffer_i, buffer_values, Vi, V) - print *, 'MO integrals read from TREXIO file' else print *, 'MO integrals not found in TREXIO file' endif From 7ceb8fdcca5cd7bff3984c816eef8e47aa681a0b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 3 Jul 2024 18:24:13 +0200 Subject: [PATCH 111/131] Finished r1 --- plugins/local/gpu_nvidia/gpu.c | 41 +++--- plugins/local/gpu_x86/gpu.c | 112 ++++++++-------- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 177 ++++++++++--------------- src/gpu/gpu.h | 24 ++-- src/gpu/gpu_module.F90 | 12 +- 5 files changed, 162 insertions(+), 204 deletions(-) diff --git a/plugins/local/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c index e77847a6..a775ab95 100644 --- a/plugins/local/gpu_nvidia/gpu.c +++ b/plugins/local/gpu_nvidia/gpu.c @@ -116,11 +116,6 @@ void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int assert ( (int64_t) incy_ == incy); cublasStatus_t rc = cublasDdot(handle, n_, x, incx_, y, incy_, result); -/* - double alpha = 1.0; - double beta = 0.0; - cublasStatus_t rc = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, 1, 1, n_, &alpha, x, 1, y, n_, &beta, &result_, 1); -*/ assert (rc == CUBLAS_STATUS_SUCCESS); } @@ -149,8 +144,8 @@ void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int6 -void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double alpha, - const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { +void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { assert (handle != NULL); @@ -173,13 +168,13 @@ void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const cublasOperation_t transa_ = CUBLAS_OP_N; if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; - cublasDgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); + cublasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_); } -void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float alpha, - const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { +void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) { assert (handle != NULL); @@ -202,12 +197,12 @@ void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const cublasOperation_t transa_ = CUBLAS_OP_N; if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; - cublasSgemv(handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_); + cublasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_); } -void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, - const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { +void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { assert (handle != NULL); @@ -234,13 +229,13 @@ void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, co if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; - cublasDgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); + cublasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_); } -void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, - const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { +void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) { assert (handle != NULL); @@ -267,12 +262,12 @@ void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, co if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; - cublasSgemm(handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_); + cublasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_); } -void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double alpha, - const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { +void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { assert (handle != NULL); /* Convert to int */ @@ -296,13 +291,13 @@ void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, co if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; - cublasDgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + cublasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_); } -void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float alpha, - const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { +void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { assert (handle != NULL); /* Convert to int */ @@ -326,6 +321,6 @@ void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, co if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; - cublasSgeam(handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_); + cublasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_); } diff --git a/plugins/local/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c index fe3cadc5..49aec9d3 100644 --- a/plugins/local/gpu_x86/gpu.c +++ b/plugins/local/gpu_x86/gpu.c @@ -124,8 +124,8 @@ void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); -void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double alpha, - const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { +void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { assert (handle != NULL); @@ -145,15 +145,15 @@ void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t assert ( (int64_t) incx_ == incx); assert ( (int64_t) incy_ == incy); - dgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); + dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_); } void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); -void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float alpha, - const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { +void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) { assert (handle != NULL); @@ -173,15 +173,15 @@ void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t assert ( (int64_t) incx_ == incx); assert ( (int64_t) incy_ == incy); - sgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); + sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_); } void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); -void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, - const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { +void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { assert (handle != NULL); @@ -203,7 +203,7 @@ void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64 assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldc_ == ldc); - dgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); + dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_); } @@ -211,8 +211,8 @@ void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64 void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); -void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, - const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { +void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) { assert (handle != NULL); @@ -234,12 +234,12 @@ void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64 assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldc_ == ldc); - sgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); + sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_); } -void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double alpha, - const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { +void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { assert (handle != NULL); if ( (*transa == 'N' && *transb == 'N') || @@ -247,19 +247,19 @@ void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64 (*transa == 'N' && *transb == 'n') || (*transa == 'n' && *transb == 'n') ) { - if (alpha == 0.) { + if (*alpha == 0.) { for (int64_t j=0 ; j Date: Thu, 4 Jul 2024 12:01:16 +0200 Subject: [PATCH 112/131] Working on r2 --- src/ccsd/ccsd_space_orb_sub.irp.f | 27 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 353 +++++++++++++------------ 2 files changed, 200 insertions(+), 180 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 256f743b..59b9ebd2 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -21,7 +21,8 @@ subroutine run_ccsd_space_orb type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov - type(gpu_double4) :: d_cc_space_v_oovo + type(gpu_double4) :: d_cc_space_v_oovo, d_cc_space_v_vooo, d_cc_space_v_oooo + type(gpu_double4) :: d_cc_space_v_vvoo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -93,17 +94,29 @@ subroutine run_ccsd_space_orb ! FREE cc_space_v_vv_chol endif + call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV) call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV) call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO) + call gpu_allocate(d_cc_space_v_vooo, nV, nO, nO, nO) + call gpu_allocate(d_cc_space_v_oooo, nO, nO, nO, nO) + call gpu_allocate(d_cc_space_v_vvoo, nV, nV, nO, nO) + call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) call gpu_upload(cc_space_v_voov, d_cc_space_v_voov) call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov) call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo) + call gpu_upload(cc_space_v_vooo, d_cc_space_v_vooo) + call gpu_upload(cc_space_v_oooo, d_cc_space_v_oooo) + call gpu_upload(cc_space_v_vvoo, d_cc_space_v_vvoo) ! FREE cc_space_v_voov ! FREE cc_space_v_ovov ! FREE cc_space_v_oovo +! FREE cc_space_v_oovv +! FREE cc_space_v_vooo +! FREE cc_space_v_oooo +! FREE cc_space_v_vvoo call gpu_allocate(t2, nO,nO,nV,nV) call gpu_allocate(r2, nO,nO,nV,nV) @@ -165,15 +178,8 @@ subroutine run_ccsd_space_orb call gpu_upload(h_t2, t2) - call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) - call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) - -! FREE cc_space_v_oovv - - call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) - !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy @@ -200,7 +206,10 @@ subroutine run_ccsd_space_orb call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) - call compute_r2_space_chol(nO,nV,t1%f,t2%f,tau%f,H_oo%F,H_vv%F,H_vo%F,r2%f,max_r2) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, & + d_cc_space_v_vvoo, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + r2, max_r2) else call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) call compute_H_vv(nO,nV,t1%f,t2%f,tau%f,H_vv%f) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index c34b390b..0474dcec 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -391,168 +391,162 @@ end ! R2 -subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) - +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, & + d_cc_space_v_vvoo, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + r2,max_r2) + use gpu implicit none ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) - double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + integer, intent(in) :: nO, nV + type(gpu_double2), intent(in) :: t1, H_oo, H_vv + type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_oovv + type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo + type(gpu_double4), intent(in) :: d_cc_space_v_vvoo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol ! out - double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + double precision, intent(out) :: max_r2 + type(gpu_double4), intent(out) :: r2 ! internal integer :: u,v,i,j,beta,gam,a,b double precision :: max_r2_local + type(gpu_stream) :: stream(nV) + call set_multiple_levels_omp(.False.) - !$omp parallel & - !$omp shared(nO,nV,r2,cc_space_v_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_copy(d_cc_space_v_oovv, r2) - double precision, allocatable :: A1(:,:,:,:) - allocate(A1(nO,nO,nO,nO)) - call compute_A1_chol(nO,nV,t1,t2,tau,A1) - call dgemm('N','N',nO*nO,nV*nV,nO*nO, & - 1d0, A1, size(A1,1) * size(A1,2), & - tau, size(tau,1) * size(tau,2), & - 1d0, r2, size(r2,1) * size(r2,2)) + type(gpu_double4) :: A1 + call gpu_allocate(A1,nO,nO,nO,nO) + call compute_A1_chol(nO,nV,t1,t2,tau,d_cc_space_v_vooo, & + d_cc_space_v_oooo, d_cc_space_v_vvoo, A1) + + call gpu_dgemm(blas_handle, 'N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1%f(1,1,1,1), size(A1%f,1) * size(A1%f,2), & + tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + 1d0, r2%f(1,1,1,1), size(r2%f,1) * size(r2%f,2)) + + call gpu_deallocate(A1) - deallocate(A1) integer :: block_size, iblock, k block_size = 16 - double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 - double precision, dimension(:,:), allocatable :: tmp_cc2 + type(gpu_double3) :: tmp_cc, B1, tmpB1 + type(gpu_double2) :: tmp_cc2 - allocate(tmp_cc(cholesky_mo_num,nV,nV)) - call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & - cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) + call gpu_allocate(tmp_cc,cholesky_mo_num,nV,nV) + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, 0.d0, tmp_cc%f(1,1,1), cholesky_mo_num*nV) call set_multiple_levels_omp(.False.) + call gpu_synchronize() + + type(gpu_blas) :: blas + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a, blas) + call gpu_allocate(B1,nV,nV,block_size) + call gpu_allocate(tmpB1,nV,block_size,nV) + call gpu_allocate(tmp_cc2,cholesky_mo_num,nV) + + call gpu_blas_create(blas) - !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV - do a=1,nV - do k=1,cholesky_mo_num - tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) - enddo - enddo + call gpu_dgeam(blas, 'N', 'N', cholesky_mo_num, nV, 1.d0, d_cc_space_v_vv_chol%f(1,1,gam), & + cholesky_mo_num, -1.d0, tmp_cc%f(1,1,gam), cholesky_mo_num, tmp_cc2%f(1,1), cholesky_mo_num) do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & - -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & - 0.d0, tmpB1, nV*block_size) + call gpu_dgemm(blas, 'T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc%f(1,1,iblock), cholesky_mo_num, & + d_cc_space_v_vv_chol%f(1,1,gam), cholesky_mo_num, & + 0.d0, tmpB1%f(1,1,1), nV*block_size) - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & - 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - tmp_cc2, cholesky_mo_num, & - 1.d0, tmpB1, nV*block_size) + call gpu_dgemm(blas, 'T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, d_cc_space_v_vv_chol%f(1,1,iblock), cholesky_mo_num, & + tmp_cc2%f(1,1), cholesky_mo_num, & + 1.d0, tmpB1%f(1,1,1), nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) - do b = 1, nV - do a = 1, nV - B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) - enddo - enddo + call gpu_dgeam(blas, 'N', 'N', nV, nV, 1.d0, tmpB1%f(1,beta-iblock+1,1), & + nV*block_size, 0.d0, B1%f(1,1,beta-iblock+1), nV, B1%f(1,1,beta-iblock+1), nV) enddo - call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1 ,1) * size(B1 ,2), & - 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + call gpu_dgemm(blas, 'N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & + 1d0, tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + B1%f(1,1,1) , size(B1%f ,1) * size(B1%f ,2), & + 1d0, r2%f(1,1,iblock,gam), size(r2%f ,1) * size(r2%f ,2)) enddo enddo !$OMP ENDDO - deallocate(B1, tmpB1, tmp_cc2) + call gpu_blas_destroy(blas) + + call gpu_deallocate(B1) + call gpu_deallocate(tmpB1) + call gpu_deallocate(tmp_cc2) !$OMP END PARALLEL - deallocate(tmp_cc) + call gpu_deallocate(tmp_cc) + type(gpu_double4) :: X_oovv + call gpu_allocate(X_oovv,nO,nO,nV,nV) + call gpu_copy(t2,X_oovv) - double precision, allocatable :: X_oovv(:,:,:,:) - allocate(X_oovv(nO,nO,nV,nV)) - !$omp parallel & - !$omp shared(nO,nV,t2,X_oovv) & - !$omp private(u,v,gam,a) & - !$omp default(none) - !$omp do - do a = 1, nV - do gam = 1, nV - do v = 1, nO - do u = 1, nO - X_oovv(u,v,gam,a) = t2(u,v,gam,a) - enddo - enddo - enddo + type(gpu_double2) :: g_vir + call gpu_allocate(g_vir,nV,nV) + call compute_g_vir_chol(nO,nV,t1%f,t2%f,H_vv%f,g_vir%f) + + type(gpu_double4) :: Y_oovv + call gpu_allocate(Y_oovv,nO,nO,nV,nV) + + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv%f(1,1,1,1), size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3), & + g_vir%f(1,1), size(g_vir%f,1), & + 0d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) + + call gpu_synchronize() + + do a=1,nV + call gpu_stream_create(stream(a)) enddo - !$omp end do - !$omp end parallel - double precision, allocatable :: g_vir(:,:) - allocate(g_vir(nV,nV)) - call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) - - double precision, allocatable :: Y_oovv(:,:,:,:) - allocate(Y_oovv(nO,nO,nV,nV)) - - call dgemm('N','N',nO*nO*nV,nV,nV, & - 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & - g_vir, size(g_vir,1), & - 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) - deallocate(g_vir) - deallocate(X_oovv) - - !$omp parallel & - !$omp shared(nO,nV,r2,Y_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, Y_oovv%f(1,1,beta,gam), & + nO, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Y_oovv%f(1,1,gam,beta), nO, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(Y_oovv) - double precision, allocatable :: g_occ(:,:) - allocate(g_occ(nO,nO)) - call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + call gpu_deallocate(g_vir) + call gpu_set_stream(blas_handle, gpu_default_stream) - allocate(X_oovv(nO,nO,nV,nV)) - call dgemm('N','N',nO,nO*nV*nV,nO, & - 1d0, g_occ , size(g_occ,1), & - t2 , size(t2,1), & - 0d0, X_oovv, size(X_oovv,1)) - deallocate(g_occ) + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_deallocate(Y_oovv) + + type(gpu_double2) :: g_occ + call gpu_allocate(g_occ,nO,nO) + + call compute_g_occ_chol(nO,nV,t1%f,t2%f,H_oo%f,g_occ%f) + + call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ%f(1,1), size(g_occ%f,1), & + t2%f(1,1,1,1) , size(t2%f,1), & + 0d0, X_oovv%f(1,1,1,1), size(X_oovv%f,1)) + + call gpu_synchronize() !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -563,7 +557,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,beta,gam) - X_oovv%f(v,u,gam,beta) enddo enddo enddo @@ -571,27 +565,39 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_oovv) + call gpu_deallocate(g_occ) + call gpu_deallocate(X_oovv) - double precision, allocatable :: X_vovv(:,:,:,:) + type(gpu_double4) :: X_vovv + + call gpu_allocate(X_vovv,nV,nO,nV,block_size) + call gpu_allocate(Y_oovv,nO,nO,nV,nV) - allocate(X_vovv(nV,nO,nV,block_size)) - allocate(Y_oovv(nO,nO,nV,nV)) do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) - call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & - cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & - cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + call gpu_stream_create(stream(gam)) + call gpu_set_stream(blas_handle, stream(gam)) + call gpu_dgemm(blas_handle, 'T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,gam), cholesky_mo_num, d_cc_space_v_ov_chol%f(1,1,1), & + cholesky_mo_num, 0.d0, X_vovv%f(1,1,1,gam-iblock+1), nV) enddo + do gam = iblock, min(nV, iblock+block_size-1) + call gpu_stream_destroy(stream(gam)) + enddo + + call gpu_synchronize() + + call gpu_set_stream(blas_handle, gpu_default_stream) call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & - 1d0, t1 , size(t1,1), & - X_vovv, size(X_vovv,1), & - 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) + 1d0, t1%f , size(t1%f,1), & + X_vovv%f, size(X_vovv%f,1), & + 0d0, Y_oovv%f(1,1,1,iblock), size(Y_oovv%f,1)) enddo - deallocate(X_vovv) + call gpu_synchronize() + call gpu_deallocate(X_vovv) !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -602,14 +608,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(v,u,beta,gam) + Y_oovv%f(u,v,gam,beta) enddo enddo enddo enddo !$omp end do !$omp end parallel - deallocate(Y_oovv) + call gpu_deallocate(Y_oovv) double precision, allocatable :: X_ovvo(:,:,:,:) double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) @@ -617,11 +623,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(tcc(cholesky_mo_num,nO,nV)) call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & + d_cc_space_v_vv_chol%f, cholesky_mo_num*nV, t1%f, nO, & 0.d0, tcc2, cholesky_mo_num*nV) call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & - cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & + cc_space_v_oo_chol, cholesky_mo_num*nO, t1%f, nO, & 0.d0, tcc, cholesky_mo_num*nO) call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & @@ -639,7 +645,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_ovvo(u,beta,gam,v) enddo enddo enddo @@ -650,7 +656,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do gam = 1, nV do v = 1, nO do u = 1, nO - r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) + r2%f(v,u,gam,beta) = r2%f(v,u,gam,beta) - X_ovvo(u,beta,gam,v) enddo enddo enddo @@ -661,12 +667,12 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_ovvo) !----- - allocate(X_oovv(nO,nO,nV,nV)) + call gpu_allocate(X_oovv,nO,nO,nV,nV) call dgemm('N','N',nO*nO*nV,nV,nO, & 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & - t1 , size(t1,1), & - 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + t1%f , size(t1%f,1), & + 0d0, X_oovv%f, size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3)) !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -677,14 +683,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,beta,gam) - X_oovv%f(v,u,gam,beta) enddo enddo enddo enddo !$omp end do !$omp end parallel - deallocate(X_oovv) + call gpu_deallocate(X_oovv) double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO)) @@ -708,16 +714,16 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(Y_oovo(nO,nO,nV,nO)) call dgemm('N','N',nO,nO*nV*nO,nV, & - 1d0, t1, size(t1,1), & + 1d0, t1%f, size(t1%f,1), & X_vovo, size(X_vovo,1), & 0d0, Y_oovo, size(Y_oovo,1)) deallocate(X_vovo) - allocate(X_oovv(nO,nO,nV,nV)) + call gpu_allocate(X_oovv,nO,nO,nV,nV) call dgemm('N','N',nO*nO*nV, nV, nO, & 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & - t1 , size(t1,1), & - 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + t1%f , size(t1%f,1), & + 0d0, X_oovv%f, size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3)) deallocate(Y_oovo) !$omp parallel & @@ -729,24 +735,24 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,gam,beta) - X_oovv%f(v,u,beta,gam) enddo enddo enddo enddo !$omp end do !$omp end parallel - deallocate(X_oovv) + call gpu_deallocate(X_oovv) double precision, allocatable :: J1(:,:,:,:) allocate(J1(nO,nV,nV,nO)) - call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + call compute_J1_chol(nO,nV,t1%f,t2%f,cc_space_v_ovvo,cc_space_v_ovoo, & cc_space_v_vvoo,J1) double precision, allocatable :: K1(:,:,:,:) allocate(K1(nO,nV,nO,nV)) - call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + call compute_K1_chol(nO,nV,t1%f,t2%f,cc_space_v_ovoo,cc_space_v_vvoo, & cc_space_v_ovov,K1) allocate(X_ovvo(nO,nV,nV,nO)) @@ -778,7 +784,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do i = 1, nO do a = 1, nV - Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + Y_voov(a,i,v,gam) = 2d0 * t2%f(i,v,a,gam) - t2%f(i,v,gam,a) enddo enddo enddo @@ -805,7 +811,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) enddo enddo enddo @@ -820,7 +826,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(Y_ovov(nO,nV,nO,nV)) !$omp parallel & - !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) !$omp do @@ -840,7 +846,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + Y_ovov(i,a,v,gam) = t2%f(i,v,gam,a) enddo enddo enddo @@ -864,7 +870,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) enddo enddo enddo @@ -895,7 +901,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Y_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2%f(i,v,beta,a) enddo enddo enddo @@ -922,7 +928,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) enddo enddo enddo @@ -945,8 +951,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do a = 1, nV do j = 1, nO do i = 1, nO - r2(i,j,a,b) = -r2(i,j,a,b) - max_r2_local = max(r2(i,j,a,b), max_r2_local) + r2%f(i,j,a,b) = -r2%f(i,j,a,b) + max_r2_local = max(r2%f(i,j,a,b), max_r2_local) enddo enddo enddo @@ -961,28 +967,29 @@ end ! A1 -subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) - +subroutine compute_A1_chol(nO,nV,t1,t2,tau,d_cc_space_v_vooo, & + d_cc_space_v_oooo, d_cc_space_v_vvoo, A1) + use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) - double precision, intent(out) :: A1(nO, nO, nO, nO) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, tau + type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_vvoo + type(gpu_double4), intent(out) :: A1 integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta - double precision, allocatable :: Y_oooo(:,:,:,:) - allocate(Y_oooo(nO,nO,nO,nO)) + type(gpu_double4) :: Y_oooo + call gpu_allocate(Y_oooo,nO,nO,nO,nO) ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & call dgemm('N','N', nO, nO*nO*nO, nV, & - 1d0, t1 , size(t1,1), & - cc_space_v_vooo, size(cc_space_v_vooo,1), & - 0d0, Y_oooo, size(Y_oooo,1)) + 1d0, t1%f , size(t1%f,1), & + d_cc_space_v_vooo%f, size(d_cc_space_v_vooo%f,1), & + 0d0, Y_oooo%f, size(Y_oooo%f,1)) !$omp parallel & !$omp private(u,v,i,j) & @@ -992,7 +999,7 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) do i = 1, nO do v = 1, nO do u = 1, nO - A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) + A1%f(u,v,i,j) = d_cc_space_v_oooo%f(u,v,i,j) + Y_oooo%f(v,u,j,i) + Y_oooo%f(u,v,i,j) enddo enddo enddo @@ -1000,19 +1007,20 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) !$omp end do !$omp end parallel - deallocate(Y_oooo) + call gpu_deallocate(Y_oooo) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) call dgemm('N','N', nO*nO, nO*nO, nV*nV, & - 1d0, tau , size(tau,1) * size(tau,2), & - cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & - 1d0, A1 , size(A1,1) * size(A1,2)) + 1d0, tau%f , size(tau%f,1) * size(tau%f,2), & + d_cc_space_v_vvoo%f, size(d_cc_space_v_vvoo%f,1) * size(d_cc_space_v_vvoo%f,2), & + 1d0, A1%f , size(A1%f,1) * size(A1%f,2)) end ! g_occ subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + use gpu implicit none @@ -1048,6 +1056,7 @@ end ! g_vir subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + use gpu implicit none @@ -1102,6 +1111,7 @@ end ! J1 subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) + use gpu implicit none integer, intent(in) :: nO,nV @@ -1305,6 +1315,7 @@ end ! K1 subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) + use gpu implicit none From 5b1e5f84e6defc8857b5d29c54449e3b8d35cb67 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 Jul 2024 14:52:09 +0200 Subject: [PATCH 113/131] Working on r2 --- src/ccsd/ccsd_space_orb_sub.irp.f | 17 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 469 +++++++++++-------------- 2 files changed, 220 insertions(+), 266 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 59b9ebd2..e97c2325 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -22,7 +22,7 @@ subroutine run_ccsd_space_orb type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov type(gpu_double4) :: d_cc_space_v_oovo, d_cc_space_v_vooo, d_cc_space_v_oooo - type(gpu_double4) :: d_cc_space_v_vvoo + type(gpu_double4) :: d_cc_space_v_vvoo, d_cc_space_v_ovvo, d_cc_space_v_ovoo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -98,17 +98,21 @@ subroutine run_ccsd_space_orb call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV) call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV) call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO) + call gpu_allocate(d_cc_space_v_ovvo, nO, nV, nV, nO) call gpu_allocate(d_cc_space_v_vooo, nV, nO, nO, nO) call gpu_allocate(d_cc_space_v_oooo, nO, nO, nO, nO) call gpu_allocate(d_cc_space_v_vvoo, nV, nV, nO, nO) + call gpu_allocate(d_cc_space_v_ovoo, nO, nV, nO, nO) call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) call gpu_upload(cc_space_v_voov, d_cc_space_v_voov) call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov) call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo) + call gpu_upload(cc_space_v_ovvo, d_cc_space_v_ovvo) call gpu_upload(cc_space_v_vooo, d_cc_space_v_vooo) call gpu_upload(cc_space_v_oooo, d_cc_space_v_oooo) call gpu_upload(cc_space_v_vvoo, d_cc_space_v_vvoo) + call gpu_upload(cc_space_v_ovoo, d_cc_space_v_ovoo) ! FREE cc_space_v_voov ! FREE cc_space_v_ovov @@ -117,6 +121,8 @@ subroutine run_ccsd_space_orb ! FREE cc_space_v_vooo ! FREE cc_space_v_oooo ! FREE cc_space_v_vvoo +! FREE cc_space_v_ovvo +! FREE cc_space_v_ovoo call gpu_allocate(t2, nO,nO,nV,nV) call gpu_allocate(r2, nO,nO,nV,nV) @@ -207,8 +213,8 @@ subroutine run_ccsd_space_orb call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & - d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, & - d_cc_space_v_vvoo, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & + d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & r2, max_r2) else call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) @@ -320,6 +326,11 @@ subroutine run_ccsd_space_orb call gpu_deallocate(d_cc_space_v_voov) call gpu_deallocate(d_cc_space_v_ovov) call gpu_deallocate(d_cc_space_v_oovo) + call gpu_deallocate(d_cc_space_v_ovvo) + call gpu_deallocate(d_cc_space_v_vooo) + call gpu_deallocate(d_cc_space_v_oooo) + call gpu_deallocate(d_cc_space_v_vvoo) + call gpu_deallocate(d_cc_space_v_ovoo) call gpu_deallocate(d_cc_space_f_oo) call gpu_deallocate(d_cc_space_f_vo) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0474dcec..abb9909b 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -392,8 +392,8 @@ end ! R2 subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & - d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, & - d_cc_space_v_vvoo, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & + d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & r2,max_r2) use gpu implicit none @@ -403,9 +403,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & type(gpu_double2), intent(in) :: t1, H_oo, H_vv type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_oovv type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo - type(gpu_double4), intent(in) :: d_cc_space_v_vvoo - type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol - type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol + type(gpu_double4), intent(in) :: d_cc_space_v_vvoo, d_cc_space_v_oovo + type(gpu_double4), intent(in) :: d_cc_space_v_ovvo, d_cc_space_v_ovoo + type(gpu_double4), intent(in) :: d_cc_space_v_ovov + type(gpu_double3), intent(in) :: d_cc_space_v_oo_chol, d_cc_space_v_ov_chol + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol, d_cc_space_v_vv_chol ! out double precision, intent(out) :: max_r2 @@ -499,9 +501,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_allocate(X_oovv,nO,nO,nV,nV) call gpu_copy(t2,X_oovv) - type(gpu_double2) :: g_vir + type(gpu_double2) :: g_occ, g_vir call gpu_allocate(g_vir,nV,nV) + call gpu_allocate(g_occ,nO,nO) call compute_g_vir_chol(nO,nV,t1%f,t2%f,H_vv%f,g_vir%f) + call compute_g_occ_chol(nO,nV,t1%f,t2%f,H_oo%f,g_occ%f) type(gpu_double4) :: Y_oovv call gpu_allocate(Y_oovv,nO,nO,nV,nV) @@ -511,7 +515,41 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & g_vir%f(1,1), size(g_vir%f,1), & 0d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) + call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nV,nO, & + -1d0, g_occ%f(1,1), size(g_occ%f,1), & + t2%f(1,1,1,1) , size(t2%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1)) + + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV,nV,nO, & + -1d0, d_cc_space_v_oovo%f(1,1,1,1), size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) + call gpu_synchronize() + call gpu_deallocate(X_oovv) + + call gpu_deallocate(g_vir) + call gpu_deallocate(g_occ) + + type(gpu_double4) :: X_vovo, Y_oovo + call gpu_allocate(X_vovo,nV,nO,nV,nO) + +! !$omp parallel & +! !$omp shared(nO,nV,r2,Y_oovv) & +! !$omp private(u,v,gam,beta) & +! !$omp default(none) +! !$omp do +! do gam = 1, nV +! do beta = 1, nV +! do v = 1, nO +! do u = 1, nO +! r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(u,v,beta,gam) + Y_oovv%f(v,u,gam,beta) +! enddo +! enddo +! enddo +! enddo +! !$omp end do +! !$omp end parallel do a=1,nV call gpu_stream_create(stream(a)) @@ -527,52 +565,24 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & enddo enddo - call gpu_deallocate(g_vir) - call gpu_set_stream(blas_handle, gpu_default_stream) + do i = 1, nO + do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + call gpu_dgeam(blas_handle, 'T', 'N', nV, nO, 1.d0, d_cc_space_v_ovvo%f(1,1,gam,i), & + nO, 0.d0, X_vovo%f(1,1,gam,i), nV, X_vovo%f(1,1,gam,i), nV) + enddo + enddo do a=1,nV call gpu_stream_destroy(stream(a)) enddo + call gpu_set_stream(blas_handle, gpu_default_stream) - call gpu_deallocate(Y_oovv) - - type(gpu_double2) :: g_occ - call gpu_allocate(g_occ,nO,nO) - - call compute_g_occ_chol(nO,nV,t1%f,t2%f,H_oo%f,g_occ%f) - - call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nV,nO, & - 1d0, g_occ%f(1,1), size(g_occ%f,1), & - t2%f(1,1,1,1) , size(t2%f,1), & - 0d0, X_oovv%f(1,1,1,1), size(X_oovv%f,1)) - - call gpu_synchronize() - - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,beta,gam) - X_oovv%f(v,u,gam,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - - call gpu_deallocate(g_occ) - call gpu_deallocate(X_oovv) type(gpu_double4) :: X_vovv call gpu_allocate(X_vovv,nV,nO,nV,block_size) - call gpu_allocate(Y_oovv,nO,nO,nV,nV) - + call gpu_allocate(Y_oovo,nO,nO,nV,nO) do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) @@ -590,241 +600,176 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_synchronize() call gpu_set_stream(blas_handle, gpu_default_stream) - call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & - 1d0, t1%f , size(t1%f,1), & - X_vovv%f, size(X_vovv%f,1), & + call gpu_dgemm(blas_handle, 'N','N', nO, & + nO*nV*min(block_size, nV-iblock+1),nV, & + 1.d0, t1%f(1,1) , size(t1%f,1), & + X_vovv%f(1,1,1,1), size(X_vovv%f,1), & 0d0, Y_oovv%f(1,1,1,iblock), size(Y_oovv%f,1)) enddo + + call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nO,nV, & + 1d0, t1%f(1,1), size(t1%f,1), & + X_vovo%f(1,1,1,1), size(X_vovo%f,1), & + 0d0, Y_oovo%f(1,1,1,1), size(Y_oovo%f,1)) + + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV, nV, nO, & + -1d0, Y_oovo%f(1,1,1,1), size(Y_oovo%f,1) * size(Y_oovo%f,2) * size(Y_oovo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) + call gpu_synchronize() call gpu_deallocate(X_vovv) + call gpu_deallocate(X_vovo) + call gpu_deallocate(Y_oovo) + +! !$omp parallel & +! !$omp shared(nO,nV,r2,Y_oovv) & +! !$omp private(u,v,gam,beta) & +! !$omp default(none) +! !$omp do +! do gam = 1, nV +! do beta = 1, nV +! do v = 1, nO +! do u = 1, nO +! r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(u,v,gam,beta) + Y_oovv%f(v,u,beta,gam) +! enddo +! enddo +! enddo +! enddo +! !$omp end do +! !$omp end parallel + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,r2,Y_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(v,u,beta,gam) + Y_oovv%f(u,v,gam,beta) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, 1.d0, Y_oovv%f(1,1,beta,gam), & + nO, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Y_oovv%f(1,1,gam,beta), nO, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel + + call gpu_set_stream(blas_handle, gpu_default_stream) + + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo + + + call gpu_synchronize() call gpu_deallocate(Y_oovv) - double precision, allocatable :: X_ovvo(:,:,:,:) - double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) - allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) - allocate(tcc(cholesky_mo_num,nO,nV)) + type(gpu_double4) :: X_ovvo + type(gpu_double3) :: tcc, tcc2 + call gpu_allocate(tcc2,cholesky_mo_num,nV,nO) + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + call gpu_allocate(tcc,cholesky_mo_num,nO,nV) - call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & - d_cc_space_v_vv_chol%f, cholesky_mo_num*nV, t1%f, nO, & - 0.d0, tcc2, cholesky_mo_num*nV) + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, & + 0.d0, tcc2%f(1,1,1), cholesky_mo_num*nV) - call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & - cc_space_v_oo_chol, cholesky_mo_num*nO, t1%f, nO, & - 0.d0, tcc, cholesky_mo_num*nO) + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + d_cc_space_v_oo_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, & + 0.d0, tcc%f(1,1,1), cholesky_mo_num*nO) - call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & - tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & - X_ovvo, nO*nV) + call gpu_dgemm(blas_handle, 'T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc%f(1,1,1), cholesky_mo_num, tcc2%f(1,1,1), cholesky_mo_num, 0.d0, & + X_ovvo%f(1,1,1,1), nO*nV) - deallocate(tcc, tcc2) + call gpu_synchronize() + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,r2,X_ovvo) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_ovvo(u,beta,gam,v) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, -1.d0, X_ovvo%f(1,beta,gam,1), & + nO*nV*nV, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) + call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, -1.d0, X_ovvo%f(1,gam,beta,1), & + nO*nV*nV, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp do - do beta = 1, nV - do gam = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(v,u,gam,beta) = r2%f(v,u,gam,beta) - X_ovvo(u,beta,gam,v) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - deallocate(X_ovvo) + call gpu_set_stream(blas_handle, gpu_default_stream) + + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_synchronize() + call gpu_deallocate(tcc) + call gpu_deallocate(tcc2) + call gpu_deallocate(X_ovvo) + !----- - call gpu_allocate(X_oovv,nO,nO,nV,nV) + type(gpu_double4) :: J1, K1 + type(gpu_double4) :: Y_voov, Z_ovov - call dgemm('N','N',nO*nO*nV,nV,nO, & - 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & - t1%f , size(t1%f,1), & - 0d0, X_oovv%f, size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3)) + call gpu_allocate(J1,nO,nV,nV,nO) + call compute_J1_chol(nO,nV,t1%f,t2%f,d_cc_space_v_ovvo%f,d_cc_space_v_ovoo%f, & + d_cc_space_v_vvoo%f,J1%f) - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,beta,gam) - X_oovv%f(v,u,gam,beta) - enddo - enddo + call gpu_allocate(K1,nO,nV,nO,nV) + call compute_K1_chol(nO,nV,t1%f,t2%f,d_cc_space_v_ovoo%f,d_cc_space_v_vvoo%f, & + d_cc_space_v_ovov%f,K1%f) + + + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + call gpu_allocate(Y_voov,nV,nO,nO,nV) + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo + + do i=1, nO + do a=1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, -0.5d0, K1%f(1,a,i,1), nO*nV*nO, X_ovvo%f(1,1,a,i), nO) enddo enddo - !$omp end do - !$omp end parallel - call gpu_deallocate(X_oovv) - double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO)) - - !$omp parallel & - !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & - !$omp private(a,v,gam,i) & - !$omp default(none) - do i = 1, nO - !$omp do - do gam = 1, nV - do v = 1, nO - do a = 1, nV - X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel - - allocate(Y_oovo(nO,nO,nV,nO)) - call dgemm('N','N',nO,nO*nV*nO,nV, & - 1d0, t1%f, size(t1%f,1), & - X_vovo, size(X_vovo,1), & - 0d0, Y_oovo, size(Y_oovo,1)) - - deallocate(X_vovo) - call gpu_allocate(X_oovv,nO,nO,nV,nV) - call dgemm('N','N',nO*nO*nV, nV, nO, & - 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & - t1%f , size(t1%f,1), & - 0d0, X_oovv%f, size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3)) - deallocate(Y_oovo) - - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - X_oovv%f(u,v,gam,beta) - X_oovv%f(v,u,beta,gam) - enddo - enddo + do gam=1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do v=1, nO + call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, 2.d0, t2%f(1,v,1,gam), & + nO*nO, -1.d0, t2%f(1,v,gam,1), nO*nO*nV, Y_voov%f(1,1,v,gam), nV) enddo enddo - !$omp end do - !$omp end parallel - call gpu_deallocate(X_oovv) + call gpu_allocate(Z_ovov,nO,nV,nO,nV) - double precision, allocatable :: J1(:,:,:,:) - allocate(J1(nO,nV,nV,nO)) - call compute_J1_chol(nO,nV,t1%f,t2%f,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvoo,J1) - - double precision, allocatable :: K1(:,:,:,:) - allocate(K1(nO,nV,nO,nV)) - call compute_K1_chol(nO,nV,t1%f,t2%f,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,K1) - - allocate(X_ovvo(nO,nV,nV,nO)) - !$omp parallel & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(shared) - do i = 1, nO - !$omp do - do a = 1, nV - do beta = 1, nV - do u = 1, nO - X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) - enddo - enddo - enddo - !$omp end do nowait + do a=1,nV + call gpu_stream_destroy(stream(a)) enddo - !$omp end parallel - deallocate(J1) - double precision, allocatable :: Y_voov(:,:,:,:) - allocate(Y_voov(nV,nO,nO,nV)) + call gpu_deallocate(J1) + call gpu_set_stream(blas_handle, gpu_default_stream) - !$omp parallel & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(shared) - !$omp do - do gam = 1, nV - do v = 1, nO - do i = 1, nO - do a = 1, nV - Y_voov(a,i,v,gam) = 2d0 * t2%f(i,v,a,gam) - t2%f(i,v,gam,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - double precision, allocatable :: Z_ovov(:,:,:,:) - allocate(Z_ovov(nO,nV,nO,nV)) + call gpu_dgemm(blas_handle, 'N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo%f(1,1,1,1), size(X_ovvo%f,1) * size(X_ovvo%f,2), & + Y_voov%f(1,1,1,1), size(Y_voov%f,1) * size(Y_voov%f,2), & + 0d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) - call dgemm('N','N', nO*nV,nO*nV,nV*nO, & - 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & - Y_voov, size(Y_voov,1) * size(Y_voov,2), & - 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + call gpu_synchronize() + call gpu_deallocate(Y_voov) + call gpu_deallocate(X_ovvo) - deallocate(X_ovvo,Y_voov) - - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - - deallocate(Z_ovov) - - double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) - allocate(X_ovov(nO,nV,nO,nV)) - allocate(Y_ovov(nO,nV,nO,nV)) + type(gpu_double4) :: Y_ovov, X_ovov + call gpu_allocate(X_ovov,nO,nV,nO,nV) + call gpu_allocate(Y_ovov,nO,nV,nO,nV) +!TODO !$omp parallel & !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & @@ -834,7 +779,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do u = 1, nO do a = 1, nV do i = 1, nO - X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + X_ovov%f(i,a,u,beta) = 0.5d0 * K1%f(u,a,i,beta) enddo enddo enddo @@ -846,7 +791,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do v = 1, nO do a = 1, nV do i = 1, nO - Y_ovov(i,a,v,gam) = t2%f(i,v,gam,a) + Y_ovov%f(i,a,v,gam) = t2%f(i,v,gam,a) enddo enddo enddo @@ -854,12 +799,12 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & !$omp end do !$omp end parallel - allocate(Z_ovov(nO,nV,nO,nV)) - call dgemm('T','N',nO*nV,nO*nV,nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - deallocate(X_ovov, Y_ovov) + call gpu_dgemm(blas_handle, 'T','N',nO*nV,nO*nV,nO*nV, & + -1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + 1d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) + + call gpu_synchronize() !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & @@ -870,16 +815,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do beta = 1, nV do v = 1, nO do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Z_ovov%f(u,beta,v,gam) + Z_ovov%f(v,gam,u,beta) enddo enddo enddo enddo !$omp end do !$omp end parallel - deallocate(Z_ovov) - allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) !$omp parallel & !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & @@ -889,7 +832,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do i = 1, nO do gam = 1, nV do u = 1, nO - X_ovov(u,gam,i,a) = K1(u,a,i,gam) + X_ovov%f(u,gam,i,a) = K1%f(u,a,i,gam) enddo enddo enddo @@ -901,7 +844,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do v = 1, nO do a = 1, nV do i = 1, nO - Y_ovov(i,a,v,beta) = t2%f(i,v,beta,a) + Y_ovov%f(i,a,v,beta) = t2%f(i,v,beta,a) enddo enddo enddo @@ -909,16 +852,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & !$omp end do !$omp end parallel - deallocate(K1) + call gpu_deallocate(K1) - allocate(Z_ovov(nO,nV,nO,nV)) - call dgemm('N','N',nO*nV,nO*nV,nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + call gpu_dgemm(blas_handle, 'N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + 0d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) - deallocate(X_ovov,Y_ovov) + call gpu_synchronize() + call gpu_deallocate(X_ovov) + call gpu_deallocate(Y_ovov) + + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -928,7 +874,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do beta = 1, nV do v = 1, nO do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + r2%f(u,v,beta,gam) = -r2%f(u,v,beta,gam) + Z_ovov%f(u,gam,v,beta) + Z_ovov%f(v,beta,u,gam) enddo enddo enddo @@ -936,9 +882,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & !$omp end do !$omp end parallel - deallocate(Z_ovov) - - ! Change the sign for consistency with the code in spin orbitals + call gpu_deallocate(Z_ovov) max_r2 = 0d0 !$omp parallel & @@ -951,7 +895,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & do a = 1, nV do j = 1, nO do i = 1, nO - r2%f(i,j,a,b) = -r2%f(i,j,a,b) max_r2_local = max(r2%f(i,j,a,b), max_r2_local) enddo enddo From f09e91cb2296b3c1fb5cf854dc06372c879c4104 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 8 Jul 2024 12:44:32 +0200 Subject: [PATCH 114/131] Working on CCSD GPU --- src/ccsd/ccsd_space_orb_sub.irp.f | 1 + src/ccsd/ccsd_space_orb_sub_chol.irp.f | 874 +++++++++++-------------- 2 files changed, 392 insertions(+), 483 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index e97c2325..d8131a9c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -215,6 +215,7 @@ subroutine run_ccsd_space_orb call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_f_vo, & r2, max_r2) else call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index abb9909b..a185df13 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -394,13 +394,14 @@ end subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_f_vo, & r2,max_r2) use gpu implicit none ! in integer, intent(in) :: nO, nV - type(gpu_double2), intent(in) :: t1, H_oo, H_vv + type(gpu_double2), intent(in) :: t1, H_oo, H_vv, d_cc_space_f_vo type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_oovv type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo type(gpu_double4), intent(in) :: d_cc_space_v_vvoo, d_cc_space_v_oovo @@ -504,7 +505,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & type(gpu_double2) :: g_occ, g_vir call gpu_allocate(g_vir,nV,nV) call gpu_allocate(g_occ,nO,nO) - call compute_g_vir_chol(nO,nV,t1%f,t2%f,H_vv%f,g_vir%f) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,d_cc_space_f_vo, & + d_cc_space_v_ov_chol, d_cc_space_v_vv_chol, g_vir) call compute_g_occ_chol(nO,nV,t1%f,t2%f,H_oo%f,g_occ%f) type(gpu_double4) :: Y_oovv @@ -525,6 +527,10 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & t1%f(1,1) , size(t1%f,1), & 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) + + call gpu_dgeam(blas_handle, 'N', 'N', nO*nO, nV*nV, 1.d0, Y_oovv%f(1,1,1,1), & + nO*nO, 1.d0, r2%f(1,1,1,1), nO*nO, r2%f(1,1,1,1), nO*nO) + call gpu_synchronize() call gpu_deallocate(X_oovv) @@ -534,32 +540,13 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & type(gpu_double4) :: X_vovo, Y_oovo call gpu_allocate(X_vovo,nV,nO,nV,nO) -! !$omp parallel & -! !$omp shared(nO,nV,r2,Y_oovv) & -! !$omp private(u,v,gam,beta) & -! !$omp default(none) -! !$omp do -! do gam = 1, nV -! do beta = 1, nV -! do v = 1, nO -! do u = 1, nO -! r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(u,v,beta,gam) + Y_oovv%f(v,u,gam,beta) -! enddo -! enddo -! enddo -! enddo -! !$omp end do -! !$omp end parallel - do a=1,nV call gpu_stream_create(stream(a)) enddo do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) do beta = 1, nV - call gpu_set_stream(blas_handle, stream(beta)) - call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, Y_oovv%f(1,1,beta,gam), & - nO, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, r2%f(1,1,beta,gam), & nO, 1.d0, Y_oovv%f(1,1,gam,beta), nO, r2%f(1,1,beta,gam), nO) enddo @@ -579,34 +566,33 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_set_stream(blas_handle, gpu_default_stream) - type(gpu_double4) :: X_vovv - call gpu_allocate(X_vovv,nV,nO,nV,block_size) call gpu_allocate(Y_oovo,nO,nO,nV,nO) + !$OMP PARALLEL PRIVATE(blas, iblock, gam, X_vovv) + call gpu_blas_create(blas) + type(gpu_double4) :: X_vovv + call gpu_allocate(X_vovv,nV,nO,nV,block_size) + !$OMP DO do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) - call gpu_stream_create(stream(gam)) - call gpu_set_stream(blas_handle, stream(gam)) - call gpu_dgemm(blas_handle, 'T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + call gpu_dgemm(blas, 'T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & d_cc_space_v_vv_chol%f(1,1,gam), cholesky_mo_num, d_cc_space_v_ov_chol%f(1,1,1), & cholesky_mo_num, 0.d0, X_vovv%f(1,1,1,gam-iblock+1), nV) enddo - do gam = iblock, min(nV, iblock+block_size-1) - call gpu_stream_destroy(stream(gam)) - enddo - call gpu_synchronize() - - call gpu_set_stream(blas_handle, gpu_default_stream) - call gpu_dgemm(blas_handle, 'N','N', nO, & + call gpu_dgemm(blas, 'N','N', nO, & nO*nV*min(block_size, nV-iblock+1),nV, & 1.d0, t1%f(1,1) , size(t1%f,1), & X_vovv%f(1,1,1,1), size(X_vovv%f,1), & 0d0, Y_oovv%f(1,1,1,iblock), size(Y_oovv%f,1)) - enddo + !$OMP END DO + + call gpu_blas_destroy(blas) + call gpu_deallocate(X_vovv) + !$OMP END PARALLEL call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nO,nV, & 1d0, t1%f(1,1), size(t1%f,1), & @@ -619,47 +605,27 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) call gpu_synchronize() - call gpu_deallocate(X_vovv) call gpu_deallocate(X_vovo) call gpu_deallocate(Y_oovo) -! !$omp parallel & -! !$omp shared(nO,nV,r2,Y_oovv) & -! !$omp private(u,v,gam,beta) & -! !$omp default(none) -! !$omp do -! do gam = 1, nV -! do beta = 1, nV -! do v = 1, nO -! do u = 1, nO -! r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Y_oovv%f(u,v,gam,beta) + Y_oovv%f(v,u,beta,gam) -! enddo -! enddo -! enddo -! enddo -! !$omp end do -! !$omp end parallel - do a=1,nV call gpu_stream_create(stream(a)) enddo do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) do beta = 1, nV - call gpu_set_stream(blas_handle, stream(beta)) call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, 1.d0, Y_oovv%f(1,1,beta,gam), & nO, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) - call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, r2%f(1,1,beta,gam), & - nO, 1.d0, Y_oovv%f(1,1,gam,beta), nO, r2%f(1,1,beta,gam), nO) + enddo + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, 1.d0, Y_oovv%f(1,j,gam,1), nO*nO*nV, r2%f(1,j,1,gam), nO*nO) enddo enddo call gpu_set_stream(blas_handle, gpu_default_stream) - do a=1,nV - call gpu_stream_destroy(stream(a)) - enddo - call gpu_synchronize() call gpu_deallocate(Y_oovv) @@ -684,15 +650,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_synchronize() - do a=1,nV - call gpu_stream_create(stream(a)) - enddo do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, -1.d0, X_ovvo%f(1,1,gam,j), & + nO, 1.d0, r2%f(1,j,1,gam), nO*nO, r2%f(1,j,1,gam), nO*nO) + enddo do beta = 1, nV - call gpu_set_stream(blas_handle, stream(beta)) - call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, -1.d0, X_ovvo%f(1,beta,gam,1), & - nO*nV*nV, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, -1.d0, X_ovvo%f(1,gam,beta,1), & nO*nV*nV, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) enddo @@ -700,58 +665,41 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_set_stream(blas_handle, gpu_default_stream) - do a=1,nV - call gpu_stream_destroy(stream(a)) - enddo - - call gpu_synchronize() + call gpu_synchronize call gpu_deallocate(tcc) call gpu_deallocate(tcc2) call gpu_deallocate(X_ovvo) - !----- type(gpu_double4) :: J1, K1 type(gpu_double4) :: Y_voov, Z_ovov + call gpu_allocate(J1,nO,nV,nV,nO) - call compute_J1_chol(nO,nV,t1%f,t2%f,d_cc_space_v_ovvo%f,d_cc_space_v_ovoo%f, & - d_cc_space_v_vvoo%f,J1%f) + call compute_J1_chol(nO,nV,t1,t2,d_cc_space_v_ovvo,d_cc_space_v_ovoo, & + d_cc_space_v_vvoo,d_cc_space_v_vo_chol,d_cc_space_v_vv_chol,J1) call gpu_allocate(K1,nO,nV,nO,nV) - call compute_K1_chol(nO,nV,t1%f,t2%f,d_cc_space_v_ovoo%f,d_cc_space_v_vvoo%f, & - d_cc_space_v_ovov%f,K1%f) + call compute_K1_chol(nO,nV,t1,t2,d_cc_space_v_ovoo,d_cc_space_v_vvoo, & + d_cc_space_v_ovov,d_cc_space_v_ov_chol,d_cc_space_v_vv_chol,K1) call gpu_allocate(X_ovvo,nO,nV,nV,nO) call gpu_allocate(Y_voov,nV,nO,nO,nV) - do a=1,nV - call gpu_stream_create(stream(a)) - enddo - - do i=1, nO - do a=1, nV - call gpu_set_stream(blas_handle, stream(a)) + do a=1, nV + call gpu_set_stream(blas_handle, stream(a)) + do i=1, nO call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,a,1,i), & nO*nV, -0.5d0, K1%f(1,a,i,1), nO*nV*nO, X_ovvo%f(1,1,a,i), nO) - enddo - enddo - - do gam=1, nV - call gpu_set_stream(blas_handle, stream(gam)) - do v=1, nO - call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, 2.d0, t2%f(1,v,1,gam), & - nO*nO, -1.d0, t2%f(1,v,gam,1), nO*nO*nV, Y_voov%f(1,1,v,gam), nV) + call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, 2.d0, t2%f(1,i,1,a), & + nO*nO, -1.d0, t2%f(1,i,a,1), nO*nO*nV, Y_voov%f(1,1,i,a), nV) enddo enddo call gpu_allocate(Z_ovov,nO,nV,nO,nV) - do a=1,nV - call gpu_stream_destroy(stream(a)) - enddo - + call gpu_synchronize() call gpu_deallocate(J1) call gpu_set_stream(blas_handle, gpu_default_stream) @@ -769,35 +717,20 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_allocate(X_ovov,nO,nV,nO,nV) call gpu_allocate(Y_ovov,nO,nV,nO,nV) -!TODO - !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & - !$omp private(u,a,i,beta,gam) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - do a = 1, nV - do i = 1, nO - X_ovov%f(i,a,u,beta) = 0.5d0 * K1%f(u,a,i,beta) - enddo - enddo + do a=1, nV + call gpu_set_stream(blas_handle, stream(a)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, t2%f(1,j,1,a), & + nO*nO, 0.d0, t2%f(1,j,1,a), nO*nO, Y_ovov%f(1,a,j,1), nO*nV*nO) + enddo + do beta=1, nV + call gpu_dgeam(blas_handle, 'T', 'T', nO, nO, 0.5d0, K1%f(1,a,1,beta), & + nO*nV, 0.d0, K1%f(1,a,1,beta), nO*nV, X_ovov%f(1,a,1,beta), nO*nV) enddo enddo - !$omp end do nowait + call gpu_set_stream(blas_handle, gpu_default_stream) - !$omp do - do gam = 1, nV - do v = 1, nO - do a = 1, nV - do i = 1, nO - Y_ovov%f(i,a,v,gam) = t2%f(i,v,gam,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_synchronize() call gpu_dgemm(blas_handle, 'T','N',nO*nV,nO*nV,nO*nV, & -1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & @@ -806,51 +739,23 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_synchronize() - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = r2%f(u,v,beta,gam) + Z_ovov%f(u,beta,v,gam) + Z_ovov%f(v,gam,u,beta) - enddo - enddo + do gam=1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, 1.d0, Z_ovov%f(1,1,j,gam), nO, r2%f(1,j,1,gam), nO*nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,j,gam), & + nO, 0.d0, K1%f(1,1,j,gam), nO, X_ovov%f(1,gam,j,1), nO*nV*nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, t2%f(1,j,1,gam), & + nO*nO, 0.d0, t2%f(1,j,1,gam), nO*nO, Y_ovov%f(1,gam,j,1), nO*nV*nO) + enddo + do beta=1, nV + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Z_ovov%f(1,gam,1,beta), nO*nV, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel - !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(none) - !$omp do - do a = 1, nV - do i = 1, nO - do gam = 1, nV - do u = 1, nO - X_ovov%f(u,gam,i,a) = K1%f(u,a,i,gam) - enddo - enddo - enddo - enddo - !$omp end do nowait - - !$omp do - do beta = 1, nV - do v = 1, nO - do a = 1, nV - do i = 1, nO - Y_ovov%f(i,a,v,beta) = t2%f(i,v,beta,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_set_stream(blas_handle, gpu_default_stream) call gpu_deallocate(K1) @@ -865,22 +770,17 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_deallocate(Y_ovov) ! Change the sign for consistency with the code in spin orbitals - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, -1.d0, Z_ovov%f(1,gam,j,1), nO*nV*nO, r2%f(1,j,1,gam), nO*nO) + enddo do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2%f(u,v,beta,gam) = -r2%f(u,v,beta,gam) + Z_ovov%f(u,gam,v,beta) + Z_ovov%f(v,beta,u,gam) - enddo - enddo + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, -1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Z_ovov%f(1,beta,1,gam), nO*nV, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel call gpu_deallocate(Z_ovov) @@ -929,34 +829,42 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,d_cc_space_v_vooo, & ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - call dgemm('N','N', nO, nO*nO*nO, nV, & - 1d0, t1%f , size(t1%f,1), & - d_cc_space_v_vooo%f, size(d_cc_space_v_vooo%f,1), & - 0d0, Y_oooo%f, size(Y_oooo%f,1)) + call gpu_dgemm(blas_handle, 'N','N', nO, nO*nO*nO, nV, & + 1d0, t1%f(1,1) , size(t1%f,1), & + d_cc_space_v_vooo%f(1,1,1,1), size(d_cc_space_v_vooo%f,1), & + 0d0, Y_oooo%f(1,1,1,1), size(Y_oooo%f,1)) - !$omp parallel & - !$omp private(u,v,i,j) & - !$omp default(shared) - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do v = 1, nO - do u = 1, nO - A1%f(u,v,i,j) = d_cc_space_v_oooo%f(u,v,i,j) + Y_oooo%f(v,u,j,i) + Y_oooo%f(u,v,i,j) - enddo - enddo - enddo + type(gpu_stream) :: stream(nO) + + do i=1, nO + call gpu_stream_create(stream(i)) + enddo + + call gpu_synchronize() + + do j = 1, nO + call gpu_set_stream(blas_handle, stream(j)) + do i = 1, nO + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, d_cc_space_v_oooo%f(1,1,i,j), & + nO, 1.d0, Y_oooo%f(1,1,j,i), nO, A1%f(1,1,i,j), nO) + enddo + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO*nO, 1.d0, A1%f(1,1,1,j), & + nO, 1.d0, Y_oooo%f(1,1,1,j), nO, A1%f(1,1,1,j), nO) + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + do i=1, nO + call gpu_stream_destroy(stream(i)) enddo - !$omp end do - !$omp end parallel call gpu_deallocate(Y_oooo) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - call dgemm('N','N', nO*nO, nO*nO, nV*nV, & - 1d0, tau%f , size(tau%f,1) * size(tau%f,2), & - d_cc_space_v_vvoo%f, size(d_cc_space_v_vvoo%f,1) * size(d_cc_space_v_vvoo%f,2), & - 1d0, A1%f , size(A1%f,1) * size(A1%f,2)) + call gpu_dgemm(blas_handle, 'N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + d_cc_space_v_vvoo%f(1,1,1,1), size(d_cc_space_v_vvoo%f,1) * size(d_cc_space_v_vvoo%f,2), & + 1d0, A1%f(1,1,1,1), size(A1%f,1) * size(A1%f,2)) + call gpu_synchronize() end @@ -998,364 +906,364 @@ end ! g_vir -subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,d_cc_space_f_vo, & + d_cc_space_v_ov_chol, d_cc_space_v_vv_chol, g_vir) use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(out) :: g_vir(nV, nV) + type(gpu_double2), intent(in) :: t1, H_vv, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(in) :: t2 + type(gpu_double2), intent(out) :: g_vir integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - call dgemm('N','N',nV,nV,nO, & - -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & - t1 , size(t1,1), & - 0d0, g_vir, size(g_vir,1)) + type(gpu_stream) :: stream(max(nO,4)) - double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) - allocate(tmp_k(cholesky_mo_num)) - call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - - call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & - cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - g_vir, nV*nV) - deallocate(tmp_k) - - allocate(tmp_vo(cholesky_mo_num,nV,nO)) - call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) - - allocate(tmp_vo2(cholesky_mo_num,nO,nV)) - do beta=1,nV - do i=1,nO - do k=1,cholesky_mo_num - tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) - enddo - enddo - enddo - deallocate(tmp_vo) - - do beta = 1, nV - do a = 1, nV - g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) - enddo + do i=1,max(nO,4) + call gpu_stream_create(stream(i)) enddo - call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, & - tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) + call gpu_set_stream(blas_handle, stream(1)) + call gpu_dgemm(blas_handle, 'N','N',nV,nV,nO, & + -1d0, d_cc_space_f_vo%f(1,1) , size(d_cc_space_f_vo%f,1), & + t1%f(1,1) , size(t1%f,1), & + 0d0, g_vir%f(1,1), size(g_vir%f,1)) + + type(gpu_double1) :: tmp_k + type(gpu_double3) :: tmp_vo, tmp_vo2 + + call gpu_allocate(tmp_k,cholesky_mo_num) + + call gpu_set_stream(blas_handle, stream(2)) + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, t1%f(1,1), nO*nV, 0.d0, tmp_k%f(1), cholesky_mo_num) + + call gpu_dgemm(blas_handle, 'T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num, tmp_k%f(1), cholesky_mo_num, 1.d0, & + g_vir%f(1,1), nV*nV) + + call gpu_set_stream(blas_handle, stream(3)) + call gpu_allocate(tmp_vo,cholesky_mo_num,nV,nO) + + call gpu_dgemm(blas_handle, 'N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, 0.d0, tmp_vo%f(1,1,1), cholesky_mo_num*nV) + + call gpu_allocate(tmp_vo2,cholesky_mo_num,nO,nV) + + call gpu_synchronize() + call gpu_deallocate(tmp_k) + + do i=1,nO + call gpu_set_stream(blas_handle, stream(i)) + call gpu_dgeam(blas_handle, 'N', 'N', cholesky_mo_num, nV, -1.d0, tmp_vo%f(1,1,i), & + cholesky_mo_num, 0.d0, tmp_vo%f(1,1,i), cholesky_mo_num, tmp_vo2%f(1,i,1), cholesky_mo_num*nO) + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + + do i=1,max(nO,4) + call gpu_stream_destroy(stream(i)) + enddo + call gpu_deallocate(tmp_vo) + + call gpu_dgeam(blas_handle, 'N', 'N', nV, nV, 1.d0, g_vir%f(1,1), & + nV, 1.d0, H_vv%f(1,1), nV, g_vir%f(1,1), nV) + + call gpu_dgemm(blas_handle, 'T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, & + tmp_vo2%f(1,1,1), cholesky_mo_num*nO, 1.d0, g_vir%f(1,1), nV) + + call gpu_synchronize() + call gpu_deallocate(tmp_vo2) end ! J1 -subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,d_cc_space_v_vo_chol,d_cc_space_v_vv_chol,J1) use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) - double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) - double precision, intent(out) :: J1(nO, nV, nV, nO) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, v_ovvo, v_ovoo, v_vvoo + type(gpu_double4), intent(out) :: J1 + type(gpu_double3), intent(out) :: d_cc_space_v_vo_chol,d_cc_space_v_vv_chol integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) - allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + type(gpu_double4) :: X_ovoo, Y_ovov - !$omp parallel & - !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & - !$omp private(i,j,a,u,beta) & - !$omp default(none) - do i = 1, nO - !$omp do - do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = v_ovvo(u,a,beta,i) - enddo - enddo - enddo - !$omp end do nowait - enddo + call gpu_allocate(X_ovoo,nO,nV,nO,nO) - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do a = 1, nV - do u = 1, nO - X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + type(gpu_stream) :: stream(nV) - call dgemm('N','N',nO*nV*nO,nV,nO, & - -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & - t1 , size(t1,1), & - 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) - - !$omp parallel & - !$omp shared(nO,nV,J1,Y_ovov) & - !$omp private(i,beta,a,u) & - !$omp default(none) - do i = 1, nO - !$omp do - do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel - deallocate(X_ovoo) - - double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) - allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) - - call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, & - t1, nO, & - 0.d0, tmp_cc, cholesky_mo_num*nV) - - call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & - tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & - 0.d0, J1_tmp, nV*nO) - - deallocate(tmp_cc) do i=1,nO - do b=1,nV - do a=1,nV - do u=1,nO - J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) - enddo - enddo - enddo - enddo - - deallocate(J1_tmp) - - !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & - double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) - allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & - !$omp private(i,beta,a,u,b,j) & - !$omp default(none) - !$omp do - do b = 1, nV - do j = 1, nO - do beta = 1, nV - do u = 1, nO - Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - - !$omp do - do b = 1, nV - do j = 1, nO - do i = 1, nO - do a = 1, nV - X_voov(a,i,j,b) = v_vvoo(a,b,i,j) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - - call dgemm('N','T',nO*nV,nV*nO,nO*nV, & - -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - X_voov, size(X_voov,1) * size(X_voov,2), & - 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) - deallocate(X_voov) - - double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) - allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & - !$omp private(i,beta,a,u,j,b) & - !$omp default(none) - do i = 1, nO - !$omp do - do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) - enddo - enddo - enddo - !$omp end do nowait - enddo - - !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - do j = 1, nO - !$omp do - do b = 1, nV - do i = 1, nO - do a = 1, nV - Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) - enddo - enddo - enddo - !$omp end do nowait + call gpu_stream_create(stream(i)) enddo do j = 1, nO - !$omp do - do b = 1, nV - do beta = 1, nV - do u = 1, nO - X_ovvo(u,beta,b,j) = t2(u,j,beta,b) - enddo - enddo + call gpu_set_stream(blas_handle, stream(j)) + do i = 1, nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, v_ovoo%f(1,1,j,i), & + nO, 0.d0, X_ovoo%f(1,1,i,j), nO, X_ovoo%f(1,1,i,j), nO) enddo - !$omp end do nowait enddo - !$omp end parallel - call dgemm('N','T',nO*nV,nV*nO,nV*nO, & - 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & - Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & - 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + call gpu_set_stream(blas_handle, gpu_default_stream) + + do i=1,nO + call gpu_stream_destroy(stream(i)) + enddo + + call gpu_allocate(Y_ovov,nO,nV,nO,nV) + + call gpu_dgemm(blas_handle, 'N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo%f(1,1,1,1), size(X_ovoo%f,1) * size(X_ovoo%f,2) * size(X_ovoo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 0d0, Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2) * size(Y_ovov%f,3)) + + + call gpu_copy(v_ovvo, J1) + + call gpu_synchronize() + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,J1,Z_ovvo) & - !$omp private(i,beta,a,u) & - !$omp default(none) do i = 1, nO - !$omp do do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,1,beta,i), & + nO, 1.d0, Y_ovov%f(1,1,i,beta), nO, J1%f(1,1,beta,i), nO) enddo - !$omp end do nowait enddo - !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) + call gpu_allocate(tmp_cc,cholesky_mo_num,nV,nO) + call gpu_allocate(J1_tmp,nV,nO,nV,nO) + + call gpu_set_stream(blas_handle, gpu_default_stream) + + type(gpu_double4) :: J1_tmp + type(gpu_double3) :: tmp_cc + + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, & + t1%f(1,1), nO, & + 0.d0, tmp_cc%f(1,1,1), cholesky_mo_num*nV) + + call gpu_dgemm(blas_handle, 'T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc%f(1,1,1), cholesky_mo_num, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num, & + 0.d0, J1_tmp%f(1,1,1,1), nV*nO) + + + call gpu_deallocate(X_ovoo) + + call gpu_synchronize() + call gpu_deallocate(tmp_cc) + + do i = 1, nO + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'T', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, 1.d0, J1_tmp%f(1,1,a,i), nV, J1%f(1,a,1,i), nO*nV) + enddo + enddo + + type(gpu_double4) :: X_voov, Z_ovvo + + call gpu_allocate(X_voov,nV,nO,nO,nV) + call gpu_allocate(Z_ovvo,nO,nV,nV,nO) + + do j = 1, nO + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,j,1,beta), & + nO*nO, t1%f(j,beta), t1%f(1,1), nO, Y_ovov%f(1,beta,j,1), nO*nV*nO) + enddo + enddo + + do b = 1, nV + call gpu_set_stream(blas_handle, stream(b)) + call gpu_dgeam(blas_handle, 'N', 'N', nV, nO*nO, 1.d0, v_vvoo%f(1,b,1,1), & + nV*nV, 0.d0, X_voov%f(1,1,1,b), nV, X_voov%f(1,1,1,b), nV) + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + + call gpu_synchronize() + call gpu_deallocate(J1_tmp) + + call gpu_dgemm(blas_handle, 'N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + X_voov%f(1,1,1,1), size(X_voov%f,1) * size(X_voov%f,2), & + 0d0, Z_ovvo%f(1,1,1,1), size(Z_ovvo%f,1) * size(Z_ovvo%f,2)) + + call gpu_synchronize() + + do i = 1, nO + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, 1.d0, Z_ovvo%f(1,1,a,i), nO, J1%f(1,a,1,i), nO*nV) + enddo + enddo + + type(gpu_double4) :: X_ovvo, Y_vovo + call gpu_allocate(Y_vovo,nV,nO,nV,nO) + + do j = 1, nO + do i = 1, nO + call gpu_set_stream(blas_handle, stream(i)) + call gpu_dgeam(blas_handle, 'N', 'T', nV, nV, 1.d0, v_vvoo%f(1,1,i,j), & + nV, -0.5d0, v_vvoo%f(1,1,i,j), nV, Y_vovo%f(1,i,1,j), nO*nV) + enddo + enddo + + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + + do j = 1, nO + do b = 1, nV + call gpu_set_stream(blas_handle, stream(b)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, t2%f(1,j,1,b), & + nO*nO, 0.d0, t2%f(1,j,1,b), nO*nO, X_ovvo%f(1,1,b,j), nO) + enddo + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_synchronize() + call gpu_deallocate(X_voov) + + call gpu_dgemm(blas_handle, 'N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo%f(1,1,1,1), size(X_ovvo%f,1) * size(X_ovvo%f,2), & + Y_vovo%f(1,1,1,1), size(Y_vovo%f,1) * size(Y_vovo%f,2), & + 0d0, Z_ovvo%f(1,1,1,1), size(Z_ovvo%f,1) * size(Z_ovvo%f,2)) + + call gpu_synchronize() + + do i = 1, nO + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,1,beta,i), & + nO, 1.d0, Z_ovvo%f(1,beta,1,i), nO*nV, J1%f(1,1,beta,i), nO) + enddo + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_deallocate(Y_ovov) + call gpu_deallocate(X_ovvo) + + do a = 1, nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_deallocate(Z_ovvo) end ! K1 -subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov, & + d_cc_space_v_ov_chol,d_cc_space_v_vv_chol,K1) use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) - double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) - double precision, intent(out) :: K1(nO, nV, nO, nV) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, v_vvoo, v_ovov, v_ovoo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(out) :: K1 - double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + type(gpu_double4) :: X, Y, Z integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & - !$omp private(i,beta,a,u,j,b) & - !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do a = 1, nV - do u = 1, nO - K1(u,a,i,beta) = v_ovov(u,a,i,beta) - enddo - enddo - enddo + call gpu_copy(v_ovov, K1) + + type(gpu_stream) :: stream(nV) + do a = 1, nV + call gpu_stream_create(stream(a)) enddo - !$omp end do nowait + + call gpu_allocate(X,nV,nO,nV,nO) do i = 1, nO - !$omp do do a = 1, nV - do j = 1, nO - do b = 1, nV - X(b,j,a,i) = - v_vvoo(b,a,i,j) - enddo - enddo + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nV, nO, -1.d0, v_vvoo%f(1,a,i,1), & + nV*nV*nO, 0.d0, v_vvoo%f(1,a,i,1), nV*nV*nO, X%f(1,1,a,i), nV) enddo - !$omp end do nowait enddo + call gpu_allocate(Y,nO,nV,nV,nO) + do j = 1, nO - !$omp do - do b = 1, nV - do beta = 1, nV - do u = 1, nO - Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) - enddo - enddo + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,j,1,beta), & + nO*nO, t1%f(j,beta), t1%f(1,1), nO, Y%f(1,beta,1,j), nO*nV) enddo - !$omp end do enddo - !$omp end parallel - call dgemm('N','N',nO*nV*nO,nV,nO, & - -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & - t1 , size(t1,1), & - 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + call gpu_set_stream(blas_handle, gpu_default_stream) - double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) - allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) + call gpu_dgemm(blas_handle, 'N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo%f(1,1,1,1), size(v_ovoo%f,1) * size(v_ovoo%f,2) * size(v_ovoo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, K1%f(1,1,1,1) , size(K1%f,1) * size(K1%f,2) * size(K1%f,3)) - call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & - t1v, cholesky_mo_num*nO) + type(gpu_double4) :: K1tmp + type(gpu_double3) :: t1v - call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & - t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & - K1tmp, nO*nO) + call gpu_allocate(t1v,cholesky_mo_num,nO,nO) + + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, 0.d0, & + t1v%f(1,1,1), cholesky_mo_num*nO) + + call gpu_allocate(K1tmp,nO,nO,nV,nV) + + call gpu_dgemm(blas_handle, 'T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v%f(1,1,1), cholesky_mo_num, d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num, 0.d0, & + K1tmp%f(1,1,1,1), nO*nO) + + call gpu_allocate(Z,nO,nV,nV,nO) + call gpu_synchronize() - deallocate(t1v) ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) - call dgemm('N','N',nV*nO,nO*nV,nV*nO, & - 1d0, Y, size(Y,1) * size(Y,2), & - X, size(X,1) * size(X,2), & - 0d0, Z, size(Z,1) * size(Z,2)) + call gpu_dgemm(blas_handle, 'N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y%f(1,1,1,1), size(Y%f,1) * size(Y%f,2), & + X%f(1,1,1,1), size(X%f,1) * size(X%f,2), & + 0d0, Z%f(1,1,1,1), size(Z%f,1) * size(Z%f,2)) - !$omp parallel & - !$omp shared(nO,nV,K1,Z,K1tmp) & - !$omp private(i,beta,a,u) & - !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do a = 1, nV - do u = 1, nO - K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) - enddo - enddo + call gpu_synchronize() + call gpu_deallocate(t1v) + + do i = 1, nO + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & + nO, 1.d0, K1tmp%f(1,i,1,beta), nO*nO, K1%f(1,1,i,beta), nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & + nO, 1.d0, Z%f(1,beta,1,i), nO*nV, K1%f(1,1,i,beta), nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(K1tmp,X,Y,Z) + call gpu_deallocate(X) + call gpu_deallocate(Y) + + do a = 1, nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_deallocate(K1tmp) + call gpu_deallocate(Z) end From 9ad69bb27dc4195ae2ae2c9ea2f280156c20366e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 Jul 2024 03:27:54 +0200 Subject: [PATCH 115/131] GPU accelerated CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 75 +++++++++++++------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index a185df13..24fcc5af 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -507,7 +507,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & call gpu_allocate(g_occ,nO,nO) call compute_g_vir_chol(nO,nV,t1,t2,H_vv,d_cc_space_f_vo, & d_cc_space_v_ov_chol, d_cc_space_v_vv_chol, g_vir) - call compute_g_occ_chol(nO,nV,t1%f,t2%f,H_oo%f,g_occ%f) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo, & + d_cc_space_f_vo, d_cc_space_v_ov_chol, d_cc_space_v_oo_chol, d_cc_space_v_ovoo, g_occ) type(gpu_double4) :: Y_oovv call gpu_allocate(Y_oovv,nO,nO,nV,nV) @@ -870,37 +871,42 @@ end ! g_occ -subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo, & + d_cc_space_f_vo, d_cc_space_v_ov_chol, d_cc_space_v_oo_chol, d_cc_space_v_ovoo, g_occ) use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(out) :: g_occ(nO, nO) + type(gpu_double2), intent(in) :: t1, H_oo, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_oo_chol + type(gpu_double4), intent(in) :: t2, d_cc_space_v_ovoo + type(gpu_double2), intent(out) :: g_occ - g_occ = H_oo + call gpu_copy(H_oo, g_occ) - call dgemm('N','N',nO,nO,nV, & - 1d0, t1, size(t1,1), & - cc_space_f_vo, size(cc_space_f_vo,1), & - 1d0, g_occ, size(g_occ,1)) + call gpu_dgemm(blas_handle, 'N','N',nO,nO,nV, & + 1d0, t1%f(1,1), size(t1%f,1), & + d_cc_space_f_vo%f(1,1), size(d_cc_space_f_vo%f,1), & + 1d0, g_occ%f(1,1), size(g_occ%f,1)) - double precision, allocatable :: X(:) - allocate(X(cholesky_mo_num)) - call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & - cc_space_v_ov_chol, cholesky_mo_num, & - t1, 1, 0.d0, X, 1) + type(gpu_double1) :: X + call gpu_allocate(X,cholesky_mo_num) - call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & - cc_space_v_oo_chol, cholesky_mo_num, & - X, 1, 1.d0, g_occ, 1) - deallocate(X) + call gpu_dgemv(blas_handle, 'N',cholesky_mo_num,nO*nV,2.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, & + t1%f(1,1), 1, 0.d0, X%f(1), 1) - call dgemv('T',nO*nV,nO*nO,-1.d0, & - cc_space_v_ovoo, nO*nV, & - t1, 1, 1.d0, g_occ, 1) + call gpu_dgemv(blas_handle, 'T',cholesky_mo_num,nO*nO,1.d0, & + d_cc_space_v_oo_chol%f(1,1,1), cholesky_mo_num, & + X%f(1), 1, 1.d0, g_occ%f(1,1), 1) + + call gpu_dgemv(blas_handle, 'T',nO*nV,nO*nO,-1.d0, & + d_cc_space_v_ovoo%f(1,1,1,1), nO*nV, & + t1%f(1,1), 1, 1.d0, g_occ%f(1,1), 1) + + call gpu_synchronize() + call gpu_deallocate(X) end @@ -1193,22 +1199,15 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov, & enddo call gpu_allocate(X,nV,nO,nV,nO) - - do i = 1, nO - do a = 1, nV - call gpu_set_stream(blas_handle, stream(a)) - call gpu_dgeam(blas_handle, 'N', 'N', nV, nO, -1.d0, v_vvoo%f(1,a,i,1), & - nV*nV*nO, 0.d0, v_vvoo%f(1,a,i,1), nV*nV*nO, X%f(1,1,a,i), nV) - enddo - enddo - call gpu_allocate(Y,nO,nV,nV,nO) - do j = 1, nO - do beta = 1, nV - call gpu_set_stream(blas_handle, stream(beta)) - call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,j,1,beta), & - nO*nO, t1%f(j,beta), t1%f(1,1), nO, Y%f(1,beta,1,j), nO*nV) + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + do i = 1, nO + call gpu_dgeam(blas_handle, 'N', 'N', nV, nO, -1.d0, v_vvoo%f(1,a,i,1), & + nV*nV*nO, 0.d0, v_vvoo%f(1,a,i,1), nV*nV*nO, X%f(1,1,a,i), nV) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,i,1,a), & + nO*nO, t1%f(i,a), t1%f(1,1), nO, Y%f(1,a,1,i), nO*nV) enddo enddo @@ -1246,9 +1245,9 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov, & call gpu_synchronize() call gpu_deallocate(t1v) + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) do i = 1, nO - do beta = 1, nV - call gpu_set_stream(blas_handle, stream(beta)) call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & nO, 1.d0, K1tmp%f(1,i,1,beta), nO*nO, K1%f(1,1,i,beta), nO) call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & From dd9c6dcc03e6d24a78ed5651e5c825e81d68a9c6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 Jul 2024 21:11:13 +0200 Subject: [PATCH 116/131] Introducing dpcpp --- configure | 8 +- plugins/local/gpu_intel/LIB | 1 + plugins/local/gpu_intel/NEED | 1 + plugins/local/gpu_intel/README.rst | 8 + plugins/local/gpu_intel/gpu.sycl | 266 +++++++++++++++++++++++++++++ 5 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 plugins/local/gpu_intel/LIB create mode 100644 plugins/local/gpu_intel/NEED create mode 100644 plugins/local/gpu_intel/README.rst create mode 100644 plugins/local/gpu_intel/gpu.sycl diff --git a/configure b/configure index 08dac444..3e3390e1 100755 --- a/configure +++ b/configure @@ -40,7 +40,7 @@ Usage: $(basename $0) -c $(basename $0) -h $(basename $0) -i - $(basename $0) -g [nvidia|none] + $(basename $0) -g [nvidia|intel|none] Options: -c Define a COMPILATION configuration file, @@ -49,7 +49,7 @@ Options: -i INSTALL . Use at your OWN RISK: no support will be provided for the installation of dependencies. - -g [nvidia|none] Choose GPU acceleration (experimental) + -g [nvidia|intel|none] Choose GPU acceleration Example: ./$(basename $0) -c config/gfortran.cfg @@ -121,6 +121,10 @@ case "$GPU" in echo "Activating AMD GPU acceleration" ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch ;; + intel) # Intel + echo "Activating Intel GPU acceleration" + ln -s ${QP_ROOT}/plugins/local/gpu_intel ${QP_ROOT}/src/gpu_arch + ;; nvidia) # Nvidia echo "Activating Nvidia GPU acceleration" ln -s ${QP_ROOT}/plugins/local/gpu_nvidia ${QP_ROOT}/src/gpu_arch diff --git a/plugins/local/gpu_intel/LIB b/plugins/local/gpu_intel/LIB new file mode 100644 index 00000000..027c35b0 --- /dev/null +++ b/plugins/local/gpu_intel/LIB @@ -0,0 +1 @@ +-lmkl_sycl -lsycl diff --git a/plugins/local/gpu_intel/NEED b/plugins/local/gpu_intel/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/local/gpu_intel/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/gpu_intel/README.rst b/plugins/local/gpu_intel/README.rst new file mode 100644 index 00000000..3a4653de --- /dev/null +++ b/plugins/local/gpu_intel/README.rst @@ -0,0 +1,8 @@ +========= +gpu_intel +========= + +Intel implementation of GPU routines. Uses MKL and SYCL. +```bash +dpcpp -O3 -c gpu.o gpu.sycl +``` diff --git a/plugins/local/gpu_intel/gpu.sycl b/plugins/local/gpu_intel/gpu.sycl new file mode 100644 index 00000000..7b589490 --- /dev/null +++ b/plugins/local/gpu_intel/gpu.sycl @@ -0,0 +1,266 @@ +#include +#include +#include +#include + +extern "C" { + +/* Generic functions */ + +int gpu_ndevices() { + return 1; +} + +void gpu_set_device(int32_t igpu) { +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, int64_t size) { + auto queue = sycl::queue(sycl::default_selector{}); + + try { + *ptr = sycl::malloc_shared(size, queue); + assert(*ptr != nullptr); + } catch (const sycl::exception& e) { + std::cerr << "SYCL exception caught: " << e.what() << std::endl; + *ptr = nullptr; // If allocation fails, set pointer to nullptr + } +} + +void gpu_deallocate(void** ptr) { + assert(*ptr != nullptr); + sycl::free(*ptr, sycl::queue(sycl::default_selector{})); + *ptr = nullptr; +} + +/* Upload data from host to device */ +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + sycl::queue queue(sycl::default_selector{}); + queue.memcpy(gpu_ptr, cpu_ptr, n).wait(); +} + +/* Download data from device to host */ +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + sycl::queue queue(sycl::default_selector{}); + queue.memcpy(cpu_ptr, gpu_ptr, n).wait(); +} + +/* Copy data from one GPU memory location to another */ +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + sycl::queue queue(sycl::default_selector{}); + queue.memcpy(gpu_ptr_dest, gpu_ptr_src, n).wait(); +} + +/* Queues */ + +/* SYCL queue as a replacement for CUDA stream */ +void gpu_stream_create(sycl::queue** ptr) { + *ptr = new sycl::queue(sycl::default_selector{}); +} + +void gpu_stream_destroy(sycl::queue** ptr) { + assert(*ptr != nullptr); + delete *ptr; + *ptr = nullptr; +} + +To translate the CUDA functions related to stream management to SYCL, you will need to adapt to SYCL's approach to command groups and queues. SYCL uses queues to manage execution order and parallelism, similar to CUDA streams but integrated within the SYCL ecosystem. + +### Original CUDA Code + +```c +/* Create a CUDA stream */ +void gpu_stream_create(cudaStream_t* ptr) { + cudaError_t rc = cudaStreamCreate(ptr); + assert(rc == cudaSuccess); +} + +/* Destroy a CUDA stream */ +void gpu_stream_destroy(cudaStream_t* ptr) { + assert(ptr != NULL); + cudaError_t rc = cudaStreamDestroy(*ptr); + assert(rc == cudaSuccess); + *ptr = NULL; +} + +/* Set a specific stream for cuBLAS operations */ +void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) { + cublasSetStream(handle, stream); +} + +/* Synchronize all streams */ +void gpu_synchronize() { + cudaDeviceSynchronize(); +} +``` + +### Translated SYCL Code + +```cpp +#include +#include + +/* SYCL queue as a replacement for CUDA stream */ +void gpu_stream_create(sycl::queue** ptr) { + *ptr = new sycl::queue(sycl::default_selector{}); +} + +void gpu_stream_destroy(sycl::queue** ptr) { + *ptr->wait_and_throw(); + assert(*ptr != nullptr); + delete *ptr; + *ptr = nullptr; +} + +/* SYCL does not need an equivalent for setting a stream on a cuBLAS handle, + because each SYCL queue acts independently and can be used directly. */ + +void gpu_synchronize() { + sycl::queue queue(sycl::default_selector{}); + queue.wait_and_throw(); +} + +/* BLAS functions */ + +typedef struct { + sycl::queue* queue; +} blasHandle_t; + +void gpu_set_stream(blasHandle_t* handle, sycl::queue* ptr) { + handle->queue = ptr; +} + +void gpu_blas_create(blasHandle_t* ptr) { + *ptr = new blasHandle_t; + assert(*ptr != nullptr); + ptr->queue = new sycl::queue(sycl::default_selector{}); + assert(ptr->queue != nullptr); +} + +void gpu_blas_destroy(blasHandle_t* ptr) { + assert(*ptr != nullptr); + delete ptr->queue; + delete *ptr; + *ptr = nullptr; +} + + +void gpu_ddot(blasHandle_t* handle, const int64_t n, const double* x, const int64_t incx, + const double* y, const int64_t incy, double* result) { + // Ensure input parameters are valid + assert(handle != nullptr); + assert(handle->queue != nullptr); + assert(n > 0); + assert(incx > 0); + assert(incy > 0); + assert(x != nullptr); + assert(y != nullptr); + assert(result != nullptr); + + // SYCL buffer for the result + sycl::buffer result_buf(result, sycl::range<1>(1)); + + sycl::queue& queue = handle->queue; + + // Perform the dot product operation + queue.submit([&](sycl::handler& cgh) { + // Accessors for the buffers + auto result_acc = result_buf.get_access(cgh); + + // This is an asynchronous call to compute dot product + cgh.single_task([=]() { + result_acc[0] = oneapi::mkl::blas::dot(cgh, n, x, incx, y, incy); + }); + }); + +} + +void gpu_dgemv(blasHandle_t* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { + + assert(handle != nullptr); + assert(handle->queue != nullptr); + + // Validate matrix dimensions and increments to be positive + assert(m > 0 && n > 0 && lda > 0 && incx > 0 && incy > 0); + assert(a != nullptr && x != nullptr && y != nullptr && alpha != nullptr && beta != nullptr); + + // Determine the operation type + oneapi::mkl::transpose transa_ = oneapi::mkl::transpose::nontrans; + if (*transa == 'T' || *transa == 't') { + transa_ = oneapi::mkl::transpose::trans; + } + + // Perform DGEMV operation using oneMKL + handle->queue->submit([&](sycl::handler& cgh) { + // Use accessors to ensure data consistency and dependency resolution + auto a_acc = sycl::accessor(a, sycl::range(m * lda), sycl::read_only, cgh); + auto x_acc = sycl::accessor(x, sycl::range(n * incx), sycl::read_only, cgh); + auto y_acc = sycl::accessor(y, sycl::range(m * incy), sycl::read_write, cgh); + + cgh.parallel_for(sycl::range(1), [=](sycl::id<1>) { + oneapi::mkl::blas::gemv(*handle->queue, transa_, m, n, *alpha, a_acc.get_pointer(), lda, x_acc.get_pointer(), incx, *beta, y_acc.get_pointer(), incy); + }); + }); + +} + +void gpu_dgemm(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { + + assert(handle != nullptr && handle->queue != nullptr); + assert(m > 0 && n > 0 && k > 0 && lda > 0 && ldb > 0 && ldc > 0); + assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr); + + // Transpose operations + auto transa_ = (*transa == 'T' || *transa == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; + auto transb_ = (*transb == 'T' || *transb == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; + + // Ensure queue is ready + handle->queue->submit([&](sycl::handler& cgh) { + // Accessors for matrices + auto a_acc = sycl::accessor(a, sycl::range<1>(m * lda), sycl::read_only, cgh); + auto b_acc = sycl::accessor(b, sycl::range<1>(k * ldb), sycl::read_only, cgh); + auto c_acc = sycl::accessor(c, sycl::range<1>(m * ldc), sycl::read_write, cgh); + + cgh.parallel_for(sycl::range(1), [=](sycl::id<1>) { + oneapi::mkl::blas::gemm(*handle->queue, transa_, transb_, m, n, k, + *alpha, a_acc.get_pointer(), lda, + b_acc.get_pointer(), ldb, + *beta, c_acc.get_pointer(), ldc); + }); + }); + +} + +void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + assert(handle != nullptr && handle->queue != nullptr); + assert(m > 0 && n > 0 && lda > 0 && ldb > 0 && ldc > 0); + assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr); + + // Determine transpose operations + bool transA = (*transa == 'T' || *transa == 't'); + bool transB = (*transb == 'T' || *transb == 't'); + + handle->queue->submit([&](sycl::handler& cgh) { + auto a_acc = sycl::accessor(a, sycl::range(m * lda), sycl::read_only, cgh); + auto b_acc = sycl::accessor(b, sycl::range(n * ldb), sycl::read_only, cgh); + auto c_acc = sycl::accessor(c, sycl::range(m * ldc), sycl::read_write, cgh); + + cgh.parallel_for(sycl::range<2>(m, n), [=](sycl::id<2> idx) { + int i = idx[0]; + int j = idx[1]; + int ai = transA ? j * lda + i : i * lda + j; + int bi = transB ? j * ldb + i : i * ldb + j; + int ci = i * ldc + j; + + c_acc[ci] = (*alpha) * a_acc[ai] + (*beta) * b_acc[bi]; + }); + }); + +} + +} // extern C From 44b8e22e7aebf4dd89874549eac8bb8aef2fb16d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 Jul 2024 22:02:13 +0200 Subject: [PATCH 117/131] Fixed sycl --- plugins/local/gpu_intel/README.rst | 2 +- plugins/local/gpu_intel/gpu.sycl | 139 ++++++----------------------- 2 files changed, 26 insertions(+), 115 deletions(-) diff --git a/plugins/local/gpu_intel/README.rst b/plugins/local/gpu_intel/README.rst index 3a4653de..d42e2557 100644 --- a/plugins/local/gpu_intel/README.rst +++ b/plugins/local/gpu_intel/README.rst @@ -4,5 +4,5 @@ gpu_intel Intel implementation of GPU routines. Uses MKL and SYCL. ```bash -dpcpp -O3 -c gpu.o gpu.sycl +icpx -fsycl gpu.cxx -c -qmkl=sequential ``` diff --git a/plugins/local/gpu_intel/gpu.sycl b/plugins/local/gpu_intel/gpu.sycl index 7b589490..1f9f89ce 100644 --- a/plugins/local/gpu_intel/gpu.sycl +++ b/plugins/local/gpu_intel/gpu.sycl @@ -18,7 +18,7 @@ void gpu_set_device(int32_t igpu) { /* Allocation functions */ void gpu_allocate(void** ptr, int64_t size) { - auto queue = sycl::queue(sycl::default_selector{}); + auto queue = sycl::queue(sycl::default_selector_v); try { *ptr = sycl::malloc_shared(size, queue); @@ -31,25 +31,25 @@ void gpu_allocate(void** ptr, int64_t size) { void gpu_deallocate(void** ptr) { assert(*ptr != nullptr); - sycl::free(*ptr, sycl::queue(sycl::default_selector{})); + sycl::free(*ptr, sycl::queue(sycl::default_selector_v)); *ptr = nullptr; } /* Upload data from host to device */ void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { - sycl::queue queue(sycl::default_selector{}); + sycl::queue queue(sycl::default_selector_v); queue.memcpy(gpu_ptr, cpu_ptr, n).wait(); } /* Download data from device to host */ void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { - sycl::queue queue(sycl::default_selector{}); + sycl::queue queue(sycl::default_selector_v); queue.memcpy(cpu_ptr, gpu_ptr, n).wait(); } /* Copy data from one GPU memory location to another */ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { - sycl::queue queue(sycl::default_selector{}); + sycl::queue queue(sycl::default_selector_v); queue.memcpy(gpu_ptr_dest, gpu_ptr_src, n).wait(); } @@ -57,7 +57,7 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { /* SYCL queue as a replacement for CUDA stream */ void gpu_stream_create(sycl::queue** ptr) { - *ptr = new sycl::queue(sycl::default_selector{}); + *ptr = new sycl::queue(sycl::default_selector_v); } void gpu_stream_destroy(sycl::queue** ptr) { @@ -66,59 +66,8 @@ void gpu_stream_destroy(sycl::queue** ptr) { *ptr = nullptr; } -To translate the CUDA functions related to stream management to SYCL, you will need to adapt to SYCL's approach to command groups and queues. SYCL uses queues to manage execution order and parallelism, similar to CUDA streams but integrated within the SYCL ecosystem. - -### Original CUDA Code - -```c -/* Create a CUDA stream */ -void gpu_stream_create(cudaStream_t* ptr) { - cudaError_t rc = cudaStreamCreate(ptr); - assert(rc == cudaSuccess); -} - -/* Destroy a CUDA stream */ -void gpu_stream_destroy(cudaStream_t* ptr) { - assert(ptr != NULL); - cudaError_t rc = cudaStreamDestroy(*ptr); - assert(rc == cudaSuccess); - *ptr = NULL; -} - -/* Set a specific stream for cuBLAS operations */ -void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) { - cublasSetStream(handle, stream); -} - -/* Synchronize all streams */ void gpu_synchronize() { - cudaDeviceSynchronize(); -} -``` - -### Translated SYCL Code - -```cpp -#include -#include - -/* SYCL queue as a replacement for CUDA stream */ -void gpu_stream_create(sycl::queue** ptr) { - *ptr = new sycl::queue(sycl::default_selector{}); -} - -void gpu_stream_destroy(sycl::queue** ptr) { - *ptr->wait_and_throw(); - assert(*ptr != nullptr); - delete *ptr; - *ptr = nullptr; -} - -/* SYCL does not need an equivalent for setting a stream on a cuBLAS handle, - because each SYCL queue acts independently and can be used directly. */ - -void gpu_synchronize() { - sycl::queue queue(sycl::default_selector{}); + sycl::queue queue(sycl::default_selector_v); queue.wait_and_throw(); } @@ -132,17 +81,17 @@ void gpu_set_stream(blasHandle_t* handle, sycl::queue* ptr) { handle->queue = ptr; } -void gpu_blas_create(blasHandle_t* ptr) { - *ptr = new blasHandle_t; +void gpu_blas_create(blasHandle_t** ptr) { + *ptr = (blasHandle_t*) malloc(sizeof(blasHandle_t)); assert(*ptr != nullptr); - ptr->queue = new sycl::queue(sycl::default_selector{}); - assert(ptr->queue != nullptr); + (*ptr)->queue = new sycl::queue(sycl::default_selector_v); + assert((*ptr)->queue != nullptr); } -void gpu_blas_destroy(blasHandle_t* ptr) { +void gpu_blas_destroy(blasHandle_t** ptr) { assert(*ptr != nullptr); - delete ptr->queue; - delete *ptr; + delete (*ptr)->queue; + free(*ptr); *ptr = nullptr; } @@ -159,21 +108,7 @@ void gpu_ddot(blasHandle_t* handle, const int64_t n, const double* x, const int6 assert(y != nullptr); assert(result != nullptr); - // SYCL buffer for the result - sycl::buffer result_buf(result, sycl::range<1>(1)); - - sycl::queue& queue = handle->queue; - - // Perform the dot product operation - queue.submit([&](sycl::handler& cgh) { - // Accessors for the buffers - auto result_acc = result_buf.get_access(cgh); - - // This is an asynchronous call to compute dot product - cgh.single_task([=]() { - result_acc[0] = oneapi::mkl::blas::dot(cgh, n, x, incx, y, incy); - }); - }); + oneapi::mkl::blas::dot(*handle->queue, n, x, incx, y, incy, result); } @@ -194,16 +129,7 @@ void gpu_dgemv(blasHandle_t* handle, const char* transa, const int64_t m, const } // Perform DGEMV operation using oneMKL - handle->queue->submit([&](sycl::handler& cgh) { - // Use accessors to ensure data consistency and dependency resolution - auto a_acc = sycl::accessor(a, sycl::range(m * lda), sycl::read_only, cgh); - auto x_acc = sycl::accessor(x, sycl::range(n * incx), sycl::read_only, cgh); - auto y_acc = sycl::accessor(y, sycl::range(m * incy), sycl::read_write, cgh); - - cgh.parallel_for(sycl::range(1), [=](sycl::id<1>) { - oneapi::mkl::blas::gemv(*handle->queue, transa_, m, n, *alpha, a_acc.get_pointer(), lda, x_acc.get_pointer(), incx, *beta, y_acc.get_pointer(), incy); - }); - }); + oneapi::mkl::blas::column_major::gemv(*handle->queue, transa_, m, n, *alpha, a, lda, x, incx, *beta, y, incy); } @@ -218,23 +144,12 @@ void gpu_dgemm(blasHandle_t* handle, const char* transa, const char* transb, con auto transa_ = (*transa == 'T' || *transa == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; auto transb_ = (*transb == 'T' || *transb == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; - // Ensure queue is ready - handle->queue->submit([&](sycl::handler& cgh) { - // Accessors for matrices - auto a_acc = sycl::accessor(a, sycl::range<1>(m * lda), sycl::read_only, cgh); - auto b_acc = sycl::accessor(b, sycl::range<1>(k * ldb), sycl::read_only, cgh); - auto c_acc = sycl::accessor(c, sycl::range<1>(m * ldc), sycl::read_write, cgh); - - cgh.parallel_for(sycl::range(1), [=](sycl::id<1>) { - oneapi::mkl::blas::gemm(*handle->queue, transa_, transb_, m, n, k, - *alpha, a_acc.get_pointer(), lda, - b_acc.get_pointer(), ldb, - *beta, c_acc.get_pointer(), ldc); - }); - }); + oneapi::mkl::blas::column_major::gemm(*handle->queue, transa_, transb_, m, n, k, + *alpha, a, lda, b, ldb, *beta, c, ldc); } + void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { assert(handle != nullptr && handle->queue != nullptr); @@ -246,18 +161,14 @@ void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, con bool transB = (*transb == 'T' || *transb == 't'); handle->queue->submit([&](sycl::handler& cgh) { - auto a_acc = sycl::accessor(a, sycl::range(m * lda), sycl::read_only, cgh); - auto b_acc = sycl::accessor(b, sycl::range(n * ldb), sycl::read_only, cgh); - auto c_acc = sycl::accessor(c, sycl::range(m * ldc), sycl::read_write, cgh); - cgh.parallel_for(sycl::range<2>(m, n), [=](sycl::id<2> idx) { - int i = idx[0]; - int j = idx[1]; - int ai = transA ? j * lda + i : i * lda + j; - int bi = transB ? j * ldb + i : i * ldb + j; - int ci = i * ldc + j; + const int i = idx[0]; + const int j = idx[1]; + const int ai = transA ? j * lda + i : i * lda + j; + const int bi = transB ? j * ldb + i : i * ldb + j; + const int ci = i * ldc + j; - c_acc[ci] = (*alpha) * a_acc[ai] + (*beta) * b_acc[bi]; + c[ci] = (*alpha) * a[ai] + (*beta) * b[bi]; }); }); From 6c275d54ef050ec8d210a35aa4bbb2c93d176f34 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 Jul 2024 22:14:19 +0200 Subject: [PATCH 118/131] Fix intent --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 24fcc5af..6f65ea79 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -996,8 +996,8 @@ subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,d_cc_space_v_vo_chol integer, intent(in) :: nO,nV type(gpu_double2), intent(in) :: t1 type(gpu_double4), intent(in) :: t2, v_ovvo, v_ovoo, v_vvoo + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol,d_cc_space_v_vv_chol type(gpu_double4), intent(out) :: J1 - type(gpu_double3), intent(out) :: d_cc_space_v_vo_chol,d_cc_space_v_vv_chol integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam From f5cf674d7b4eb98637bde7eb07d1119cfeccc557 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 Jul 2024 23:04:22 +0200 Subject: [PATCH 119/131] Fix link stage for intel gpus --- configure | 4 ++-- plugins/local/gpu_intel/LIB | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 3e3390e1..43ca9f6d 100755 --- a/configure +++ b/configure @@ -117,12 +117,12 @@ done # Handle GPU acceleration rm -f ${QP_ROOT}/src/gpu_arch case "$GPU" in - amd) # Nvidia + amd) # AMD echo "Activating AMD GPU acceleration" ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch ;; intel) # Intel - echo "Activating Intel GPU acceleration" + echo "Activating Intel GPU acceleration (EXPERIMENTAL)" ln -s ${QP_ROOT}/plugins/local/gpu_intel ${QP_ROOT}/src/gpu_arch ;; nvidia) # Nvidia diff --git a/plugins/local/gpu_intel/LIB b/plugins/local/gpu_intel/LIB index 027c35b0..199b0f1c 100644 --- a/plugins/local/gpu_intel/LIB +++ b/plugins/local/gpu_intel/LIB @@ -1 +1,2 @@ --lmkl_sycl -lsycl +-ltbb -lsycl -lmkl_sycl -lgpu -limf -lintlc -lstdc++ + From d219dc10267c0fe86fbae4683c00f5051229c8c0 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 11 Jul 2024 13:57:28 +0200 Subject: [PATCH 120/131] beginning to put cholesky in CASSCF --- plugins/local/spher_harm/spher_harm.irp.f | 4 +- .../local/spher_harm/spher_harm_func.irp.f | 13 +++ src/casscf_cipsi/chol_bielec.irp.f | 93 ++++++++++++++++ src/casscf_cipsi/test_chol.irp.f | 23 ++++ src/mo_two_e_ints/cholesky.irp.f | 31 ++++++ src/mu_of_r/f_hf_cholesky.irp.f | 100 ++++++++++++++++++ src/mu_of_r/test_proj_op.irp.f | 20 +++- 7 files changed, 280 insertions(+), 4 deletions(-) create mode 100644 src/casscf_cipsi/chol_bielec.irp.f create mode 100644 src/casscf_cipsi/test_chol.irp.f diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index 7a2eea06..e8deafb9 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -1,7 +1,7 @@ program spher_harm implicit none -! call test_spher_harm + call test_spher_harm ! call test_cart - call test_brutal_spheric +! call test_brutal_spheric end diff --git a/plugins/local/spher_harm/spher_harm_func.irp.f b/plugins/local/spher_harm/spher_harm_func.irp.f index 825bd8ac..f12c8fb9 100644 --- a/plugins/local/spher_harm/spher_harm_func.irp.f +++ b/plugins/local/spher_harm/spher_harm_func.irp.f @@ -7,6 +7,7 @@ subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm) double precision :: theta, phi,r_abs call cartesian_to_spherical(r,theta,phi,r_abs) call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) +! call spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) end @@ -131,6 +132,10 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta) re_ylm = tmp * dcos(phi) im_ylm = tmp * dsin(phi) + else if (l==1.and.m==-1)then + tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta) + re_ylm = tmp * dcos(phi) + im_ylm = -tmp * dsin(phi) else if(l==1.and.m==0)then tmp = inv_sq_pi * dsqrt(3.d0/4.d0) * dcos(theta) re_ylm = tmp @@ -139,10 +144,18 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta) re_ylm = tmp * dcos(2.d0*phi) im_ylm = tmp * dsin(2.d0*phi) + else if(l==2.and.m==-2)then + tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta) + re_ylm = tmp * dcos(2.d0*phi) + im_ylm =-tmp * dsin(2.d0*phi) else if(l==2.and.m==1)then tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta) re_ylm = tmp * dcos(phi) im_ylm = tmp * dsin(phi) + else if(l==2.and.m==-1)then + tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta) + re_ylm = tmp * dcos(phi) + im_ylm =-tmp * dsin(phi) else if(l==2.and.m==0)then tmp = dsqrt(5.d0/4.d0) * inv_sq_pi* (1.5d0*dcos(theta)*dcos(theta)-0.5d0) re_ylm = tmp diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f new file mode 100644 index 00000000..1fe985ad --- /dev/null +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -0,0 +1,93 @@ + +BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_act_orb, mo_num)] + BEGIN_DOC + ! Cholesky vectors with ONE orbital on the active natural orbital basis + END_DOC + implicit none + integer :: i_chol,i_act,i_mo,jj_act + double precision, allocatable :: chol_tmp(:,:) + allocate(chol_tmp(cholesky_mo_num,n_act_orb)) + cholesky_no_1_idx_transp = 0.D0 + do i_mo = 1, mo_num + ! Get all the integrals corresponding to the "i_mo" + do i_act = 1, n_act_orb + jj_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + chol_tmp(i_chol, i_act) = cholesky_mo_transp(i_chol, jj_act, i_mo) + enddo + enddo +! ! Do the matrix product +! do i_act = 1, n_act_orb +! do jj_act = 1, n_act_orb +! do i_chol = 1, cholesky_mo_num +! cholesky_no_1_idx_transp(i_chol, i_act, i_mo) += chol_tmp(i_chol, jj_act) * natorbsCI(jj_act,i_act) +! enddo +! enddo +! enddo + call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & + chol_tmp, size(chol_tmp,1), & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + cholesky_no_1_idx_transp(1,1,i_mo), size(cholesky_no_1_idx_transp,1)) + enddo + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)] + BEGIN_DOC + ! Cholesky vectors with TWO orbital on the active natural orbital basis + END_DOC + implicit none + integer :: i_chol,i_act,j_act,jj_act + double precision, allocatable :: chol_tmp(:,:) + allocate(chol_tmp(cholesky_mo_num,n_act_orb)) + cholesky_no_2_idx_transp = 0.D0 + do j_act = 1, n_act_orb + do i_act = 1, n_act_orb + do jj_act = 1, n_act_orb + do i_chol = 1, cholesky_mo_num + cholesky_no_2_idx_transp(i_chol, i_act, j_act) += cholesky_no_1_idx_transp(i_chol, i_act,jj_act) * natorbsCI(jj_act,i_act) + enddo + enddo + enddo + enddo + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp_dgemm, (cholesky_mo_num, n_act_orb, n_act_orb)] + BEGIN_DOC + ! Cholesky vectors with TWO orbital on the active natural orbital basis + END_DOC + implicit none + integer :: i_chol,i_act,j_act,jj_act + double precision, allocatable :: chol_tmp(:,:) + allocate(chol_tmp(cholesky_mo_num,n_act_orb)) + cholesky_no_2_idx_transp_dgemm = 0.D0 + do j_act = 1, n_act_orb + ! Get all the integrals corresponding to the "j_act" + do i_act = 1, n_act_orb + jj_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + chol_tmp(i_chol, i_act) = cholesky_no_1_idx_transp(i_chol, j_act, jj_act) + enddo + enddo +! ! Do the matrix product +! do i_act = 1, n_act_orb +! do jj_act = 1, n_act_orb +! do i_chol = 1, cholesky_mo_num +! cholesky_no_1_idx_transp(i_chol, i_act, j_act) += chol_tmp(i_chol, jj_act) * natorbsCI(jj_act,i_act) +! enddo +! enddo +! enddo + call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & + chol_tmp, size(chol_tmp,1), & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + cholesky_no_2_idx_transp_dgemm(1,1,j_act), size(cholesky_no_2_idx_transp_dgemm,1)) + enddo + +END_PROVIDER + + diff --git a/src/casscf_cipsi/test_chol.irp.f b/src/casscf_cipsi/test_chol.irp.f new file mode 100644 index 00000000..b94851f9 --- /dev/null +++ b/src/casscf_cipsi/test_chol.irp.f @@ -0,0 +1,23 @@ +program test_chol + implicit none + read_wf= .True. + touch read_wf + call routine + +end + +subroutine routine + implicit none + integer :: i_chol, i_act, i_mo + double precision :: accu + accu = 0.d0 + do i_mo = 1, n_act_orb + do i_act = 1, n_act_orb + do i_chol = 1, cholesky_mo_num + accu += dabs(cholesky_no_2_idx_transp_dgemm(i_chol,i_act,i_mo) - cholesky_no_2_idx_transp(i_chol,i_act,i_mo)) + print*,cholesky_no_2_idx_transp_dgemm(i_chol,i_act,i_mo) , cholesky_no_2_idx_transp(i_chol,i_act,i_mo) + enddo + enddo + enddo + print*,'accu =', accu +end diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 7e2c8b37..1fed949d 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -101,3 +101,34 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_num, ao_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + double precision, allocatable :: X(:,:,:) + double precision :: wall0, wall1 + integer :: ierr + print *, 'Semi AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + integer :: i_chol, i_mo, j_mo, i_ao + cholesky_semi_mo_transp_simple = 0.d0 + do i_mo = 1, mo_num + do i_ao = 1, ao_num + do j_mo = 1, mo_num + do i_chol = 1, cholesky_mo_num + cholesky_semi_mo_transp_simple(i_chol, i_ao,i_mo) += cholesky_mo_transp(i_chol,j_mo,i_mo) * mo_coef_transp(j_mo,i_ao) + enddo + enddo + enddo + enddo + +END_PROVIDER + diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 5dd69eb6..179b80dd 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -289,6 +289,106 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] endif END_PROVIDER +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse_bis, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:),delta_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,mos_in_r_array_omp,aos_in_r_array,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_semi_mo_transp_simple,ao_num) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse_bis(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, ao_num ! electron 1 + mo_b_r1 = aos_in_r_array(mm,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_semi_mo_transp_simple(p,mm,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse_bis(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0 + else + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse_bis(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo + f_hf_cholesky_sparse_bis(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0 + endif +END_PROVIDER + + BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] implicit none integer :: ipoint,i,ii diff --git a/src/mu_of_r/test_proj_op.irp.f b/src/mu_of_r/test_proj_op.irp.f index f9aba094..fd5e976b 100644 --- a/src/mu_of_r/test_proj_op.irp.f +++ b/src/mu_of_r/test_proj_op.irp.f @@ -15,7 +15,23 @@ program projected_operators ! call test_f_HF_valence_ab ! call routine_full_mos ! call test_f_ii_valence_ab - call test_f_ia_valence_ab - call test_f_ii_ia_aa_valence_ab +! call test_f_ia_valence_ab +! call test_f_ii_ia_aa_valence_ab + call test end + +subroutine test + implicit none + integer :: i_point + double precision :: ref, new, accu, weight + accu = 0.d0 + do i_point = 1, n_points_final_grid + ref = f_hf_cholesky_sparse(i_point) + new = f_hf_cholesky_sparse_bis(i_point) + weight = final_weight_at_r_vector(i_point) + accu += dabs(ref - new) * weight + enddo + print*,'accu = ',accu + +end From 31ec3ace0540177c2476e478d763a068d73bd41b Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 11 Jul 2024 14:22:27 +0200 Subject: [PATCH 121/131] correct transformation of cholesky vectors on the NO basis --- src/casscf_cipsi/chol_bielec.irp.f | 41 +++++++++++++----------------- src/casscf_cipsi/test_chol.irp.f | 14 +++++++--- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f index 1fe985ad..3104fe5f 100644 --- a/src/casscf_cipsi/chol_bielec.irp.f +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -34,20 +34,21 @@ BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_ END_PROVIDER -BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)] +BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp_old, (cholesky_mo_num, n_act_orb, n_act_orb)] BEGIN_DOC ! Cholesky vectors with TWO orbital on the active natural orbital basis END_DOC implicit none - integer :: i_chol,i_act,j_act,jj_act + integer :: i_chol,i_act,j_act,jj_act,jjj_act double precision, allocatable :: chol_tmp(:,:) allocate(chol_tmp(cholesky_mo_num,n_act_orb)) - cholesky_no_2_idx_transp = 0.D0 - do j_act = 1, n_act_orb - do i_act = 1, n_act_orb - do jj_act = 1, n_act_orb + cholesky_no_2_idx_transp_old = 0.D0 + do jj_act = 1, n_act_orb + jjj_act = list_act(jj_act) + do j_act = 1, n_act_orb + do i_act = 1, n_act_orb do i_chol = 1, cholesky_mo_num - cholesky_no_2_idx_transp(i_chol, i_act, j_act) += cholesky_no_1_idx_transp(i_chol, i_act,jj_act) * natorbsCI(jj_act,i_act) + cholesky_no_2_idx_transp_old(i_chol, i_act, j_act) += cholesky_no_1_idx_transp(i_chol, i_act,jjj_act) * natorbsCI(jj_act,j_act) enddo enddo enddo @@ -56,36 +57,28 @@ BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_ END_PROVIDER -BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp_dgemm, (cholesky_mo_num, n_act_orb, n_act_orb)] +BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)] BEGIN_DOC ! Cholesky vectors with TWO orbital on the active natural orbital basis END_DOC implicit none integer :: i_chol,i_act,j_act,jj_act - double precision, allocatable :: chol_tmp(:,:) - allocate(chol_tmp(cholesky_mo_num,n_act_orb)) - cholesky_no_2_idx_transp_dgemm = 0.D0 - do j_act = 1, n_act_orb + double precision, allocatable :: chol_tmp(:,:),chol_tmp_bis(:,:) + allocate(chol_tmp(cholesky_mo_num,n_act_orb),chol_tmp_bis(cholesky_mo_num,n_act_orb)) + cholesky_no_2_idx_transp = 0.D0 + do i_act = 1, n_act_orb ! Get all the integrals corresponding to the "j_act" - do i_act = 1, n_act_orb - jj_act = list_act(i_act) + do j_act = 1, n_act_orb + jj_act = list_act(j_act) do i_chol = 1, cholesky_mo_num - chol_tmp(i_chol, i_act) = cholesky_no_1_idx_transp(i_chol, j_act, jj_act) + chol_tmp(i_chol, j_act) = cholesky_no_1_idx_transp(i_chol, i_act, jj_act) enddo enddo -! ! Do the matrix product -! do i_act = 1, n_act_orb -! do jj_act = 1, n_act_orb -! do i_chol = 1, cholesky_mo_num -! cholesky_no_1_idx_transp(i_chol, i_act, j_act) += chol_tmp(i_chol, jj_act) * natorbsCI(jj_act,i_act) -! enddo -! enddo -! enddo call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & chol_tmp, size(chol_tmp,1), & natorbsCI, size(natorbsCI,1), & 0.d0, & - cholesky_no_2_idx_transp_dgemm(1,1,j_act), size(cholesky_no_2_idx_transp_dgemm,1)) + cholesky_no_2_idx_transp(1,1,i_act), size(cholesky_no_2_idx_transp,1)) enddo END_PROVIDER diff --git a/src/casscf_cipsi/test_chol.irp.f b/src/casscf_cipsi/test_chol.irp.f index b94851f9..8d978817 100644 --- a/src/casscf_cipsi/test_chol.irp.f +++ b/src/casscf_cipsi/test_chol.irp.f @@ -9,15 +9,21 @@ end subroutine routine implicit none integer :: i_chol, i_act, i_mo - double precision :: accu + double precision :: accu,error,exact accu = 0.d0 do i_mo = 1, n_act_orb do i_act = 1, n_act_orb do i_chol = 1, cholesky_mo_num - accu += dabs(cholesky_no_2_idx_transp_dgemm(i_chol,i_act,i_mo) - cholesky_no_2_idx_transp(i_chol,i_act,i_mo)) - print*,cholesky_no_2_idx_transp_dgemm(i_chol,i_act,i_mo) , cholesky_no_2_idx_transp(i_chol,i_act,i_mo) + error = dabs(cholesky_no_2_idx_transp(i_chol,i_act,i_mo) - cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo)) + exact = dabs(cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo)) + accu += error + if(exact.gt.1.d-10)then + if(error/exact.gt.1.d-7)then + write(*,'(4(E16.10,X))')cholesky_no_2_idx_transp(i_chol,i_act,i_mo) , cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo),error,error/exact + endif + endif enddo enddo enddo - print*,'accu =', accu + print*,'accu =', accu/(dble(n_act_orb*n_act_orb*cholesky_mo_num)) end From 56450ed0436c4c1be26f33757ea3a4ca35238b57 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 11 Jul 2024 19:09:20 +0200 Subject: [PATCH 122/131] introduced functions mimicking the arrays --- src/casscf_cipsi/chol_bielec.irp.f | 158 ++++++++++++++++++++++++++++- src/casscf_cipsi/test_chol.irp.f | 92 ++++++++++++++--- 2 files changed, 234 insertions(+), 16 deletions(-) diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f index 3104fe5f..94a76453 100644 --- a/src/casscf_cipsi/chol_bielec.irp.f +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -33,7 +33,6 @@ BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_ END_PROVIDER - BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp_old, (cholesky_mo_num, n_act_orb, n_act_orb)] BEGIN_DOC ! Cholesky vectors with TWO orbital on the active natural orbital basis @@ -83,4 +82,161 @@ BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_ END_PROVIDER +BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! Cholesky vectors defined on all basis including the NO basis + END_DOC + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt + ! Block when two orbitals belong to the core/inact + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_core_inact, jj_core_inact) = cholesky_mo_transp(i_chol,ii_core_inact,jj_core_inact) + enddo + enddo + enddo + ! Block when one orbitals belongs to the core/inact and one belongs to the active + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,j_core_inact) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact) + enddo + enddo + enddo + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,j_core_inact,ii_act) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact) + enddo + enddo + enddo + + ! Block when two orbitals belong to the active + do j_act = 1, n_act_orb + jj_act = list_act(j_act) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,jj_act) = cholesky_no_2_idx_transp(i_chol,i_act,j_act) + enddo + enddo + enddo + + ! Block when two orbitals belong to the virtuals + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do j_virt = 1, n_virt_orb + jj_virt = list_virt(j_virt) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,jj_virt,ii_virt) = cholesky_mo_transp(i_chol,jj_virt,ii_virt) + enddo + enddo + enddo + + ! Block when one orbital is in active and the other in the virtuals + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,ii_virt) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt) + enddo + enddo + enddo + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_virt,ii_act) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt) + enddo + enddo + enddo + ! Block when one orbital is in the virtual and one in the core-inact + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_core_inact, ii_virt) = cholesky_mo_transp(i_chol, ii_core_inact, ii_virt) + enddo + enddo + enddo + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_virt, ii_core_inact) = cholesky_mo_transp(i_chol, ii_virt, ii_core_inact) + enddo + enddo + enddo +END_PROVIDER + + +double precision function bielec_no_basis_chol(i_1,j_1,i_2,j_2) + implicit none + integer, intent(in) :: i_1,j_1,i_2,j_2 + BEGIN_DOC + ! integral (i_1 j_1|i_2 j_2) in the mixed basis of both MOs and natural MOs + ! + END_DOC + integer :: i_chol + bielec_no_basis_chol = 0.d0 + do i_chol = 1, cholesky_mo_num + bielec_no_basis_chol += cholesky_no_total_transp(i_chol,i_1, j_1) * cholesky_no_total_transp(i_chol,i_2,j_2) + enddo +end + +double precision function bielec_PQxx_no_chol(i_mo, j_mo, i_ca, j_ca) + implicit none + BEGIN_DOC + ! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition + ! + ! indices are unshifted orbital numbers + END_DOC + integer, intent(in) :: i_ca, j_ca, i_mo, j_mo + integer :: ii_ca, jj_ca + double precision :: bielec_no_basis_chol + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PQxx_no_chol = bielec_no_basis_chol(i_mo,j_mo,ii_ca,jj_ca) + +end + +double precision function bielec_PxxQ_no_chol(i_mo, j_ca, i_ca, j_mo) + implicit none + BEGIN_DOC + ! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition + ! + ! indices are unshifted orbital numbers + END_DOC + integer, intent(in) :: i_ca, j_ca, i_mo, j_mo + integer :: ii_ca, jj_ca + double precision :: bielec_no_basis_chol + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PxxQ_no_chol = bielec_no_basis_chol(i_mo, jj_ca, ii_ca, j_mo) + +end + +double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo) + implicit none + integer, intent(in) :: i_ca, j_ca, k_ca, i_mo + integer :: ii_ca, jj_ca, kk_ca + double precision :: bielec_no_basis_chol + ii_ca = list_act(i_ca) + jj_ca = list_act(j_ca) + kk_ca = list_act(k_ca) + bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo) + +end diff --git a/src/casscf_cipsi/test_chol.irp.f b/src/casscf_cipsi/test_chol.irp.f index 8d978817..87c5c352 100644 --- a/src/casscf_cipsi/test_chol.irp.f +++ b/src/casscf_cipsi/test_chol.irp.f @@ -2,28 +2,90 @@ program test_chol implicit none read_wf= .True. touch read_wf - call routine +! call routine_bielec_PxxQ_no + call routine_bielecCI_no end -subroutine routine +subroutine routine_bielec_PQxx_no implicit none - integer :: i_chol, i_act, i_mo - double precision :: accu,error,exact + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo + double precision :: exact, new, error, accu, bielec_no_basis_chol + double precision :: bielec_PQxx_no_chol + accu = 0.d0 - do i_mo = 1, n_act_orb - do i_act = 1, n_act_orb - do i_chol = 1, cholesky_mo_num - error = dabs(cholesky_no_2_idx_transp(i_chol,i_act,i_mo) - cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo)) - exact = dabs(cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo)) - accu += error - if(exact.gt.1.d-10)then - if(error/exact.gt.1.d-7)then - write(*,'(4(E16.10,X))')cholesky_no_2_idx_transp(i_chol,i_act,i_mo) , cholesky_no_2_idx_transp_old(i_chol,i_act,i_mo),error,error/exact + do i_core_inact = 1, n_core_inact_act_orb + ii_core_inact = list_core_inact_act(i_core_inact) + do j_core_inact = 1, n_core_inact_act_orb + jj_core_inact = list_core_inact_act(j_core_inact) + do i_mo = 1, mo_num + do j_mo = 1, mo_num + exact = bielec_PQxx_no(j_mo,i_mo, j_core_inact, i_core_inact) +! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact) + new = bielec_PQxx_no_chol(j_mo,i_mo, j_core_inact, i_core_inact) + error = dabs(exact-new) + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error endif - endif + accu += error + enddo enddo enddo enddo - print*,'accu =', accu/(dble(n_act_orb*n_act_orb*cholesky_mo_num)) + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine routine_bielec_PxxQ_no + implicit none + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo + double precision :: exact, new, error, accu, bielec_no_basis_chol + double precision :: bielec_PxxQ_no_chol + + accu = 0.d0 + do i_mo = 1, mo_num + do i_core_inact = 1, n_core_inact_act_orb + ii_core_inact = list_core_inact_act(i_core_inact) + do j_core_inact = 1, n_core_inact_act_orb + jj_core_inact = list_core_inact_act(j_core_inact) + do j_mo = 1, mo_num + exact = bielec_PxxQ_no(j_mo, j_core_inact, i_core_inact,i_mo) +! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact) + new = bielec_PxxQ_no_chol(j_mo, j_core_inact, i_core_inact,i_mo) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine routine_bielecCI_no + implicit none + integer :: i_ca, j_ca, k_ca, i_mo + double precision :: exact, new, error, accu, bielec_no_basis_chol + double precision :: bielecCI_no_chol + + accu = 0.d0 + do i_mo = 1, mo_num + do i_ca = 1, n_act_orb + do j_ca = 1, n_act_orb + do k_ca = 1, n_act_orb + exact =bielecCI_no(k_ca, j_ca, i_ca, i_mo) + new = bielecCI_no_chol(k_ca, j_ca, i_ca, i_mo) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) end From 505d10084c8331c36ea6a2244bdc47ea8fb33a81 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 12 Jul 2024 16:19:53 +0200 Subject: [PATCH 123/131] Choleskization of the CASSCF --- src/casscf_cipsi/bielec.irp.f | 61 +++++++++----- src/casscf_cipsi/bielec_natorb.irp.f | 68 ++++++++++----- src/casscf_cipsi/casscf.irp.f | 2 +- src/casscf_cipsi/chol_bielec.irp.f | 118 ++++++++++++++------------- src/casscf_cipsi/chol_garb.irp.f | 34 ++++++++ src/casscf_cipsi/gradient.irp.f | 1 + src/casscf_cipsi/hessian.irp.f | 6 ++ src/casscf_cipsi/mcscf_fock.irp.f | 2 + src/casscf_cipsi/test_chol.irp.f | 63 +++++++++----- src/casscf_cipsi/tot_en.irp.f | 1 + 10 files changed, 238 insertions(+), 118 deletions(-) create mode 100644 src/casscf_cipsi/chol_garb.irp.f diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f index 0a44f994..a4901985 100644 --- a/src/casscf_cipsi/bielec.irp.f +++ b/src/casscf_cipsi/bielec.irp.f @@ -1,18 +1,25 @@ -BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] +BEGIN_PROVIDER [real*8, bielec_PQxx_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_DOC - ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PQxx + ! + ! bielec_PQxx_array : integral (pq|xx) with p,q arbitrary, x core or active ! indices are unshifted orbital numbers END_DOC implicit none integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 real*8 :: mo_two_e_integral + print*,'' + print*,'Providing bielec_PQxx_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' - bielec_PQxx(:,:,:,:) = 0.d0 + bielec_PQxx_array(:,:,:,:) = 0.d0 PROVIDE mo_two_e_integrals_in_map !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,ii,j,jj,i3,j3) & - !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx_array, & !$OMP n_act_orb,mo_integrals_map,list_act) !$OMP DO @@ -20,14 +27,14 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core ii=list_core_inact(i) do j=i,n_core_inact_orb jj=list_core_inact(j) - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) - bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j),mo_integrals_map) + bielec_PQxx_array(:,:,j,i)=bielec_PQxx_array(:,:,i,j) end do do j=1,n_act_orb jj=list_act(j) j3=j+n_core_inact_orb - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) - bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j3),mo_integrals_map) + bielec_PQxx_array(:,:,j3,i)=bielec_PQxx_array(:,:,i,j3) end do end do !$OMP END DO @@ -40,8 +47,8 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core do j=i,n_act_orb jj=list_act(j) j3=j+n_core_inact_orb - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) - bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i3,j3),mo_integrals_map) + bielec_PQxx_array(:,:,j3,i3)=bielec_PQxx_array(:,:,i3,j3) end do end do !$OMP END DO @@ -52,9 +59,13 @@ END_PROVIDER -BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] +BEGIN_PROVIDER [real*8, bielec_PxxQ_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_DOC - ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PxxQ + ! + ! bielec_PxxQ_array : integral (px|xq) with p,q arbitrary, x core or active ! indices are unshifted orbital numbers END_DOC implicit none @@ -62,12 +73,15 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a double precision, allocatable :: integrals_array(:,:) real*8 :: mo_two_e_integral + print*,'' + print*,'Providing bielec_PxxQ_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' PROVIDE mo_two_e_integrals_in_map - bielec_PxxQ = 0.d0 + bielec_PxxQ_array = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & - !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ_array, & !$OMP n_act_orb,mo_integrals_map,list_act) allocate(integrals_array(mo_num,mo_num)) @@ -80,8 +94,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i,j,q)=integrals_array(p,q) - bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j,i,q)=integrals_array(q,p) end do end do end do @@ -91,8 +105,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) - bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j3,i,q)=integrals_array(q,p) end do end do end do @@ -111,8 +125,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) - bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j3,i3,q)=integrals_array(q,p) end do end do end do @@ -129,10 +143,15 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] BEGIN_DOC ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active ! index p runs over the whole basis, t,u,v only over the active orbitals + ! + ! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb END_DOC implicit none integer :: i,j,k,p,t,u,v double precision, external :: mo_two_e_integral + double precision :: wall0, wall1 + call wall_time(wall0) + print*,'Providing bielecCI' PROVIDE mo_two_e_integrals_in_map !$OMP PARALLEL DO DEFAULT(NONE) & @@ -151,5 +170,7 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] end do end do !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide bielecCI = ',wall1 - wall0 END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f index 9968530c..99734a0b 100644 --- a/src/casscf_cipsi/bielec_natorb.irp.f +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -1,30 +1,38 @@ - BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_PROVIDER [real*8, bielec_PQxx_no_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_DOC + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PQxx_no + ! ! integral (pq|xx) in the basis of natural MOs ! indices are unshifted orbital numbers + ! END_DOC implicit none integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + print*,'' + print*,'Providing bielec_PQxx_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & - !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + !$OMP bielec_PQxx_no_array,bielec_PQxx_array,list_act,natorbsCI) allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & d(n_act_orb,mo_num,n_core_inact_act_orb)) !$OMP DO do l=1,n_core_inact_act_orb - bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + bielec_PQxx_no_array(:,:,:,l) = bielec_PQxx_array(:,:,:,l) do k=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + f(p,j,k)=bielec_PQxx_no_array(list_act(p),j,k,l) end do end do end do @@ -36,13 +44,13 @@ do k=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + bielec_PQxx_no_array(list_act(p),j,k,l)=d(p,j,k) end do end do do j=1,mo_num do p=1,n_act_orb - f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + f(p,j,k)=bielec_PQxx_no_array(j,list_act(p),k,l) end do end do end do @@ -54,7 +62,7 @@ do k=1,n_core_inact_act_orb do p=1,n_act_orb do j=1,mo_num - bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + bielec_PQxx_no_array(j,list_act(p),k,l)=d(p,j,k) end do end do end do @@ -71,7 +79,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + f(j,k,p) = bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l) end do end do end do @@ -83,7 +91,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)=d(j,k,p) end do end do end do @@ -97,7 +105,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + f(j,k,p) = bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p) end do end do end do @@ -109,7 +117,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -123,8 +131,12 @@ END_PROVIDER -BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] +BEGIN_PROVIDER [real*8, bielec_PxxQ_no_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_DOC + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PxxQ_no + ! ! integral (px|xq) in the basis of natural MOs ! indices are unshifted orbital numbers END_DOC @@ -132,10 +144,14 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + print*,'' + print*,'Providing bielec_PxxQ_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & - !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + !$OMP bielec_PxxQ_no_array,bielec_PxxQ_array,list_act,natorbsCI) allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & @@ -143,11 +159,11 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac !$OMP DO do j=1,mo_num - bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + bielec_PxxQ_no_array(:,:,:,j) = bielec_PxxQ_array(:,:,:,j) do l=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb do p=1,n_act_orb - f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + f(p,k,l) = bielec_PxxQ_no_array(list_act(p),k,l,j) end do end do end do @@ -159,7 +175,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do l=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb do p=1,n_act_orb - bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + bielec_PxxQ_no_array(list_act(p),k,l,j)=d(p,k,l) end do end do end do @@ -176,7 +192,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do l=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + f(p,j,l) = bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k) end do end do end do @@ -188,7 +204,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do l=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)=d(p,j,l) end do end do end do @@ -205,7 +221,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do p=1,n_act_orb do l=1,n_core_inact_act_orb do j=1,mo_num - f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + f(j,l,p) = bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k) end do end do end do @@ -217,7 +233,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do p=1,n_act_orb do l=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)=d(j,l,p) end do end do end do @@ -231,7 +247,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do p=1,n_act_orb do k=1,n_core_inact_act_orb do j=1,mo_num - f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + f(j,k,p) = bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p) end do end do end do @@ -243,7 +259,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac do p=1,n_act_orb do k=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -259,10 +275,16 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] BEGIN_DOC ! integrals (tu|vp) in the basis of natural MOs ! index p runs over the whole basis, t,u,v only over the active orbitals + ! + ! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb END_DOC implicit none integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + + double precision :: wall0, wall1 + call wall_time(wall0) + print*,'Providing bielecCI_no' !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & @@ -363,6 +385,8 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] deallocate(d,f) !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide bielecCI_no = ',wall1-wall0 END_PROVIDER diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index d0a26d36..dc3e2245 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -11,7 +11,7 @@ program casscf if(small_active_space)then pt2_relative_error = 0.00001 else - thresh_scf = 1.d-4 + thresh_scf = max(1.d-4,thresh_scf) pt2_relative_error = 0.04 endif touch pt2_relative_error diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f index 94a76453..f69832c1 100644 --- a/src/casscf_cipsi/chol_bielec.irp.f +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -6,6 +6,9 @@ BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_ implicit none integer :: i_chol,i_act,i_mo,jj_act double precision, allocatable :: chol_tmp(:,:) + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_1_idx_transp' allocate(chol_tmp(cholesky_mo_num,n_act_orb)) cholesky_no_1_idx_transp = 0.D0 do i_mo = 1, mo_num @@ -16,46 +19,17 @@ BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_ chol_tmp(i_chol, i_act) = cholesky_mo_transp(i_chol, jj_act, i_mo) enddo enddo -! ! Do the matrix product -! do i_act = 1, n_act_orb -! do jj_act = 1, n_act_orb -! do i_chol = 1, cholesky_mo_num -! cholesky_no_1_idx_transp(i_chol, i_act, i_mo) += chol_tmp(i_chol, jj_act) * natorbsCI(jj_act,i_act) -! enddo -! enddo -! enddo call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & chol_tmp, size(chol_tmp,1), & natorbsCI, size(natorbsCI,1), & 0.d0, & cholesky_no_1_idx_transp(1,1,i_mo), size(cholesky_no_1_idx_transp,1)) enddo + call wall_time(wall1) + print*,'Time to provide cholesky_no_1_idx_transp = ', wall1 - wall0 END_PROVIDER -BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp_old, (cholesky_mo_num, n_act_orb, n_act_orb)] - BEGIN_DOC - ! Cholesky vectors with TWO orbital on the active natural orbital basis - END_DOC - implicit none - integer :: i_chol,i_act,j_act,jj_act,jjj_act - double precision, allocatable :: chol_tmp(:,:) - allocate(chol_tmp(cholesky_mo_num,n_act_orb)) - cholesky_no_2_idx_transp_old = 0.D0 - do jj_act = 1, n_act_orb - jjj_act = list_act(jj_act) - do j_act = 1, n_act_orb - do i_act = 1, n_act_orb - do i_chol = 1, cholesky_mo_num - cholesky_no_2_idx_transp_old(i_chol, i_act, j_act) += cholesky_no_1_idx_transp(i_chol, i_act,jjj_act) * natorbsCI(jj_act,j_act) - enddo - enddo - enddo - enddo - -END_PROVIDER - - BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)] BEGIN_DOC ! Cholesky vectors with TWO orbital on the active natural orbital basis @@ -64,6 +38,9 @@ BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_ integer :: i_chol,i_act,j_act,jj_act double precision, allocatable :: chol_tmp(:,:),chol_tmp_bis(:,:) allocate(chol_tmp(cholesky_mo_num,n_act_orb),chol_tmp_bis(cholesky_mo_num,n_act_orb)) + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_2_idx_transp' cholesky_no_2_idx_transp = 0.D0 do i_act = 1, n_act_orb ! Get all the integrals corresponding to the "j_act" @@ -79,6 +56,8 @@ BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_ 0.d0, & cholesky_no_2_idx_transp(1,1,i_act), size(cholesky_no_2_idx_transp,1)) enddo + call wall_time(wall1) + print*,'Time to provide cholesky_no_2_idx_transp = ', wall1 - wall0 END_PROVIDER @@ -89,6 +68,9 @@ BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, m END_DOC integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact integer :: i_virt, ii_virt, j_virt, jj_virt + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_total_transp ' ! Block when two orbitals belong to the core/inact do j_core_inact = 1, n_core_inact_orb jj_core_inact = list_core_inact(j_core_inact) @@ -180,63 +162,87 @@ BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, m enddo enddo enddo + + call wall_time(wall1) + print*,'Time to provide cholesky_no_total_transp = ', wall1 - wall0 END_PROVIDER -double precision function bielec_no_basis_chol(i_1,j_1,i_2,j_2) +double precision function bielec_no_basis(i_1,j_1,i_2,j_2) implicit none integer, intent(in) :: i_1,j_1,i_2,j_2 BEGIN_DOC ! integral (i_1 j_1|i_2 j_2) in the mixed basis of both MOs and natural MOs ! END_DOC - integer :: i_chol - bielec_no_basis_chol = 0.d0 - do i_chol = 1, cholesky_mo_num - bielec_no_basis_chol += cholesky_no_total_transp(i_chol,i_1, j_1) * cholesky_no_total_transp(i_chol,i_2,j_2) + integer :: i + bielec_no_basis = 0.d0 + do i = 1, cholesky_mo_num + bielec_no_basis += cholesky_no_total_transp(i,i_1, j_1) * cholesky_no_total_transp(i,i_2,j_2) enddo end -double precision function bielec_PQxx_no_chol(i_mo, j_mo, i_ca, j_ca) +double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca) implicit none BEGIN_DOC - ! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition + ! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition on the NO basis for active orbitals ! - ! indices are unshifted orbital numbers + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis_chol + double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PQxx_no_chol = bielec_no_basis_chol(i_mo,j_mo,ii_ca,jj_ca) - + bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) end -double precision function bielec_PxxQ_no_chol(i_mo, j_ca, i_ca, j_mo) +double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) implicit none BEGIN_DOC - ! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition + ! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals ! - ! indices are unshifted orbital numbers + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis_chol + double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PxxQ_no_chol = bielec_no_basis_chol(i_mo, jj_ca, ii_ca, j_mo) + bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) end -double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo) + +double precision function bielec_PQxx(i_mo, j_mo, i_ca, j_ca) + BEGIN_DOC + ! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition + ! + ! indices are unshifted orbital numbers + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC implicit none - integer, intent(in) :: i_ca, j_ca, k_ca, i_mo - integer :: ii_ca, jj_ca, kk_ca - double precision :: bielec_no_basis_chol - ii_ca = list_act(i_ca) - jj_ca = list_act(j_ca) - kk_ca = list_act(k_ca) - bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo) - + integer, intent(in) :: i_ca, j_ca, j_mo, i_mo + double precision :: mo_two_e_integral + integer :: ii_ca, jj_ca + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PQxx = mo_two_e_integral(i_mo,ii_ca,j_mo,jj_ca) end + +double precision function bielec_PxxQ(i_mo, i_ca, j_ca, j_mo) + BEGIN_DOC + ! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC + implicit none + integer, intent(in) :: i_ca, j_ca, j_mo, i_mo + double precision :: mo_two_e_integral + integer :: ii_ca, jj_ca + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PxxQ = mo_two_e_integral(i_mo,jj_ca,ii_ca,j_mo) +end + diff --git a/src/casscf_cipsi/chol_garb.irp.f b/src/casscf_cipsi/chol_garb.irp.f new file mode 100644 index 00000000..c4a8fa59 --- /dev/null +++ b/src/casscf_cipsi/chol_garb.irp.f @@ -0,0 +1,34 @@ + +!!!!! FUNCTIONS THAT WORK BUT WHICH ARE USELESS AS THE ARRAYS CAN ALWAYS BE STORED +!double precision function bielecCI_chol(i_a, j_a, k_a, i_mo) +! BEGIN_DOC +! ! function that computes (i_a j_a |k_a j_mo) with Cholesky decomposition +! ! +! ! where i_a, j_a, k_a are in [1:n_act_orb] !!! ONLY ON ACTIVE +! END_DOC +! implicit none +! integer, intent(in) :: i_a, j_a, k_a, i_mo +! integer :: ii_a, jj_a, kk_a +! double precision :: mo_two_e_integral +! ii_a = list_act(i_a) +! jj_a = list_act(j_a) +! kk_a = list_act(k_a) +! bielecCI_chol = mo_two_e_integral(ii_a,kk_a,jj_a,i_mo) +!end + +!double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo) +! BEGIN_DOC +! ! function that computes (i_ca j_ca |k_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals +! ! +! ! where i_ca, j_ca, k_ca are in [1:n_core_inact_act_orb] +! END_DOC +! implicit none +! integer, intent(in) :: i_ca, j_ca, k_ca, i_mo +! integer :: ii_ca, jj_ca, kk_ca +! double precision :: bielec_no_basis_chol +! ii_ca = list_act(i_ca) +! jj_ca = list_act(j_ca) +! kk_ca = list_act(k_ca) +! bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo) +! +!end diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f index a1c5e947..961d260d 100644 --- a/src/casscf_cipsi/gradient.irp.f +++ b/src/casscf_cipsi/gradient.irp.f @@ -157,6 +157,7 @@ real*8 function gradvec_it(i,t) integer :: ii,tt,v,vv,x,y integer :: x3,y3 + double precision :: bielec_PQxx_no ii=list_core_inact(i) tt=list_act(t) diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f index 458c6aa6..9a7a9031 100644 --- a/src/casscf_cipsi/hessian.irp.f +++ b/src/casscf_cipsi/hessian.irp.f @@ -10,6 +10,7 @@ real*8 function hessmat_itju(i,t,j,u) implicit none integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj real*8 :: term,t2 + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) tt=list_act(t) @@ -95,6 +96,7 @@ real*8 function hessmat_itja(i,t,j,a) implicit none integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ! it/ja ii=list_core_inact(i) @@ -128,6 +130,7 @@ real*8 function hessmat_itua(i,t,u,a) implicit none integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) tt=list_act(t) @@ -169,6 +172,7 @@ real*8 function hessmat_iajb(i,a,j,b) implicit none integer :: i,a,j,b,ii,aa,jj,bb real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) aa=list_virt(a) @@ -205,6 +209,7 @@ real*8 function hessmat_iatb(i,a,t,b) implicit none integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) aa=list_virt(a) @@ -237,6 +242,7 @@ real*8 function hessmat_taub(t,a,u,b) integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y integer :: v3,x3 real*8 :: term,t1,t2,t3 + double precision :: bielec_pqxx_no,bielec_pxxq_no tt=list_act(t) aa=list_virt(a) diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index 0f4b7a99..82b710a7 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -4,6 +4,7 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] END_DOC implicit none integer :: p,q,k,kk,t,tt,u,uu + double precision :: bielec_pxxq_no, bielec_pqxx_no do q=1,mo_num do p=1,mo_num @@ -44,6 +45,7 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] END_DOC implicit none integer :: p,q,k,kk,t,tt,u,uu + double precision :: bielec_pxxq_no, bielec_pqxx_no Fapq = 0.d0 diff --git a/src/casscf_cipsi/test_chol.irp.f b/src/casscf_cipsi/test_chol.irp.f index 87c5c352..bcce7cf7 100644 --- a/src/casscf_cipsi/test_chol.irp.f +++ b/src/casscf_cipsi/test_chol.irp.f @@ -3,7 +3,9 @@ program test_chol read_wf= .True. touch read_wf ! call routine_bielec_PxxQ_no - call routine_bielecCI_no +! call routine_bielecCI_no +! call test_bielec_PxxQ_chol +! call test_bielecCI end @@ -12,7 +14,7 @@ subroutine routine_bielec_PQxx_no integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo double precision :: exact, new, error, accu, bielec_no_basis_chol - double precision :: bielec_PQxx_no_chol + double precision :: bielec_PQxx_no accu = 0.d0 do i_core_inact = 1, n_core_inact_act_orb @@ -21,9 +23,8 @@ subroutine routine_bielec_PQxx_no jj_core_inact = list_core_inact_act(j_core_inact) do i_mo = 1, mo_num do j_mo = 1, mo_num - exact = bielec_PQxx_no(j_mo,i_mo, j_core_inact, i_core_inact) -! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact) - new = bielec_PQxx_no_chol(j_mo,i_mo, j_core_inact, i_core_inact) + exact = bielec_PQxx_no_array(j_mo,i_mo, j_core_inact, i_core_inact) + new = bielec_PQxx_no(j_mo,i_mo, j_core_inact, i_core_inact) error = dabs(exact-new) if(dabs(exact).gt.1.d-10)then print*,exact,new,error @@ -36,12 +37,12 @@ subroutine routine_bielec_PQxx_no print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) end -subroutine routine_bielec_PxxQ_no +subroutine routine_bielec_PxxQ_no_array implicit none integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo double precision :: exact, new, error, accu, bielec_no_basis_chol - double precision :: bielec_PxxQ_no_chol + double precision :: bielec_PxxQ_no accu = 0.d0 do i_mo = 1, mo_num @@ -50,9 +51,9 @@ subroutine routine_bielec_PxxQ_no do j_core_inact = 1, n_core_inact_act_orb jj_core_inact = list_core_inact_act(j_core_inact) do j_mo = 1, mo_num - exact = bielec_PxxQ_no(j_mo, j_core_inact, i_core_inact,i_mo) + exact = bielec_PxxQ_no_array(j_mo, j_core_inact, i_core_inact,i_mo) ! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact) - new = bielec_PxxQ_no_chol(j_mo, j_core_inact, i_core_inact,i_mo) + new = bielec_PxxQ_no(j_mo, j_core_inact, i_core_inact,i_mo) error = dabs(exact-new) accu += error if(dabs(exact).gt.1.d-10)then @@ -65,19 +66,43 @@ subroutine routine_bielec_PxxQ_no print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) end -subroutine routine_bielecCI_no +subroutine test_bielec_PQxx(i_mo, j_mo, i_ca, j_ca) implicit none - integer :: i_ca, j_ca, k_ca, i_mo - double precision :: exact, new, error, accu, bielec_no_basis_chol - double precision :: bielecCI_no_chol + integer :: i_mo, j_mo, i_ca, j_ca + double precision :: exact, new, error, accu + double precision :: bielec_PQxx accu = 0.d0 - do i_mo = 1, mo_num - do i_ca = 1, n_act_orb - do j_ca = 1, n_act_orb - do k_ca = 1, n_act_orb - exact =bielecCI_no(k_ca, j_ca, i_ca, i_mo) - new = bielecCI_no_chol(k_ca, j_ca, i_ca, i_mo) + do j_ca = 1, n_core_inact_act_orb + do i_ca = 1, n_core_inact_act_orb + do j_mo = 1, mo_num + do i_mo = 1, mo_num + exact = bielec_PQxx_array(i_mo, j_mo, i_ca, j_ca) + new = bielec_PQxx(i_mo, j_mo, i_ca, j_ca) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine test_bielec_PxxQ_chol(i_mo, i_ca, j_ca, j_mo) + implicit none + integer :: i_mo, i_ca, j_ca, j_mo + double precision :: exact, new, error, accu + double precision :: bielec_PxxQ + accu = 0.d0 + do j_mo = 1, mo_num + do j_ca = 1, n_core_inact_act_orb + do i_ca =1, n_core_inact_act_orb + do i_mo = 1, mo_num + exact = bielec_PxxQ_array(i_mo, i_ca, j_ca, j_mo) + new = bielec_PxxQ(i_mo, i_ca, j_ca, j_mo) error = dabs(exact-new) accu += error if(dabs(exact).gt.1.d-10)then diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f index 1d70e087..37ceac05 100644 --- a/src/casscf_cipsi/tot_en.irp.f +++ b/src/casscf_cipsi/tot_en.irp.f @@ -8,6 +8,7 @@ implicit none integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 real*8 :: e_one_all,e_two_all + double precision :: bielec_PQxx,bielec_PxxQ e_one_all=0.D0 e_two_all=0.D0 do i=1,n_core_inact_orb From 4a9a11c630c94528fb429255d5d9767d9b2edefc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 Jul 2024 17:32:41 +0200 Subject: [PATCH 124/131] GPU acceleration compute_tc_int --- plugins/local/tc_int/NEED | 1 + plugins/local/tc_int/compute_tc_int.irp.f | 117 +++++++++++++--------- 2 files changed, 69 insertions(+), 49 deletions(-) diff --git a/plugins/local/tc_int/NEED b/plugins/local/tc_int/NEED index 8a4caf5b..34d2e183 100644 --- a/plugins/local/tc_int/NEED +++ b/plugins/local/tc_int/NEED @@ -1,3 +1,4 @@ +gpu tc_keywords jastrow qmckl diff --git a/plugins/local/tc_int/compute_tc_int.irp.f b/plugins/local/tc_int/compute_tc_int.irp.f index 02f21570..92c90d03 100644 --- a/plugins/local/tc_int/compute_tc_int.irp.f +++ b/plugins/local/tc_int/compute_tc_int.irp.f @@ -2,23 +2,23 @@ ! --- subroutine provide_int2_grad1_u12_ao() - + use gpu BEGIN_DOC ! - ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) - ! int2_grad1_u12_ao(i,j,ipoint,4) = \int dr2 [-(1/2) [\grad1 u(r1,r2)]^2] \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,4) = \int dr2 [-(1/2) [\grad1 u(r1,r2)]^2] \chi_i(r2) \chi_j(r2) ! ! - ! tc_int_2e_ao(k,i,l,j) = (ki|V^TC(r_12)|lj) - ! = where V^TC(r_12) is the total TC operator + ! tc_int_2e_ao(k,i,l,j) = (ki|V^TC(r_12)|lj) + ! = where V^TC(r_12) is the total TC operator ! = tc_grad_and_lapl_ao(k,i,l,j) + tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) ! where: ! ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > - ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! ! tc_grad_square_ao(k,i,l,j) = -1/2 ! @@ -35,8 +35,9 @@ subroutine provide_int2_grad1_u12_ao() double precision :: weight1, ao_k_r, ao_i_r double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1, time2, tc1, tc2, tc - double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:), tc_int_2e_ao(:,:,:,:) - double precision, allocatable :: tmp(:,:,:), c_mat(:,:,:), tmp_grad1_u12(:,:,:) + type(gpu_double4) :: int2_grad1_u12_ao + type(gpu_double3) :: tmp_grad1_u12, tmp_grad1_u12p, tmp + double precision, allocatable :: c_mat(:,:,:), tc_int_2e_ao(:,:,:,:) double precision, external :: get_ao_two_e_integral @@ -51,6 +52,7 @@ subroutine provide_int2_grad1_u12_ao() call total_memory(mem) mem = max(1.d0, qp_max_mem - mem) + mem = 6 n_double = mem * 1.d8 n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) n_rest = int(mod(n_points_final_grid, n_blocks)) @@ -64,9 +66,9 @@ subroutine provide_int2_grad1_u12_ao() ! --- ! --- - allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + call gpu_allocate(int2_grad1_u12_ao, ao_num,ao_num,n_points_final_grid,4) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + call gpu_allocate(tmp,n_points_extra_final_grid,ao_num,ao_num) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (j, i, jpoint) & @@ -75,49 +77,55 @@ subroutine provide_int2_grad1_u12_ao() do j = 1, ao_num do i = 1, ao_num do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + tmp%f(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) + call gpu_allocate(tmp_grad1_u12,n_points_extra_final_grid,n_blocks,4) + call gpu_allocate(tmp_grad1_u12p,n_points_extra_final_grid,n_blocks,4) tc = 0.d0 - + + type(gpu_stream) :: stream(4) + do i=1,4 + call gpu_stream_create(stream(i)) + enddo + do i_pass = 1, n_pass ii = (i_pass-1)*n_blocks + 1 - + call wall_time(tc1) + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i_blocks, ipoint) & !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) - !$OMP DO + !$OMP DO do i_blocks = 1, n_blocks ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_blocks,1), tmp_grad1_u12%f(1,i_blocks,2), & + tmp_grad1_u12%f(1,i_blocks,3), tmp_grad1_u12%f(1,i_blocks,4)) enddo !$OMP END DO !$OMP END PARALLEL call wall_time(tc2) - tc = tc + tc2 - tc1 + tc = tc + tc2 - tc1 + call gpu_synchronize() + call gpu_copy(tmp_grad1_u12,tmp_grad1_u12p) do m = 1, 4 - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + call gpu_set_stream(blas_handle, stream(m)) + call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12p%f(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num) enddo enddo - - deallocate(tmp_grad1_u12) - if(n_rest .gt. 0) then - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4)) - + ii = n_pass*n_blocks + 1 call wall_time(tc1) @@ -125,26 +133,35 @@ subroutine provide_int2_grad1_u12_ao() !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i_rest, ipoint) & !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) - !$OMP DO + !$OMP DO do i_rest = 1, n_rest ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_rest,1), tmp_grad1_u12%f(1,i_rest,2), & + tmp_grad1_u12%f(1,i_rest,3), tmp_grad1_u12%f(1,i_rest,4)) enddo !$OMP END DO !$OMP END PARALLEL call wall_time(tc2) - tc = tc + tc2 - tc1 - + tc = tc + tc2 - tc1 + do m = 1, 4 - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + call gpu_set_stream(blas_handle, stream(m)) + call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12%f(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num) enddo - deallocate(tmp_grad1_u12) endif + call gpu_synchronize() + call gpu_deallocate(tmp_grad1_u12) + call gpu_deallocate(tmp_grad1_u12p) - deallocate(tmp) + do i=1,4 + call gpu_stream_destroy(stream(i)) + enddo + + + call gpu_deallocate(tmp) call wall_time(time1) @@ -152,6 +169,8 @@ subroutine provide_int2_grad1_u12_ao() print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0 call print_memory_usage() +!TODO +stop ! --- ! --- ! --- @@ -177,7 +196,7 @@ subroutine provide_int2_grad1_u12_ao() !$OMP END DO !$OMP END PARALLEL call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , int2_grad1_u12_ao%f(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) deallocate(c_mat) @@ -188,23 +207,23 @@ subroutine provide_int2_grad1_u12_ao() ! --- call wall_time(time1) - + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) do m = 1, 3 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid - + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) - + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) enddo enddo @@ -213,7 +232,7 @@ subroutine provide_int2_grad1_u12_ao() !$OMP END PARALLEL call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , int2_grad1_u12_ao%f(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) enddo deallocate(c_mat) @@ -234,7 +253,7 @@ subroutine provide_int2_grad1_u12_ao() ! --- - call wall_time(time1) + call wall_time(time1) PROVIDE ao_integrals_map !$OMP PARALLEL DEFAULT(NONE) & @@ -245,7 +264,7 @@ subroutine provide_int2_grad1_u12_ao() do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - ! < 1:i, 2:j | 1:k, 2:l > + ! < 1:i, 2:j | 1:k, 2:l > tc_int_2e_ao(k,i,l,j) = tc_int_2e_ao(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) enddo enddo @@ -263,7 +282,7 @@ subroutine provide_int2_grad1_u12_ao() print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") call ezfio_set_work_empty(.False.) - write(11) int2_grad1_u12_ao(:,:,:,1:3) + write(11) int2_grad1_u12_ao%f(:,:,:,1:3) close(11) print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' @@ -276,7 +295,7 @@ subroutine provide_int2_grad1_u12_ao() ! ---- - deallocate(int2_grad1_u12_ao) + call gpu_deallocate(int2_grad1_u12_ao) deallocate(tc_int_2e_ao) call wall_time(time2) From 228796cff525bc6420b2b961c803fda2aa0094bc Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 12 Jul 2024 17:41:15 +0200 Subject: [PATCH 125/131] split the mo_optimization into mo_optimizatio_utils and mo_optimization --- src/casscf_cipsi/NEED | 1 + src/mo_optimization/NEED | 6 +- src/mo_optimization/cipsi_orb_opt.irp.f | 82 +------------------ .../EZFIO.cfg | 0 src/mo_optimization_utils/NEED | 5 ++ src/mo_optimization_utils/README.md | 74 +++++++++++++++++ .../class.irp.f | 0 .../constants.h | 0 .../diagonal_hessian_list_opt.irp.f | 0 .../diagonal_hessian_opt.irp.f | 0 .../diagonalization_hessian.irp.f | 0 .../first_diagonal_hessian_list_opt.irp.f | 0 .../first_diagonal_hessian_opt.irp.f | 0 .../first_gradient_list_opt.irp.f | 0 .../first_gradient_opt.irp.f | 0 .../first_hessian_list_opt.irp.f | 0 .../first_hessian_opt.irp.f | 0 .../gradient_list_opt.irp.f | 0 .../gradient_opt.irp.f | 0 .../hessian_list_opt.irp.f | 0 .../hessian_opt.irp.f | 0 .../org/TODO.org | 0 .../org/debug_gradient_list_opt.org | 0 .../org/debug_gradient_opt.org | 0 .../org/debug_hessian_list_opt.org | 0 .../org/debug_hessian_opt.org | 0 .../org/diagonal_hessian_list_opt.org | 0 .../org/diagonal_hessian_opt.org | 0 .../org/diagonalization_hessian.org | 0 .../org/first_diagonal_hessian_list_opt.org | 0 .../org/first_diagonal_hessian_opt.org | 0 .../org/first_gradient_list_opt.org | 0 .../org/first_gradient_opt.org | 0 .../org/first_hessian_list_opt.org | 0 .../org/first_hessian_opt.org | 0 .../org/gradient_list_opt.org | 0 .../org/gradient_opt.org | 0 .../org/hessian_list_opt.org | 0 .../org/hessian_opt.org | 0 .../org/my_providers.org | 0 .../org/optimization.org | 0 .../org/orb_opt_trust_v2.org | 0 .../org/state_average_energy.org | 0 .../org/state_weight_normalization.org | 0 .../org/update_parameters.org | 0 .../org/update_st_av_ci_energy.org | 0 .../routine_opt_mos.irp.f | 81 ++++++++++++++++++ .../run_orb_opt_trust_v2.irp.f | 0 .../save_energy.irp.f | 0 .../state_average_energy.irp.f | 0 .../state_weight_normalization.irp.f | 0 .../update_parameters.irp.f | 0 .../update_st_av_ci_energy.irp.f | 0 53 files changed, 163 insertions(+), 86 deletions(-) rename src/{mo_optimization => mo_optimization_utils}/EZFIO.cfg (100%) create mode 100644 src/mo_optimization_utils/NEED create mode 100644 src/mo_optimization_utils/README.md rename src/{mo_optimization => mo_optimization_utils}/class.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/constants.h (100%) rename src/{mo_optimization => mo_optimization_utils}/diagonal_hessian_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/diagonal_hessian_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/diagonalization_hessian.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_diagonal_hessian_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_diagonal_hessian_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_gradient_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_gradient_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_hessian_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/first_hessian_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/gradient_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/gradient_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/hessian_list_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/hessian_opt.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/org/TODO.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/debug_gradient_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/debug_gradient_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/debug_hessian_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/debug_hessian_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/diagonal_hessian_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/diagonal_hessian_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/diagonalization_hessian.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_diagonal_hessian_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_diagonal_hessian_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_gradient_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_gradient_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_hessian_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/first_hessian_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/gradient_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/gradient_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/hessian_list_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/hessian_opt.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/my_providers.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/optimization.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/orb_opt_trust_v2.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/state_average_energy.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/state_weight_normalization.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/update_parameters.org (100%) rename src/{mo_optimization => mo_optimization_utils}/org/update_st_av_ci_energy.org (100%) create mode 100644 src/mo_optimization_utils/routine_opt_mos.irp.f rename src/{mo_optimization => mo_optimization_utils}/run_orb_opt_trust_v2.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/save_energy.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/state_average_energy.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/state_weight_normalization.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/update_parameters.irp.f (100%) rename src/{mo_optimization => mo_optimization_utils}/update_st_av_ci_energy.irp.f (100%) diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED index dd91c7bd..11d1a78c 100644 --- a/src/casscf_cipsi/NEED +++ b/src/casscf_cipsi/NEED @@ -3,3 +3,4 @@ selectors_full generators_cas two_body_rdm dav_general_mat +mo_optimization diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED index 91f41ee3..33f770c3 100644 --- a/src/mo_optimization/NEED +++ b/src/mo_optimization/NEED @@ -1,7 +1,3 @@ -two_body_rdm -hartree_fock -cipsi -davidson_undressed +mo_optimization_utils selectors_full generators_full -utils_trust_region diff --git a/src/mo_optimization/cipsi_orb_opt.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f index 7e3a79eb..19b3e9db 100644 --- a/src/mo_optimization/cipsi_orb_opt.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -2,87 +2,7 @@ program optimization read_wf = .true. ! must be True for the orbital optimization !!! TOUCH read_wf - call run_optimization + call run_optimization_mos_CIPSI end -subroutine run_optimization - - implicit none - - double precision :: e_cipsi, e_opt, delta_e - double precision, allocatable :: Ev(:),PT2(:) - integer :: nb_iter,i - logical :: not_converged - character (len=100) :: filename - - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals - allocate(Ev(N_states),PT2(N_states)) - - not_converged = .True. - nb_iter = 0 - - ! To start from the wf - N_det_max = max(n_det,5) - TOUCH N_det_max - - open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') - write(10,*) " Ndet E_cipsi E_opt Delta_e" - call state_average_energy(e_cipsi) - write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 - close(10) - - do while (not_converged) - print*,'' - print*,'======================' - print*,' Cipsi step:', nb_iter - print*,'======================' - print*,'' - print*,'********** cipsi step **********' - ! cispi calculation - call run_stochastic_cipsi(Ev,PT2) - - ! State average energy after the cipsi step - call state_average_energy(e_cipsi) - - print*,'' - print*,'********** optimization step **********' - ! orbital optimization - call run_orb_opt_trust_v2 - - ! State average energy after the orbital optimization - call state_average_energy(e_opt) - - print*,'' - print*,'********** diff step **********' - ! Gain in energy - delta_e = e_opt - e_cipsi - print*, 'Gain in energy during the orbital optimization:', delta_e - - open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') - write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e - close(10) - - ! Exit - if (delta_e > 1d-12) then - print*, 'WARNING, something wrong happened' - print*, 'The gain (delta_e) in energy during the optimization process' - print*, 'is > 0, but it must be < 0' - print*, 'The program will exit' - exit - endif - - if (n_det > n_det_max_opt) then - print*, 'The number of determinants in the wf > n_det_max_opt' - print*, 'The program will exit' - exit - endif - - ! To double the number of determinants in the wf - N_det_max = int(dble(n_det * 2)*0.9) - TOUCH N_det_max - - nb_iter = nb_iter + 1 - enddo - -end diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization_utils/EZFIO.cfg similarity index 100% rename from src/mo_optimization/EZFIO.cfg rename to src/mo_optimization_utils/EZFIO.cfg diff --git a/src/mo_optimization_utils/NEED b/src/mo_optimization_utils/NEED new file mode 100644 index 00000000..1a78a17f --- /dev/null +++ b/src/mo_optimization_utils/NEED @@ -0,0 +1,5 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +utils_trust_region diff --git a/src/mo_optimization_utils/README.md b/src/mo_optimization_utils/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization_utils/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization_utils/class.irp.f similarity index 100% rename from src/mo_optimization/class.irp.f rename to src/mo_optimization_utils/class.irp.f diff --git a/src/mo_optimization/constants.h b/src/mo_optimization_utils/constants.h similarity index 100% rename from src/mo_optimization/constants.h rename to src/mo_optimization_utils/constants.h diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization_utils/diagonal_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/diagonal_hessian_list_opt.irp.f rename to src/mo_optimization_utils/diagonal_hessian_list_opt.irp.f diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization_utils/diagonal_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/diagonal_hessian_opt.irp.f rename to src/mo_optimization_utils/diagonal_hessian_opt.irp.f diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization_utils/diagonalization_hessian.irp.f similarity index 100% rename from src/mo_optimization/diagonalization_hessian.irp.f rename to src/mo_optimization_utils/diagonalization_hessian.irp.f diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization_utils/first_diagonal_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_diagonal_hessian_list_opt.irp.f rename to src/mo_optimization_utils/first_diagonal_hessian_list_opt.irp.f diff --git a/src/mo_optimization/first_diagonal_hessian_opt.irp.f b/src/mo_optimization_utils/first_diagonal_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/first_diagonal_hessian_opt.irp.f rename to src/mo_optimization_utils/first_diagonal_hessian_opt.irp.f diff --git a/src/mo_optimization/first_gradient_list_opt.irp.f b/src/mo_optimization_utils/first_gradient_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_gradient_list_opt.irp.f rename to src/mo_optimization_utils/first_gradient_list_opt.irp.f diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization_utils/first_gradient_opt.irp.f similarity index 100% rename from src/mo_optimization/first_gradient_opt.irp.f rename to src/mo_optimization_utils/first_gradient_opt.irp.f diff --git a/src/mo_optimization/first_hessian_list_opt.irp.f b/src/mo_optimization_utils/first_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_hessian_list_opt.irp.f rename to src/mo_optimization_utils/first_hessian_list_opt.irp.f diff --git a/src/mo_optimization/first_hessian_opt.irp.f b/src/mo_optimization_utils/first_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/first_hessian_opt.irp.f rename to src/mo_optimization_utils/first_hessian_opt.irp.f diff --git a/src/mo_optimization/gradient_list_opt.irp.f b/src/mo_optimization_utils/gradient_list_opt.irp.f similarity index 100% rename from src/mo_optimization/gradient_list_opt.irp.f rename to src/mo_optimization_utils/gradient_list_opt.irp.f diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization_utils/gradient_opt.irp.f similarity index 100% rename from src/mo_optimization/gradient_opt.irp.f rename to src/mo_optimization_utils/gradient_opt.irp.f diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization_utils/hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/hessian_list_opt.irp.f rename to src/mo_optimization_utils/hessian_list_opt.irp.f diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization_utils/hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/hessian_opt.irp.f rename to src/mo_optimization_utils/hessian_opt.irp.f diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization_utils/org/TODO.org similarity index 100% rename from src/mo_optimization/org/TODO.org rename to src/mo_optimization_utils/org/TODO.org diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization_utils/org/debug_gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/debug_gradient_list_opt.org rename to src/mo_optimization_utils/org/debug_gradient_list_opt.org diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization_utils/org/debug_gradient_opt.org similarity index 100% rename from src/mo_optimization/org/debug_gradient_opt.org rename to src/mo_optimization_utils/org/debug_gradient_opt.org diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization_utils/org/debug_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/debug_hessian_list_opt.org rename to src/mo_optimization_utils/org/debug_hessian_list_opt.org diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization_utils/org/debug_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/debug_hessian_opt.org rename to src/mo_optimization_utils/org/debug_hessian_opt.org diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization_utils/org/diagonal_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/diagonal_hessian_list_opt.org rename to src/mo_optimization_utils/org/diagonal_hessian_list_opt.org diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization_utils/org/diagonal_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/diagonal_hessian_opt.org rename to src/mo_optimization_utils/org/diagonal_hessian_opt.org diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization_utils/org/diagonalization_hessian.org similarity index 100% rename from src/mo_optimization/org/diagonalization_hessian.org rename to src/mo_optimization_utils/org/diagonalization_hessian.org diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization_utils/org/first_diagonal_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_diagonal_hessian_list_opt.org rename to src/mo_optimization_utils/org/first_diagonal_hessian_list_opt.org diff --git a/src/mo_optimization/org/first_diagonal_hessian_opt.org b/src/mo_optimization_utils/org/first_diagonal_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/first_diagonal_hessian_opt.org rename to src/mo_optimization_utils/org/first_diagonal_hessian_opt.org diff --git a/src/mo_optimization/org/first_gradient_list_opt.org b/src/mo_optimization_utils/org/first_gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_gradient_list_opt.org rename to src/mo_optimization_utils/org/first_gradient_list_opt.org diff --git a/src/mo_optimization/org/first_gradient_opt.org b/src/mo_optimization_utils/org/first_gradient_opt.org similarity index 100% rename from src/mo_optimization/org/first_gradient_opt.org rename to src/mo_optimization_utils/org/first_gradient_opt.org diff --git a/src/mo_optimization/org/first_hessian_list_opt.org b/src/mo_optimization_utils/org/first_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_hessian_list_opt.org rename to src/mo_optimization_utils/org/first_hessian_list_opt.org diff --git a/src/mo_optimization/org/first_hessian_opt.org b/src/mo_optimization_utils/org/first_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/first_hessian_opt.org rename to src/mo_optimization_utils/org/first_hessian_opt.org diff --git a/src/mo_optimization/org/gradient_list_opt.org b/src/mo_optimization_utils/org/gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/gradient_list_opt.org rename to src/mo_optimization_utils/org/gradient_list_opt.org diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization_utils/org/gradient_opt.org similarity index 100% rename from src/mo_optimization/org/gradient_opt.org rename to src/mo_optimization_utils/org/gradient_opt.org diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization_utils/org/hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/hessian_list_opt.org rename to src/mo_optimization_utils/org/hessian_list_opt.org diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization_utils/org/hessian_opt.org similarity index 100% rename from src/mo_optimization/org/hessian_opt.org rename to src/mo_optimization_utils/org/hessian_opt.org diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization_utils/org/my_providers.org similarity index 100% rename from src/mo_optimization/org/my_providers.org rename to src/mo_optimization_utils/org/my_providers.org diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization_utils/org/optimization.org similarity index 100% rename from src/mo_optimization/org/optimization.org rename to src/mo_optimization_utils/org/optimization.org diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization_utils/org/orb_opt_trust_v2.org similarity index 100% rename from src/mo_optimization/org/orb_opt_trust_v2.org rename to src/mo_optimization_utils/org/orb_opt_trust_v2.org diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization_utils/org/state_average_energy.org similarity index 100% rename from src/mo_optimization/org/state_average_energy.org rename to src/mo_optimization_utils/org/state_average_energy.org diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization_utils/org/state_weight_normalization.org similarity index 100% rename from src/mo_optimization/org/state_weight_normalization.org rename to src/mo_optimization_utils/org/state_weight_normalization.org diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization_utils/org/update_parameters.org similarity index 100% rename from src/mo_optimization/org/update_parameters.org rename to src/mo_optimization_utils/org/update_parameters.org diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization_utils/org/update_st_av_ci_energy.org similarity index 100% rename from src/mo_optimization/org/update_st_av_ci_energy.org rename to src/mo_optimization_utils/org/update_st_av_ci_energy.org diff --git a/src/mo_optimization_utils/routine_opt_mos.irp.f b/src/mo_optimization_utils/routine_opt_mos.irp.f new file mode 100644 index 00000000..fceba2c5 --- /dev/null +++ b/src/mo_optimization_utils/routine_opt_mos.irp.f @@ -0,0 +1,81 @@ + +subroutine run_optimization_mos_CIPSI + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + double precision, allocatable :: Ev(:),PT2(:) + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals + allocate(Ev(N_states),PT2(N_states)) + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi(Ev,PT2) + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization_utils/run_orb_opt_trust_v2.irp.f similarity index 100% rename from src/mo_optimization/run_orb_opt_trust_v2.irp.f rename to src/mo_optimization_utils/run_orb_opt_trust_v2.irp.f diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization_utils/save_energy.irp.f similarity index 100% rename from src/mo_optimization/save_energy.irp.f rename to src/mo_optimization_utils/save_energy.irp.f diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization_utils/state_average_energy.irp.f similarity index 100% rename from src/mo_optimization/state_average_energy.irp.f rename to src/mo_optimization_utils/state_average_energy.irp.f diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization_utils/state_weight_normalization.irp.f similarity index 100% rename from src/mo_optimization/state_weight_normalization.irp.f rename to src/mo_optimization_utils/state_weight_normalization.irp.f diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization_utils/update_parameters.irp.f similarity index 100% rename from src/mo_optimization/update_parameters.irp.f rename to src/mo_optimization_utils/update_parameters.irp.f diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization_utils/update_st_av_ci_energy.irp.f similarity index 100% rename from src/mo_optimization/update_st_av_ci_energy.irp.f rename to src/mo_optimization_utils/update_st_av_ci_energy.irp.f From 6985d4d5493a6204f95a8d1d1cccbccc80c12071 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 12 Jul 2024 18:25:17 +0200 Subject: [PATCH 126/131] the casscf does not work with mo optimization ... --- src/casscf_cipsi/EZFIO.cfg | 6 + src/casscf_cipsi/NEED | 2 +- src/casscf_cipsi/casscf.irp.f | 166 +++++++++--------- .../class.irp.f | 0 4 files changed, 94 insertions(+), 80 deletions(-) rename src/{mo_optimization_utils => mo_optimization}/class.irp.f (100%) diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg index 18e0b6b1..5b72d906 100644 --- a/src/casscf_cipsi/EZFIO.cfg +++ b/src/casscf_cipsi/EZFIO.cfg @@ -79,3 +79,9 @@ type: logical doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change interface: ezfio,provider,ocaml default: False + +[act_mos_opt] +type: logical +doc: If |true|, the active orbitals are also optimized variationally +interface: ezfio,provider,ocaml +default: False diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED index 11d1a78c..32f5ae90 100644 --- a/src/casscf_cipsi/NEED +++ b/src/casscf_cipsi/NEED @@ -3,4 +3,4 @@ selectors_full generators_cas two_body_rdm dav_general_mat -mo_optimization +mo_optimization_utils diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index dc3e2245..b64a9d8f 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -46,94 +46,101 @@ subroutine run do while (.not.converged) print*,'pt2_max = ',pt2_max call run_stochastic_cipsi(Ev,PT2) - print*,'Ev,PT2',Ev(1),PT2(1) - E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) - energy_old = energy - energy = eone+etwo+ecore - pt2_max_before = pt2_max - - call write_time(6) - call write_int(6,iteration,'CAS-SCF iteration = ') - call write_double(6,energy,'State-average CAS-SCF energy = ') -! if(n_states == 1)then -! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) -! call ezfio_get_casscf_cipsi_energy(PT2) - double precision :: delta_E_istate, e_av - e_av = 0.d0 - do istate=1,N_states - e_av += state_average_weight(istate) * Ev(istate) - if(istate.gt.1)then - delta_E_istate = E_PT2(istate) - E_PT2(1) - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate - endif - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate) - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate) -! call write_double(6,E_PT2(istate),'E + PT2 energy = ') -! call write_double(6,PT2(istate),' PT2 = ') - enddo - call write_double(6,e_av,'State-average CAS-SCF energy bis = ') - call write_double(6,pt2_max,' PT2_MAX = ') +! if(act_mos_opt)then DOES NOT WORK +! call run_orb_opt_trust_v2 +! call run_stochastic_cipsi(Ev,PT2) ! endif - - print*,'' - call write_double(6,norm_grad_vec2,'Norm of gradients = ') - call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') - call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') - call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') - print*,'' - call write_double(6,energy_improvement, 'Predicted energy improvement = ') - - if(criterion_casscf == "energy")then - converged = dabs(energy_improvement) < thresh_scf - else if (criterion_casscf == "gradients")then - converged = norm_grad_vec2 < thresh_scf - else if (criterion_casscf == "e_pt2")then - delta_E = 0.d0 - do istate = 1, N_states - delta_E += dabs(E_PT2(istate) - ept2_before(istate)) - enddo - converged = dabs(delta_E) < thresh_casscf - endif - ept2_before = E_PT2 - if(.not.small_active_space)then - if(adaptive_pt2_max)then - pt2_max = dabs(energy_improvement / (pt2_relative_error)) - pt2_max = min(pt2_max, pt2_max_before) - if(n_act_orb.ge.n_big_act_orb)then - pt2_max = max(pt2_max,pt2_min_casscf) - endif + if(.True.)then + print*,'Ev,PT2',Ev(1),PT2(1) + E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'State-average CAS-SCF energy = ') +!! if(n_states == 1)then +!! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) +!! call ezfio_get_casscf_cipsi_energy(PT2) + double precision :: delta_E_istate, e_av + e_av = 0.d0 + do istate=1,N_states + e_av += state_average_weight(istate) * Ev(istate) + if(istate.gt.1)then + delta_E_istate = E_PT2(istate) - E_PT2(1) + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate + endif + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate) + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate) +!! call write_double(6,E_PT2(istate),'E + PT2 energy = ') +!! call write_double(6,PT2(istate),' PT2 = ') + enddo + call write_double(6,e_av,'State-average CAS-SCF energy bis = ') + call write_double(6,pt2_max,' PT2_MAX = ') +!! endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = 0.d0 + do istate = 1, N_states + delta_E += dabs(E_PT2(istate) - ept2_before(istate)) + enddo + converged = dabs(delta_E) < thresh_casscf endif - endif - print*,'' - call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') - - mo_coef = NewOrbs - mo_occ = occnum - if(.not.converged)then - call save_mos - iteration += 1 - if(norm_grad_vec2.gt.0.01d0)then - N_det = N_states - else - N_det = max(N_det/8 ,N_states) - endif - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - read_wf = .True. - call clear_mo_map - SOFT_TOUCH mo_coef N_det psi_det psi_coef + ept2_before = E_PT2 if(.not.small_active_space)then if(adaptive_pt2_max)then - SOFT_TOUCH pt2_max + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif endif endif - if(iteration .gt. 3)then - state_following_casscf = state_following_casscf_cipsi_save - soft_touch state_following_casscf + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + if(.not.converged)then + call save_mos + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(.not.small_active_space)then + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_cipsi_save + soft_touch state_following_casscf + endif endif endif - + enddo + if(.True.)then integer :: i print*,'Converged CASSCF ' print*,'--------------------------' @@ -153,6 +160,7 @@ subroutine run ! write(*,*)mcscf_fock_alpha_mo(i,i) enddo + endif end diff --git a/src/mo_optimization_utils/class.irp.f b/src/mo_optimization/class.irp.f similarity index 100% rename from src/mo_optimization_utils/class.irp.f rename to src/mo_optimization/class.irp.f From 31028f8979189d1cc2822d4dc60eb527ed639932 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 16 Jul 2024 17:44:48 +0200 Subject: [PATCH 127/131] fixed some weird dependencies in TC, introduced an AO cholesky 2e function --- .../cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 32 +++++++++++++++++++ plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f | 20 ++++++++++-- .../local/non_h_ints_mu/total_tc_int.irp.f | 18 +++++++---- plugins/local/slater_tc/slater_tc_opt.irp.f | 2 -- plugins/local/slater_tc/tc_hmat.irp.f | 4 ++- src/ao_two_e_ints/cholesky.irp.f | 15 ++++++++- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index e363830d..6b8f3b42 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -1,4 +1,36 @@ + +! --- + +subroutine run_pouet + + BEGIN_DOC + ! Selected Full Configuration Interaction with Stochastic selection and PT2. + END_DOC + + use selection_types + implicit none + integer :: i, j, k, ndet + integer :: to_select + logical :: has + type(pt2_type) :: pt2_data, pt2_data_err + double precision :: rss + double precision :: correlation_energy_ratio + double precision :: hf_energy_ref + double precision :: relative_error + double precision, allocatable :: zeros(:),E_tc(:), norm(:) + + logical, external :: qp_stop + double precision, external :: memory_of_double + + PROVIDE mo_l_coef mo_r_coef + PROVIDE H_apply_buffer_allocated distributed_davidson + + print*, ' Diagonal elements of the Fock matrix ' + do i = 1, mo_num + write(*,*) i, Fock_matrix_tc_mo_tot(i,i) + enddo +end ! --- subroutine run_stochastic_cipsi diff --git a/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f b/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f index 1c1c0411..f1de0fe3 100644 --- a/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -65,7 +65,15 @@ subroutine run_cipsi_tc() if (.not. is_zmq_slave) then - PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + ! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot + PROVIDE Fock_matrix_tc_mo_tot + ! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot + ! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided + endif + if(.True.)then ! DO NOT REMOVE THE IF(.TRUE.) !! + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + endif if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then @@ -90,8 +98,16 @@ subroutine run_cipsi_tc() call json_close else + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + ! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot + PROVIDE Fock_matrix_tc_mo_tot + ! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot + ! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided + endif - PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + endif if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 656f5f16..fb09168e 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -288,25 +288,31 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL else - print*, ' ao_integrals_map will be used' - PROVIDE ao_integrals_map +! print*, ' ao_integrals_map will be used' +! PROVIDE ao_integrals_map + print*,'Cholesky vectors will be used ' + double precision :: get_ao_integ_chol,eri + eri = get_ao_integ_chol(1,1,1,1) ! FOR OPENMP !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !$OMP PRIVATE(i, j, k, l) +!!! !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) & + !$OMP PRIVATE(i, j, k, l,eri) !$OMP DO COLLAPSE(3) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num ! < 1:i, 2:j | 1:k, 2:l > - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) +! eri = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + eri = get_ao_integ_chol(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + eri enddo enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - FREE ao_integrals_map +! FREE ao_integrals_map endif if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 5651a299..3c4421f8 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -10,8 +10,6 @@ subroutine provide_all_three_ints_bi_ortho() implicit none double precision :: t1, t2 - PROVIDE ao_two_e_integrals_in_map - print *, ' start provide_all_three_ints_bi_ortho' call wall_time(t1) diff --git a/plugins/local/slater_tc/tc_hmat.irp.f b/plugins/local/slater_tc/tc_hmat.irp.f index cc780364..6323d129 100644 --- a/plugins/local/slater_tc/tc_hmat.irp.f +++ b/plugins/local/slater_tc/tc_hmat.irp.f @@ -30,7 +30,9 @@ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)] print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...' call wall_time(t1) - call provide_all_three_ints_bi_ortho() + if(three_body_h_tc)then + call provide_all_three_ints_bi_ortho() + endif i = 1 j = 1 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index a2d9d043..bfa6bd0a 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,3 +1,15 @@ +double precision function get_ao_integ_chol(i,j,k,l) + implicit none + BEGIN_DOC + ! CHOLESKY representation of the integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + integer, intent(in) :: i,j,k,l + double precision, external :: ddot + get_ao_integ_chol = ddot(cholesky_ao_num, cholesky_ao_transp(1,i,j), 1, cholesky_ao_transp(1,k,l), 1) + +end + BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -162,7 +174,8 @@ END_PROVIDER np = int(np8,4) if (np <= 0) stop 'np<=0' - rank_max = min(np,20*elec_num*elec_num) +! rank_max = min(np,20*elec_num*elec_num) + rank_max = np call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) From a0140b9b0af59712494de5b443846c70a432465b Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 24 Jul 2024 12:20:16 +0200 Subject: [PATCH 128/131] added mu_of_r_mean_field.irp.f --- src/mu_of_r/mu_of_r_mean_field.irp.f | 132 +++++++++++++++++++++++++++ src/mu_of_r/test_proj_op.irp.f | 96 ++++++++++++++++++- 2 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 src/mu_of_r/mu_of_r_mean_field.irp.f diff --git a/src/mu_of_r/mu_of_r_mean_field.irp.f b/src/mu_of_r/mu_of_r_mean_field.irp.f new file mode 100644 index 00000000..9b9c2e20 --- /dev/null +++ b/src/mu_of_r/mu_of_r_mean_field.irp.f @@ -0,0 +1,132 @@ +BEGIN_PROVIDER [ double precision, two_e_int_mf, (elec_beta_num,elec_alpha_num,elec_beta_num,elec_alpha_num)] + implicit none + integer :: i,j,k,l + double precision :: get_two_e_integral + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = 1, elec_alpha_num + do l = 1, elec_beta_num + two_e_int_mf(l,k,j,i) = get_two_e_integral(l,k,j,i,mo_integrals_map) + enddo + enddo + enddo + enddo +END_PROVIDER + +subroutine get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: f_mf_ab,two_bod_dens, dm_a, dm_b + double precision, allocatable :: mos_array_r(:),mos_array_a(:), mos_array_b(:) + integer :: i,j,k,l + allocate(mos_array_r(mo_num), mos_array_a(elec_alpha_num), mos_array_b(elec_alpha_num)) + call give_all_mos_at_r(r,mos_array_r) + do i = 1, elec_alpha_num + mos_array_a(i) = mos_array_r(i) + enddo + do i = 1, elec_beta_num + mos_array_b(i) = mos_array_r(i) + enddo + + dm_a = 0.d0 + do i = 1, elec_alpha_num + dm_a += mos_array_a(i) * mos_array_a(i) + enddo + + dm_b = 0.d0 + do i = 1, elec_beta_num + dm_b += mos_array_b(i) * mos_array_b(i) + enddo + two_bod_dens = dm_a * dm_b + + f_mf_ab = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = 1, elec_alpha_num + do l = 1, elec_beta_num + f_mf_ab += two_e_int_mf(l,k,j,i) * mos_array_a(i) * mos_array_a(k) * mos_array_b(j) * mos_array_b(l) + enddo + enddo + enddo + enddo + ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm + f_mf_ab *= 2.d0 + two_bod_dens *= 2.d0 + +end + +subroutine get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: f_mf_ab, two_bod_dens + double precision, intent(out) :: grad_two_bod_dens(3), grad_f_mf_ab(3) + double precision, intent(out) :: dm_a, dm_b, grad_dm_a(3), grad_dm_b(3) + + double precision, allocatable :: mos_array_r(:), mos_grad_array_r(:,:) + double precision, allocatable :: mos_array_a(:), mos_array_b(:) + double precision, allocatable :: mos_grad_array_a(:,:), mos_grad_array_b(:,:) + double precision :: mo_i, mo_j, mo_k, mo_l + double precision :: grad_mo_i(3), grad_mo_j(3), grad_mo_k(3), grad_mo_l(3) + + integer :: i,j,k,l + allocate(mos_array_r(mo_num),mos_grad_array_r(3,mo_num)) + allocate(mos_array_a(elec_alpha_num), mos_array_b(elec_beta_num)) + allocate(mos_grad_array_a(3,elec_alpha_num), mos_grad_array_b(3,elec_beta_num)) + call give_all_mos_and_grad_at_r(r,mos_array_r,mos_grad_array_r) + do i = 1, elec_alpha_num + mos_array_a(i) = mos_array_r(i) + mos_grad_array_a(1:3,i) = mos_grad_array_r(1:3,i) + enddo + do i = 1, elec_beta_num + mos_array_b(i) = mos_array_r(i) + mos_grad_array_b(1:3,i) = mos_grad_array_r(1:3,i) + enddo + + ! ALPHA DENSITY AND GRADIENT + dm_a = 0.d0 + grad_dm_a = 0.d0 + do i = 1, elec_alpha_num + dm_a += mos_array_a(i) * mos_array_a(i) + grad_dm_a(1:3) += 2.d0 * mos_array_a(i) * mos_grad_array_a(1:3,i) + enddo + + ! BETA DENSITY AND GRADIENT + dm_b = 0.d0 + grad_dm_b = 0.d0 + do i = 1, elec_beta_num + dm_b += mos_array_b(i) * mos_array_b(i) + grad_dm_b(1:3) += 2.d0 * mos_array_b(i) * mos_grad_array_b(1:3,i) + enddo + ! TWO-BODY DENSITY AND GRADIENT + two_bod_dens = dm_a * dm_b + grad_two_bod_dens(1:3) = dm_a * grad_dm_b(1:3) + dm_b * grad_dm_a(1:3) + + ! F_MF and GRADIENT + grad_f_mf_ab = 0.d0 + f_mf_ab = 0.d0 + do i = 1, elec_alpha_num + mo_i = mos_array_a(i) + grad_mo_i(1:3) = mos_grad_array_a(1:3,i) + do j = 1, elec_beta_num + mo_j = mos_array_b(j) + grad_mo_j(1:3) = mos_grad_array_b(1:3,j) + do k = 1, elec_alpha_num + mo_k = mos_array_a(k) + grad_mo_k(1:3) = mos_grad_array_a(1:3,k) + do l = 1, elec_beta_num + mo_l = mos_array_b(l) + grad_mo_l(1:3) = mos_grad_array_b(1:3,l) + f_mf_ab += two_e_int_mf(l,k,j,i) * mo_i * mo_j * mo_k * mo_l + grad_f_mf_ab(1:3) += two_e_int_mf(l,k,j,i) * & + (mo_i * mo_j * mo_k * grad_mo_l(1:3) + mo_i * mo_j * grad_mo_k(1:3) * mo_l & + +mo_i * grad_mo_j(1:3) * mo_k * mo_l + grad_mo_i(1:3) * mo_j * mo_k * mo_l) + enddo + enddo + enddo + enddo + + f_mf_ab *= 2.d0 + two_bod_dens *= 2.d0 + grad_f_mf_ab *= 2.D0 + grad_two_bod_dens *= 2.d0 +end diff --git a/src/mu_of_r/test_proj_op.irp.f b/src/mu_of_r/test_proj_op.irp.f index fd5e976b..bd2f3b4f 100644 --- a/src/mu_of_r/test_proj_op.irp.f +++ b/src/mu_of_r/test_proj_op.irp.f @@ -17,7 +17,9 @@ program projected_operators ! call test_f_ii_valence_ab ! call test_f_ia_valence_ab ! call test_f_ii_ia_aa_valence_ab - call test +! call test +! call test_f_mean_field + call test_grad_f_mean_field end @@ -35,3 +37,95 @@ subroutine test print*,'accu = ',accu end + +subroutine test_f_mean_field + implicit none + integer :: i_point + double precision :: weight,r(3) + double precision :: ref_f, new_f, accu_f + double precision :: ref_two_dens, new_two_dens, accu_two_dens, dm_a, dm_b + accu_f = 0.d0 + accu_two_dens = 0.d0 + do i_point = 1, n_points_final_grid + r(1:3) = final_grid_points(1:3,i_point) + weight = final_weight_at_r_vector(i_point) + call get_f_mf_ab(r,new_f,new_two_dens, dm_a, dm_b) + call f_HF_valence_ab(r,r,ref_f,ref_two_dens) + accu_f += weight * dabs(new_f- ref_f) + accu_two_dens += weight * dabs(new_two_dens - ref_two_dens) + enddo + print*,'accu_f = ',accu_f + print*,'accu_two_dens = ',accu_two_dens + +end + +subroutine test_grad_f_mean_field + implicit none + integer :: i_point,k + double precision :: weight,r(3) + double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3) + double precision :: grad_dm_a(3), grad_dm_b(3) + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + + double precision :: num_grad_f_mf_ab(3), num_grad_two_bod_dens(3) + double precision :: num_grad_dm_a(3), num_grad_dm_b(3) + double precision :: f_mf_ab_p,f_mf_ab_m + double precision :: two_bod_dens_p, two_bod_dens_m + double precision :: dm_a_p, dm_a_m + double precision :: dm_b_p, dm_b_m + double precision :: rbis(3), dr + double precision :: accu_grad_f_mf_ab(3),accu_grad_two_bod_dens(3) + double precision :: accu_grad_dm_a(3),accu_grad_dm_b(3) + double precision :: accu_f_mf_ab, accu_two_bod_dens, accu_dm_a, accu_dm_b + dr = 0.00001d0 + accu_f_mf_ab = 0.d0 + accu_two_bod_dens = 0.d0 + accu_dm_a = 0.d0 + accu_dm_b = 0.d0 + + accu_grad_f_mf_ab = 0.d0 + accu_grad_two_bod_dens = 0.d0 + accu_grad_dm_a = 0.d0 + accu_grad_dm_b = 0.d0 + do i_point = 1, n_points_final_grid + r(1:3) = final_grid_points(1:3,i_point) + weight = final_weight_at_r_vector(i_point) + call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + call get_f_mf_ab(r,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p) + accu_f_mf_ab += weight * dabs(f_mf_ab - f_mf_ab_p) + accu_two_bod_dens += weight * dabs(two_bod_dens - two_bod_dens_p) + accu_dm_a += weight*dabs(dm_a - dm_a_p) + accu_dm_b += weight*dabs(dm_b - dm_b_p) + do k = 1, 3 + rbis = r + rbis(k) += dr + call get_f_mf_ab(rbis,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p) + rbis = r + rbis(k) -= dr + call get_f_mf_ab(rbis,f_mf_ab_m,two_bod_dens_m, dm_a_m, dm_b_m) + num_grad_f_mf_ab(k) = (f_mf_ab_p - f_mf_ab_m)/(2.d0*dr) + num_grad_two_bod_dens(k) = (two_bod_dens_p - two_bod_dens_m)/(2.d0*dr) + num_grad_dm_a(k) = (dm_a_p - dm_a_m)/(2.d0*dr) + num_grad_dm_b(k) = (dm_b_p - dm_b_m)/(2.d0*dr) + enddo + do k = 1, 3 + accu_grad_f_mf_ab(k) += weight * dabs(grad_f_mf_ab(k) - num_grad_f_mf_ab(k)) + accu_grad_two_bod_dens(k) += weight * dabs(grad_two_bod_dens(k) - num_grad_two_bod_dens(k)) + accu_grad_dm_a(k) += weight * dabs(grad_dm_a(k) - num_grad_dm_a(k)) + accu_grad_dm_b(k) += weight * dabs(grad_dm_b(k) - num_grad_dm_b(k)) + enddo + enddo + print*,'accu_f_mf_ab = ',accu_f_mf_ab + print*,'accu_two_bod_dens = ',accu_two_bod_dens + print*,'accu_dm_a = ',accu_dm_a + print*,'accu_dm_b = ',accu_dm_b + print*,'accu_grad_f_mf_ab = ' + print*,accu_grad_f_mf_ab + print*,'accu_grad_two_bod_dens = ' + print*,accu_grad_two_bod_dens + print*,'accu_dm_a = ' + print*,accu_grad_dm_a + print*,'accu_dm_b = ' + print*,accu_grad_dm_b + +end From cb8bef2ecda20f5e8b6076ece8cd922aa063db65 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 24 Jul 2024 12:43:20 +0200 Subject: [PATCH 129/131] added gradients of mu_mf --- src/mu_of_r/mu_of_r_mean_field.irp.f | 39 +++++++++++++++++++++++ src/mu_of_r/test_proj_op.irp.f | 47 +++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/src/mu_of_r/mu_of_r_mean_field.irp.f b/src/mu_of_r/mu_of_r_mean_field.irp.f index 9b9c2e20..6abc7e4f 100644 --- a/src/mu_of_r/mu_of_r_mean_field.irp.f +++ b/src/mu_of_r/mu_of_r_mean_field.irp.f @@ -130,3 +130,42 @@ subroutine get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_de grad_f_mf_ab *= 2.D0 grad_two_bod_dens *= 2.d0 end + +subroutine mu_of_r_mean_field(r,mu_mf, dm) + implicit none + include 'constants.include.F' + double precision, intent(in) :: r(3) + double precision, intent(out):: mu_mf, dm + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + call get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b) + dm = dm_a + dm_b + if(dabs(two_bod_dens).lt.1.d-10)then + mu_mf = 1.d+10 + else + mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens + endif +end + +subroutine grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm) + implicit none + include 'constants.include.F' + double precision, intent(in) :: r(3) + double precision, intent(out):: grad_mu_mf(3), grad_dm(3) + double precision, intent(out):: mu_mf, dm + double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3),grad_dm_a(3), grad_dm_b(3) + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + + dm = dm_a + dm_b + grad_dm(1:3) = grad_dm_a(1:3) + grad_dm_b(1:3) + + if(dabs(two_bod_dens).lt.1.d-10)then + mu_mf = 1.d+10 + grad_mu_mf = 0.d0 + else + mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens + grad_mu_mf(1:3) = 0.5d0 * sqpi * (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))& + /(two_bod_dens*two_bod_dens) + endif + +end diff --git a/src/mu_of_r/test_proj_op.irp.f b/src/mu_of_r/test_proj_op.irp.f index bd2f3b4f..cf53c772 100644 --- a/src/mu_of_r/test_proj_op.irp.f +++ b/src/mu_of_r/test_proj_op.irp.f @@ -19,7 +19,8 @@ program projected_operators ! call test_f_ii_ia_aa_valence_ab ! call test ! call test_f_mean_field - call test_grad_f_mean_field +! call test_grad_f_mean_field + call test_grad_mu_mf end @@ -129,3 +130,47 @@ subroutine test_grad_f_mean_field print*,accu_grad_dm_b end + +subroutine test_grad_mu_mf + implicit none + integer :: i_point,k + double precision :: weight,r(3),rbis(3) + double precision :: mu_mf, dm,grad_mu_mf(3), grad_dm(3) + double precision :: mu_mf_p, mu_mf_m, dm_m, dm_p, num_grad_mu_mf(3),dr, num_grad_dm(3) + double precision :: accu_mu, accu_dm, accu_grad_dm(3), accu_grad_mu_mf(3) + dr = 0.00001d0 + accu_grad_mu_mf = 0.d0 + accu_mu = 0.d0 + accu_grad_dm = 0.d0 + accu_dm = 0.d0 + do i_point = 1, n_points_final_grid + r(1:3) = final_grid_points(1:3,i_point) + weight = final_weight_at_r_vector(i_point) + call grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm) + call mu_of_r_mean_field(r,mu_mf_p, dm_p) + accu_mu += weight*dabs(mu_mf_p - mu_mf) + accu_dm += weight*dabs(dm_p - dm) + do k = 1, 3 + rbis = r + rbis(k) += dr + call mu_of_r_mean_field(rbis,mu_mf_p, dm_p) + rbis = r + rbis(k) -= dr + call mu_of_r_mean_field(rbis,mu_mf_m, dm_m) + + num_grad_mu_mf(k) = (mu_mf_p - mu_mf_m)/(2.d0*dr) + num_grad_dm(k) = (dm_p - dm_m)/(2.d0*dr) + enddo + do k = 1, 3 + accu_grad_dm(k)+= weight *dabs(num_grad_dm(k) - grad_dm(k)) + accu_grad_mu_mf(k)+= weight *dabs(num_grad_mu_mf(k) - grad_mu_mf(k)) + enddo + enddo + print*,'accu_mu = ',accu_mu + print*,'accu_dm = ',accu_dm + print*,'accu_grad_dm = ' + print*, accu_grad_dm + print*,'accu_grad_mu_mf = ' + print*, accu_grad_mu_mf + +end From edf3a27534e531a1866eac30202bf26ca305123a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 29 Jul 2024 16:15:48 +0200 Subject: [PATCH 130/131] rank_max --- src/ao_two_e_ints/cholesky.irp.f | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index bfa6bd0a..ccaa7239 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -174,8 +174,11 @@ END_PROVIDER np = int(np8,4) if (np <= 0) stop 'np<=0' -! rank_max = min(np,20*elec_num*elec_num) rank_max = np + ! Avoid too large arrays when there are many electrons + if (elec_num > 10) then + rank_max = min(np,20*elec_num*elec_num) + endif call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) From 4d5467218de414d76e81a5080f762519e49f54f4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 31 Jul 2024 16:36:44 +0200 Subject: [PATCH 131/131] Fix state following when N_states_diag is too small --- .../diagonalization_hs2_dressed.irp.f | 50 +++---------------- src/davidson/diagonalize_ci.irp.f | 5 +- 2 files changed, 10 insertions(+), 45 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index fb04b29b..191e0021 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -594,6 +594,13 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ state(l) = idx enddo + ! Check if all states are attributed. If not, exit and N_st_diag will be increased. + do l=1,N_st + if (state(l) == 0) then + return + endif + enddo + ! tmp array before setting state_ok ok = .False. do l = 1, N_st @@ -627,47 +634,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ ! Swapped eigenvectors prev_y = y -! if (state_following) then -! -! overlap = -1.d0 -! do k=1,shift2 -! do i=1,shift2 -! overlap(k,i) = dabs(y(k,i)) -! enddo -! enddo -! do k=1,N_st -! cmax = -1.d0 -! do i=1,N_st -! if (overlap(i,k) > cmax) then -! cmax = overlap(i,k) -! order(k) = i -! endif -! enddo -! do i=1,N_st_diag -! overlap(order(k),i) = -1.d0 -! enddo -! enddo -! overlap = y -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! y(1:shift2,k) = overlap(1:shift2,l) -! endif -! enddo -! do k=1,N_st -! overlap(k,1) = lambda(k) -! overlap(k,2) = s2(k) -! enddo -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! lambda(k) = overlap(l,1) -! s2(k) = overlap(l,2) -! endif -! enddo -! -! endif - ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- @@ -703,7 +669,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if ((itertot>1).and.(iter == 1)) then - !don't print + ! Don't print continue else write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 59c8313a..6b852905 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -282,9 +282,8 @@ END_PROVIDER print*,' Within the ',N_det,'determinants selected' print*,' and the ',N_states_diag,'states requested' print*,' We did not find only states with S^2 values close to ',expected_s2 - print*,' We will then set the first N_states eigenvectors of the H matrix' - print*,' as the CI_eigenvectors' - print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,' You should consider more states, or change s2_eig, or just enlarge the CI space' + print*,'!!!!!!!! WARNING !!!!!!!!!' print*,'' do j=1,min(N_states_diag,N_det)