From 63650890e2e936934ba794b30961cef09d9792e3 Mon Sep 17 00:00:00 2001 From: Emmanuel Giner LCT Date: Fri, 22 Sep 2023 16:40:12 +0200 Subject: [PATCH 01/64] trying to fix bug --- src/tc_bi_ortho/psi_r_l_prov.irp.f | 6 ++++-- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index b28c417f..1d233b0b 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -1,6 +1,7 @@ use bitmasks -BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ] +!BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ] +BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (N_det,N_states) ] implicit none BEGIN_DOC ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file @@ -68,7 +69,8 @@ BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ] +!BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ] +BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (N_det,N_states) ] implicit none BEGIN_DOC ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a9e22e03..a636e8d6 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -320,24 +320,38 @@ end enddo double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) + allocate(buffer(psi_det_size,N_states)) + print*,'passed the allocate' +! print*,N_det,N_states +! print*,size(psi_l_coef_bi_ortho,1),size(psi_l_coef_bi_ortho,2) +! print*,size(leigvec_tc_bi_orth,1),size(leigvec_tc_bi_orth,2) +! print*,size(reigvec_tc_bi_orth,1),size(reigvec_tc_bi_orth,2) +! print*,size(psi_r_coef_bi_ortho,1),size(psi_r_coef_bi_ortho,2) + buffer = 0.d0 do k = 1, N_states do i = 1, N_det psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) buffer(i,k) = leigvec_tc_bi_orth(i,k) enddo enddo + print*,'passed the first loop' TOUCH psi_l_coef_bi_ortho + print*,'passed the TOUCH psi_l_coef_bi_ortho' call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + print*,'passed the ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho' do k = 1, N_states do i = 1, N_det psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) buffer(i,k) = reigvec_tc_bi_orth(i,k) enddo enddo + print*,'passed the second loop' TOUCH psi_r_coef_bi_ortho + print*,'passed the TOUCH psi_r_coef_bi_ortho' call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + print*,'passed the ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho' deallocate(buffer) + print*,'passed saving the wf' ! print*,'After diag' ! do i = 1, N_det! old version ! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) From 3f861a41b5438d1722fa003da233642c79d96a47 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 8 Mar 2024 17:27:18 +0100 Subject: [PATCH 02/64] added the thresh_de_tc_angles keyword in minimize tc angles --- bin/zcat | 23 --------------------- plugins/local/tc_keywords/EZFIO.cfg | 5 +++++ plugins/local/tc_scf/routines_rotates.irp.f | 1 + scripts/PYSCF_EOMCC.py | 1 + 4 files changed, 7 insertions(+), 23 deletions(-) delete mode 100755 bin/zcat create mode 120000 scripts/PYSCF_EOMCC.py diff --git a/bin/zcat b/bin/zcat deleted file mode 100755 index 7ccecf07..00000000 --- a/bin/zcat +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# On Darwin: try gzcat if available, otherwise use Python - -if [[ $(uname -s) = Darwin ]] ; then - which gzcat &> /dev/null - if [[ $? -eq 0 ]] ; then - exec gzcat $@ - else - - exec python3 << EOF -import sys -import gzip -with gzip.open("$1", "rt") as f: - print(f.read()) -EOF - fi -else - SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - command=$(which -a zcat | grep -v "$SCRIPTPATH/" | head -1) - exec $command $@ -fi - diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 93ff790f..68fe9c94 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -280,3 +280,8 @@ doc: approach used to evaluate TC integrals [ analytic | numeric | semi-analytic interface: ezfio,ocaml,provider default: semi-analytic +[thresh_de_tc_angles] +type: Threshold +doc: Thresholds on delta E for changing angles between orbitals +interface: ezfio,provider,ocaml +default: 1.e-03 diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f index c42e846e..92abfa44 100644 --- a/plugins/local/tc_scf/routines_rotates.irp.f +++ b/plugins/local/tc_scf/routines_rotates.irp.f @@ -301,6 +301,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! check if TC energy has changed E_new = TC_HF_energy + E_thr = thresh_de_tc_angles if(dabs(E_new - E_old) .gt. E_thr) then mo_r_coef = mo_r_coef_old mo_l_coef = mo_l_coef_old diff --git a/scripts/PYSCF_EOMCC.py b/scripts/PYSCF_EOMCC.py new file mode 120000 index 00000000..8ad341da --- /dev/null +++ b/scripts/PYSCF_EOMCC.py @@ -0,0 +1 @@ +/home_lct/eginer/qp2/plugins/qp_plugins_lct/dev/fcidump_for_vbarb/PYSCF_EOMCC.py \ No newline at end of file From e9dccd2364f282397df9f3b5bc4e3373fe3bd7e6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 Apr 2024 19:46:26 +0200 Subject: [PATCH 03/64] added spherical harmonics --- plugins/local/spher_harm/.gitignore | 59 +++++ plugins/local/spher_harm/NEED | 1 + plugins/local/spher_harm/README.rst | 4 + plugins/local/spher_harm/assoc_gaus_pol.irp.f | 50 ++++ plugins/local/spher_harm/spher_harm.irp.f | 217 ++++++++++++++++++ .../local/spher_harm/spher_harm_func.irp.f | 151 ++++++++++++ 6 files changed, 482 insertions(+) create mode 100644 plugins/local/spher_harm/.gitignore create mode 100644 plugins/local/spher_harm/NEED create mode 100644 plugins/local/spher_harm/README.rst create mode 100644 plugins/local/spher_harm/assoc_gaus_pol.irp.f create mode 100644 plugins/local/spher_harm/spher_harm.irp.f create mode 100644 plugins/local/spher_harm/spher_harm_func.irp.f diff --git a/plugins/local/spher_harm/.gitignore b/plugins/local/spher_harm/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/spher_harm/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/spher_harm/NEED b/plugins/local/spher_harm/NEED new file mode 100644 index 00000000..92df7f12 --- /dev/null +++ b/plugins/local/spher_harm/NEED @@ -0,0 +1 @@ +dft_utils_in_r diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst new file mode 100644 index 00000000..bf897f73 --- /dev/null +++ b/plugins/local/spher_harm/README.rst @@ -0,0 +1,4 @@ +========== +spher_harm +========== + diff --git a/plugins/local/spher_harm/assoc_gaus_pol.irp.f b/plugins/local/spher_harm/assoc_gaus_pol.irp.f new file mode 100644 index 00000000..fa790307 --- /dev/null +++ b/plugins/local/spher_harm/assoc_gaus_pol.irp.f @@ -0,0 +1,50 @@ +double precision function plgndr(l,m,x) + integer, intent(in) :: l,m + double precision, intent(in) :: x + BEGIN_DOC + ! associated Legenre polynom P_l,m(x). Used for the Y_lm(theta,phi) + ! Taken from https://iate.oac.uncor.edu/~mario/materia/nr/numrec/f6-8.pdf + END_DOC + integer :: i,ll + double precision :: fact,pll,pmm,pmmp1,somx2 + if(m.lt.0.or.m.gt.l.or.dabs(x).gt.1.d0)then + print*,'bad arguments in plgndr' + pause + endif + pmm=1.d0 + if(m.gt.0) then + somx2=dsqrt((1.d0-x)*(1.d0+x)) + fact=1.d0 + do i=1,m + pmm=-pmm*fact*somx2 + fact=fact+2.d0 + enddo + endif ! m > 0 + if(l.eq.m) then + plgndr=pmm + else + pmmp1=x*(2*m+1)*pmm ! Compute P_m+1^m + if(l.eq.m+1) then + plgndr=pmmp1 + else ! Compute P_l^m, l> m+1 + do ll=m+2,l + pll=(x*dble(2*ll-1)*pmmp1-dble(ll+m-1)*pmm)/(ll-m) + pmm=pmmp1 + pmmp1=pll + enddo + plgndr=pll + endif ! l.eq.m+1 + endif ! l.eq.m + return +end + +double precision function ortho_assoc_gaus_pol(l1,m1,l2) + implicit none + integer, intent(in) :: l1,m1,l2 + double precision :: fact + if(l1.ne.l2)then + ortho_assoc_gaus_pol= 0.d0 + else + ortho_assoc_gaus_pol = 2.d0*fact(l1+m1) / (dble(2*l1+1)*fact(l1-m1)) + endif +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f new file mode 100644 index 00000000..40661db1 --- /dev/null +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -0,0 +1,217 @@ +program spher_harm + implicit none + call test_spher_harm +! call test_cart +! call test_brutal_spheric +end + +subroutine test_cart + implicit none + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) +! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm_func.irp.f b/plugins/local/spher_harm/spher_harm_func.irp.f new file mode 100644 index 00000000..825bd8ac --- /dev/null +++ b/plugins/local/spher_harm/spher_harm_func.irp.f @@ -0,0 +1,151 @@ +subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm) + implicit none + integer, intent(in) :: l,m + double precision, intent(in) :: r(3) + double precision, intent(out) :: re_ylm, im_ylm + + double precision :: theta, phi,r_abs + call cartesian_to_spherical(r,theta,phi,r_abs) + call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) +end + + +subroutine spher_harm_func_m_pos(l,m,theta,phi,re_ylm, im_ylm) + include 'constants.include.F' + implicit none + BEGIN_DOC +! Y_lm(theta,phi) with m >0 +! + END_DOC + double precision, intent(in) :: theta, phi + integer, intent(in) :: l,m + double precision, intent(out):: re_ylm,im_ylm + double precision :: prefact,fact,cos_theta,plgndr,p_lm + double precision :: tmp + prefact = dble(2*l+1)*fact(l-m)/(dfour_pi * fact(l+m)) + prefact = dsqrt(prefact) + cos_theta = dcos(theta) + p_lm = plgndr(l,m,cos_theta) + tmp = prefact * p_lm + re_ylm = dcos(dble(m)*phi) * tmp + im_ylm = dsin(dble(m)*phi) * tmp +end + +subroutine spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l l in spher_harm_func !! stopping ...' + stop + endif + if(m.ge.0)then + call spher_harm_func_m_pos(l,m,theta,phi,re_ylm_pos, im_ylm_pos) + re_ylm = re_ylm_pos + im_ylm = im_ylm_pos + else + minus_m = -m !> 0 + call spher_harm_func_m_pos(l,minus_m,theta,phi,re_ylm_pos, im_ylm_pos) + tmp = (-1)**minus_m + re_ylm = tmp * re_ylm_pos + im_ylm = -tmp * im_ylm_pos ! complex conjugate + endif +end + +subroutine cartesian_to_spherical(r,theta,phi,r_abs) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: theta, phi,r_abs + double precision :: r_2,x_2_y_2,tmp + include 'constants.include.F' + x_2_y_2 = r(1)*r(1) + r(2)*r(2) + r_2 = x_2_y_2 + r(3)*r(3) + r_abs = dsqrt(r_2) + + if(r_abs.gt.1.d-20)then + theta = dacos(r(3)/r_abs) + else + theta = 0.d0 + endif + + if(.true.)then + if(dabs(r(1)).gt.0.d0)then + tmp = datan(r(2)/r(1)) +! phi = datan2(r(2),r(1)) + endif + ! From Wikipedia on Spherical Harmonics + if(r(1).gt.0.d0)then + phi = tmp + else if(r(1).lt.0.d0.and.r(2).ge.0.d0)then + phi = tmp + pi + else if(r(1).lt.0.d0.and.r(2).lt.0.d0)then + phi = tmp - pi + else if(r(1)==0.d0.and.r(2).gt.0.d0)then + phi = 0.5d0*pi + else if(r(1)==0.d0.and.r(2).lt.0.d0)then + phi =-0.5d0*pi + else if(r(1)==0.d0.and.r(2)==0.d0)then + phi = 0.d0 + endif + if(r(2).lt.0.d0.and.r(1).le.0.d0)then + tmp = pi - dabs(phi) + phi = pi + tmp + else if(r(2).lt.0.d0.and.r(1).gt.0.d0)then + phi = dtwo_pi + phi + endif + endif + + if(.false.)then + x_2_y_2 = dsqrt(x_2_y_2) + if(dabs(x_2_y_2).gt.1.d-20.and.dabs(r(2)).gt.1.d-20)then + phi = dabs(r(2))/r(2) * dacos(r(1)/x_2_y_2) + else + phi = 0.d0 + endif + endif +end + + +subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l Date: Thu, 25 Apr 2024 20:00:42 +0200 Subject: [PATCH 04/64] Added properly the routines for the test of the Spherical Harmonics --- plugins/local/spher_harm/routines_test.irp.f | 227 +++++++++++++++++++ plugins/local/spher_harm/spher_harm.irp.f | 210 ----------------- 2 files changed, 227 insertions(+), 210 deletions(-) create mode 100644 plugins/local/spher_harm/routines_test.irp.f diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 00000000..6f7cbc1c --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,227 @@ + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! simple test such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the spherical harmonics integration on a sphere with the grid. + ! + ! We test = delta_m1,m2 delta_l1,l2 + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + ! Test for the delta l1,l2 and delta m1,m2 + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index 40661db1..e8deafb9 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -5,213 +5,3 @@ program spher_harm ! call test_brutal_spheric end -subroutine test_cart - implicit none - include 'constants.include.F' - double precision :: r(3),theta,phi,r_abs - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi -end - -subroutine test_spher_harm - implicit none - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) -! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end - -subroutine test_brutal_spheric - implicit none - include 'constants.include.F' - integer :: itheta, iphi,ntheta,nphi - double precision :: theta_min, theta_max, dtheta,theta - double precision :: phi_min, phi_max, dphi,phi - double precision :: accu_re, accu_im,weight - double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu - integer :: l1,m1,i,l2,m2,lmax - phi_min = 0.d0 - phi_max = 2.D0 * pi - theta_min = 0.d0 - theta_max = 1.D0 * pi - ntheta = 1000 - nphi = 1000 - dphi = (phi_max - phi_min)/dble(nphi) - dtheta = (theta_max - theta_min)/dble(ntheta) - - lmax = 3 - do l1 = 0,lmax - do m1 = 0 ,l1 - do l2 = 0,lmax - do m2 = 0 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - accu = 0.d0 - theta = theta_min - do itheta = 1, ntheta - phi = phi_min - do iphi = 1, nphi -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) - call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - weight = dtheta * dphi * dsin(theta) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - phi += dphi - enddo - theta += dtheta - enddo - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - print*,'accu = ',accu - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - endif - endif - enddo - enddo - enddo - enddo - - -end - -subroutine test_assoc_leg_pol - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - integer :: l1,m1,ngrid,i,l2,m2 - l1 = 0 - m1 = 0 - l2 = 2 - m2 = 0 - double precision :: x, dx,xmax,accu,xmin - double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol - ngrid = 100000 - xmax = 1.d0 - xmin = -1.d0 - dx = (xmax-xmin)/dble(ngrid) - do l2 = 0,10 - x = xmin - accu = 0.d0 - do i = 1, ngrid - func_1 = plgndr(l1,m1,x) - func_2 = plgndr(l2,m2,x) - write(33,*)x, func_1,func_2 - accu += func_1 * func_2 * dx - x += dx - enddo - print*,'l2 = ',l2 - print*,'accu = ',accu - print*,ortho_assoc_gaus_pol(l1,m1,l2) - enddo -end From c3483df9a16003065a41bfa92d37274a3eb466ee Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 Apr 2024 20:00:42 +0200 Subject: [PATCH 05/64] Added properly the routines for the test of the Spherical Harmonics --- plugins/local/spher_harm/routines_test.irp.f | 227 +++++++++++++++++++ plugins/local/spher_harm/spher_harm.irp.f | 210 ----------------- 2 files changed, 227 insertions(+), 210 deletions(-) create mode 100644 plugins/local/spher_harm/routines_test.irp.f diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 00000000..6f7cbc1c --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,227 @@ + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! simple test such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the spherical harmonics integration on a sphere with the grid. + ! + ! We test = delta_m1,m2 delta_l1,l2 + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + l1 = 0 + m1 = 0 + l2 = 0 + m2 = 0 + lmax = 5 + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu = 0.d0 + do i = 1, n_points_integration_angular + double precision :: theta,phi,r_abs + r(1:3) = angular_quadrature_points(i,1:3) + weight = weights_angular_points(i) + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi',phi/pi + print*,r + endif + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + write(33,'(10(F16.10,X))')phi/pi + enddo + ! Test for the delta l1,l2 and delta m1,m2 + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo + double precision :: x,dx,xmax,xmin + integer:: nx + nx = 10000 + xmin = -5.d0 + xmax = 5.d0 + dx = (xmax - xmin)/dble(nx) + x = xmin + do i = 1, nx + write(34,*)x,datan(x),dacos(x) + x += dx + enddo +end + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 3 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index 40661db1..e8deafb9 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -5,213 +5,3 @@ program spher_harm ! call test_brutal_spheric end -subroutine test_cart - implicit none - include 'constants.include.F' - double precision :: r(3),theta,phi,r_abs - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) = 1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) =-1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi - print*,'' - r = 0.d0 - r(1) = 1.d0 - r(2) =-1.d0 - call cartesian_to_spherical(r,theta,phi,r_abs) - print*,r - print*,phi/pi -end - -subroutine test_spher_harm - implicit none - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) -! call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end - -subroutine test_brutal_spheric - implicit none - include 'constants.include.F' - integer :: itheta, iphi,ntheta,nphi - double precision :: theta_min, theta_max, dtheta,theta - double precision :: phi_min, phi_max, dphi,phi - double precision :: accu_re, accu_im,weight - double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu - integer :: l1,m1,i,l2,m2,lmax - phi_min = 0.d0 - phi_max = 2.D0 * pi - theta_min = 0.d0 - theta_max = 1.D0 * pi - ntheta = 1000 - nphi = 1000 - dphi = (phi_max - phi_min)/dble(nphi) - dtheta = (theta_max - theta_min)/dble(ntheta) - - lmax = 3 - do l1 = 0,lmax - do m1 = 0 ,l1 - do l2 = 0,lmax - do m2 = 0 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - accu = 0.d0 - theta = theta_min - do itheta = 1, ntheta - phi = phi_min - do iphi = 1, nphi -! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) -! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) - call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) - weight = dtheta * dphi * dsin(theta) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - phi += dphi - enddo - theta += dtheta - enddo - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - print*,'accu = ',accu - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - endif - endif - enddo - enddo - enddo - enddo - - -end - -subroutine test_assoc_leg_pol - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - integer :: l1,m1,ngrid,i,l2,m2 - l1 = 0 - m1 = 0 - l2 = 2 - m2 = 0 - double precision :: x, dx,xmax,accu,xmin - double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol - ngrid = 100000 - xmax = 1.d0 - xmin = -1.d0 - dx = (xmax-xmin)/dble(ngrid) - do l2 = 0,10 - x = xmin - accu = 0.d0 - do i = 1, ngrid - func_1 = plgndr(l1,m1,x) - func_2 = plgndr(l2,m2,x) - write(33,*)x, func_1,func_2 - accu += func_1 * func_2 * dx - x += dx - enddo - print*,'l2 = ',l2 - print*,'accu = ',accu - print*,ortho_assoc_gaus_pol(l1,m1,l2) - enddo -end From 5c69a7c005ecabe8428c386bf17bad3327891578 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 26 Apr 2024 10:57:57 +0200 Subject: [PATCH 06/64] removed stupid stuffs in spher_harm --- plugins/local/spher_harm/README.rst | 3 + plugins/local/spher_harm/routines_test.irp.f | 172 ++++++++++--------- plugins/local/spher_harm/spher_harm.irp.f | 4 +- 3 files changed, 93 insertions(+), 86 deletions(-) diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst index bf897f73..9c9b12a6 100644 --- a/plugins/local/spher_harm/README.rst +++ b/plugins/local/spher_harm/README.rst @@ -2,3 +2,6 @@ spher_harm ========== +Routines for spherical Harmonics evaluation in real space. +The main routine is "spher_harm_func_r3(r,l,m,re_ylm, im_ylm)". +The test routine is "test_spher_harm" where everything is explained in details. diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f index 6f7cbc1c..fe8fc422 100644 --- a/plugins/local/spher_harm/routines_test.irp.f +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -1,10 +1,93 @@ +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the generic spherical harmonics routine "spher_harm_func_r3" from R^3 --> C + ! + ! We test = delta_m1,m2 delta_l1,l2 + ! + ! The test is done through the integration on a sphere with the Lebedev grid. + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + double precision :: theta,phi,r_abs + lmax = 5 ! Maximum angular momentum until which we are going to test orthogonality conditions + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 ! accumulator for the REAL part of + accu_im = 0.d0 ! accumulator for the IMAGINARY part of + accu = 0.d0 ! accumulator for the weights ==> should be \int dOmega == 4 pi + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! \approx \sum_i W_i Y_l1,m1^*(r_i) Y_l2,m2(r_i) WITH r_i being on the spher of radius 1 + do i = 1, n_points_integration_angular + r(1:3) = angular_quadrature_points(i,1:3) ! ith Lebedev point (x,y,z) on the sphere of radius 1 + weight = weights_angular_points(i) ! associated Lebdev weight not necessarily positive + +!!!!!!!!!!! Test of the Cartesian --> Spherical coordinates + ! theta MUST belong to [0,pi] and phi to [0,2pi] + ! gets the cartesian to spherical change of coordinates + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta, it should be in [0,pi]',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi, it should be in [0,2 pi]',phi/pi + print*,r + endif + +!!!!!!!!!!! Routines returning the Spherical harmonics on the grid point + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + +!!!!!!!!!!! Integration of Y_l1,m1^*(r) Y_l2,m2(r) + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + enddo + ! Test that the sum of the weights is 4 pi + if(dabs(accu - dfour_pi).gt.1.d-6)then + print*,'Problem !! The sum of the Lebedev weight is not 4 pi ..' + print*,accu + stop + endif + ! Test for the delta l1,l2 and delta m1,m2 + ! + ! Test for the off-diagonal part of the Kronecker delta + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + ! Test for the diagonal part of the Kronecker delta + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo +end subroutine test_cart implicit none BEGIN_DOC ! test for the cartesian --> spherical change of coordinates ! - ! simple test such that the polar angle theta ranges in [0,pi] + ! test the routine "cartesian_to_spherical" such that the polar angle theta ranges in [0,pi] ! ! and the asymuthal angle phi ranges in [0,2pi] END_DOC @@ -40,97 +123,18 @@ subroutine test_cart print*,phi/pi end -subroutine test_spher_harm - implicit none - BEGIN_DOC - ! routine to test the spherical harmonics integration on a sphere with the grid. - ! - ! We test = delta_m1,m2 delta_l1,l2 - END_DOC - include 'constants.include.F' - integer :: l1,m1,i,l2,m2,lmax - double precision :: r(3),weight,accu_re, accu_im,accu - double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 - l1 = 0 - m1 = 0 - l2 = 0 - m2 = 0 - lmax = 5 - do l1 = 0,lmax - do m1 = -l1 ,l1 - do l2 = 0,lmax - do m2 = -l2 ,l2 - accu_re = 0.d0 - accu_im = 0.d0 - ! = \int dOmega Y_l1,m1^* Y_l2,m2 - ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) - ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu = 0.d0 - do i = 1, n_points_integration_angular - double precision :: theta,phi,r_abs - r(1:3) = angular_quadrature_points(i,1:3) - weight = weights_angular_points(i) - call cartesian_to_spherical(r,theta,phi,r_abs) - if(theta.gt.pi.or.theta.lt.0.d0)then - print*,'pb with theta',theta - print*,r - endif - if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then - print*,'pb with phi',phi/pi - print*,r - endif - call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) - call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) - accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) - accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) - accu += weight - write(33,'(10(F16.10,X))')phi/pi - enddo - ! Test for the delta l1,l2 and delta m1,m2 - if(l1.ne.l2.or.m1.ne.m2)then - if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then - print*,'pb OFF DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - if(l1==l2.and.m1==m2)then - if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then - print*,'pb DIAG !!!!! ' - print*,'l1,m1,l2,m2',l1,m1,l2,m2 - print*,'accu_re = ',accu_re - print*,'accu_im = ',accu_im - endif - endif - enddo - enddo - enddo - enddo - double precision :: x,dx,xmax,xmin - integer:: nx - nx = 10000 - xmin = -5.d0 - xmax = 5.d0 - dx = (xmax - xmin)/dble(nx) - x = xmin - do i = 1, nx - write(34,*)x,datan(x),dacos(x) - x += dx - enddo -end subroutine test_brutal_spheric implicit none include 'constants.include.F' BEGIN_DOC - ! test for the = delta_m1,m2 delta_l1,l2 using a two dimentional integration + ! Test for the = delta_m1,m2 delta_l1,l2 using the following two dimentional integration ! ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) ! != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) ! - ! Allows to test for the general functions spher_harm_func_m_pos with spher_harm_func_expl + ! Allows to test for the general functions "spher_harm_func_m_pos" with "spher_harm_func_expl" END_DOC integer :: itheta, iphi,ntheta,nphi double precision :: theta_min, theta_max, dtheta,theta @@ -147,7 +151,7 @@ subroutine test_brutal_spheric dphi = (phi_max - phi_min)/dble(nphi) dtheta = (theta_max - theta_min)/dble(ntheta) - lmax = 3 + lmax = 2 do l1 = 0,lmax do m1 = 0 ,l1 do l2 = 0,lmax @@ -196,7 +200,7 @@ end subroutine test_assoc_leg_pol implicit none BEGIN_DOC -! TODO : Put the documentation of the program here +! Test for the associated Legendre Polynoms. The test is done through the orthogonality condition. END_DOC print *, 'Hello world' integer :: l1,m1,ngrid,i,l2,m2 diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f index e8deafb9..7a2eea06 100644 --- a/plugins/local/spher_harm/spher_harm.irp.f +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -1,7 +1,7 @@ program spher_harm implicit none - call test_spher_harm +! call test_spher_harm ! call test_cart -! call test_brutal_spheric + call test_brutal_spheric end From 109a956f0d947665af7fbd3ed02d3569c49e592e Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:30:05 +0200 Subject: [PATCH 07/64] does not compile but working on it --- plugins/local/slater_tc/h_mat_triple.irp.f | 391 ------------------ .../local/slater_tc/slater_tc_opt_diag.irp.f | 311 +++++++++++++- .../slater_tc/symmetrized_3_e_int_prov.irp.f | 140 ------- plugins/local/slater_tc_no_opt/.gitignore | 59 +++ plugins/local/slater_tc_no_opt/NEED | 8 + plugins/local/slater_tc_no_opt/README.rst | 4 + .../h_biortho.irp.f | 0 .../local/slater_tc_no_opt/h_mat_triple.irp.f | 193 +++++++++ .../h_tc_bi_ortho_psi.irp.f | 0 .../slater_tc_3e_slow.irp.f | 2 +- .../slater_tc_no_opt.irp.f} | 2 +- .../slater_tc_slow.irp.f | 73 +--- src/determinants/slater_rules_general.irp.f | 192 +++++++++ 13 files changed, 769 insertions(+), 606 deletions(-) delete mode 100644 plugins/local/slater_tc/h_mat_triple.irp.f delete mode 100644 plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f create mode 100644 plugins/local/slater_tc_no_opt/.gitignore create mode 100644 plugins/local/slater_tc_no_opt/NEED create mode 100644 plugins/local/slater_tc_no_opt/README.rst rename plugins/local/{slater_tc => slater_tc_no_opt}/h_biortho.irp.f (100%) create mode 100644 plugins/local/slater_tc_no_opt/h_mat_triple.irp.f rename plugins/local/{slater_tc => slater_tc_no_opt}/h_tc_bi_ortho_psi.irp.f (100%) rename plugins/local/{slater_tc => slater_tc_no_opt}/slater_tc_3e_slow.irp.f (99%) rename plugins/local/{slater_tc/slater_tc.irp.f => slater_tc_no_opt/slater_tc_no_opt.irp.f} (82%) rename plugins/local/{slater_tc => slater_tc_no_opt}/slater_tc_slow.irp.f (80%) create mode 100644 src/determinants/slater_rules_general.irp.f diff --git a/plugins/local/slater_tc/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f deleted file mode 100644 index 6f5697a2..00000000 --- a/plugins/local/slater_tc/h_mat_triple.irp.f +++ /dev/null @@ -1,391 +0,0 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo -end - -subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & - !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & - !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo - !$OMP END PARALLEL DO -end - -! --- - -subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo -end - -subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) - BEGIN_DOC - ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS - ! - ! Assumes that the determinants are in psi_det - ! - ! istart, iend, ishift, istep are used in ZMQ parallelization. - END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: N_st,sze - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) - integer :: i,j,degree,ist - double precision :: hmono, htwoe, hthree, htot - !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & - !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & - !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) - do i = 1, N_det - do j = 1, N_det - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if(degree .ne. 3)cycle - call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) - do ist = 1, N_st - v_0(i,ist) += htot * u_0(j,ist) - enddo - enddo - enddo - !$OMP END PARALLEL DO -end - -! --- -subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) - use bitmasks - BEGIN_DOC -! for triple excitation -!! -!! WARNING !! -! -! Genuine triple excitations of the same spin are not yet implemented - END_DOC - implicit none - integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) - integer, intent(in) :: Nint - double precision, intent(out) :: hmono, htwoe, hthree, htot - integer :: degree - integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 - integer :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision :: phase,sym_3_e_int_from_6_idx_tensor - - hmono = 0.d0 - htwoe = 0.d0 - hthree = 0.d0 - htot = 0.d0 - call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) - degree = degree_array(1) + degree_array(2) - if(degree .ne. 3)return - if(degree_array(1)==3.or.degree_array(2)==3)then - if(degree_array(1) == 3)then - h1 = holes_array(1,1) - h2 = holes_array(2,1) - h3 = holes_array(3,1) - p1 = particles_array(1,1) - p2 = particles_array(2,1) - p3 = particles_array(3,1) - else - h1 = holes_array(1,2) - h2 = holes_array(2,2) - h3 = holes_array(3,2) - p1 = particles_array(1,2) - p2 = particles_array(2,2) - p3 = particles_array(3,2) - endif - hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) - else - if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta - h1 = holes_array(1,1) - h2 = holes_array(2,1) - h3 = holes_array(1,2) - p1 = particles_array(1,1) - p2 = particles_array(2,1) - p3 = particles_array(1,2) - else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha - h1 = holes_array(1,2) - h2 = holes_array(2,2) - h3 = holes_array(1,1) - p1 = particles_array(1,2) - p2 = particles_array(2,2) - p3 = particles_array(1,1) - else - print*,'PB !!' - stop - endif - hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) - endif - hthree *= phase - htot = hthree - end - diff --git a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f index 78f9dc66..3c5a5d12 100644 --- a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f @@ -19,13 +19,13 @@ PROVIDE HF_bitmask PROVIDE mo_l_coef mo_r_coef - call diag_htilde_mu_mat_bi_ortho_slow(N_int, HF_bitmask, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(N_int, HF_bitmask, hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc) then - call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) + call diag_htc_bi_orth_3e_brute(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -524,3 +524,310 @@ end ! --- +subroutine diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) + + BEGIN_DOC + ! + ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hmono,htwoe,htot + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + double precision :: get_mo_two_e_integral_tc_int + integer(bit_kind) :: key_i_core(Nint,2) + + PROVIDE mo_bi_ortho_tc_two_e + + hmono = 0.d0 + htwoe = 0.d0 + htot = 0.d0 + + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + do ispin = 1, 2 + do i = 1, Ne(ispin) + ii = occ(i,ispin) + hmono += mo_bi_ortho_tc_one_e(ii,ii) + enddo + enddo + + ! alpha/beta two-body + ispin = 1 + jspin = 2 + do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) + ii = occ(i,ispin) + do j = 1, Ne(jspin) ! electron 2 + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) + enddo + enddo + + ! alpha/alpha two-body + do i = 1, Ne(ispin) + ii = occ(i,ispin) + do j = i+1, Ne(ispin) + jj = occ(j,ispin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + ! beta/beta two-body + do i = 1, Ne(jspin) + ii = occ(i,jspin) + do j = i+1, Ne(jspin) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + htot = hmono + htwoe + +end + +! --- + +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) + + BEGIN_DOC + ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hthree + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm + integer(bit_kind) :: key_i_core(Nint,2) + double precision :: direct_int, exchange_int, ref + double precision, external :: sym_3_e_int_from_6_idx_tensor + double precision, external :: three_e_diag_parrallel_spin + + PROVIDE mo_l_coef mo_r_coef + + if(core_tc_op) then + do i = 1, Nint + key_i_core(i,1) = xor(key_i(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) + else + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + endif + + hthree = 0.d0 + + if((Ne(1)+Ne(2)) .ge. 3) then + + ! alpha/alpha/beta three-body + do i = 1, Ne(1) + ii = occ(i,1) + do j = i+1, Ne(1) + jj = occ(j,1) + do m = 1, Ne(2) + mm = occ(m,2) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) !uses 3-idx tensor + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) !uses 3-idx tensor + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! beta/beta/alpha three-body + do i = 1, Ne(2) + ii = occ(i,2) + do j = i+1, Ne(2) + jj = occ(j,2) + do m = 1, Ne(1) + mm = occ(m,1) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! alpha/alpha/alpha three-body + do i = 1, Ne(1) + ii = occ(i,1) ! 1 + do j = i+1, Ne(1) + jj = occ(j,1) ! 2 + do m = j+1, Ne(1) + mm = occ(m,1) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + ! beta/beta/beta three-body + do i = 1, Ne(2) + ii = occ(i,2) ! 1 + do j = i+1, Ne(2) + jj = occ(j,2) ! 2 + do m = j+1, Ne(2) + mm = occ(m,2) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + endif + +end + + + +BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS + ! + ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, m + double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin + + three_e_diag_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_diag_parrallel_spin_prov ...' + + integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do j = 1, mo_num + do m = j, mo_num + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do i = 1, mo_num + do j = 1, mo_num + do m = 1, j + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) + enddo + enddo + enddo + + call wall_time(wall1) + print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0, three_e_single_parrallel_spin + + three_e_single_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_single_parrallel_spin_prov ...' + + integral = three_e_single_parrallel_spin(1,1,1,1) + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + + +! --- + +BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0, three_e_double_parrallel_spin + + three_e_double_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_double_parrallel_spin_prov ...' + call wall_time(wall0) + + integral = three_e_double_parrallel_spin(1,1,1,1,1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + diff --git a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f b/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f deleted file mode 100644 index e8277a74..00000000 --- a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f +++ /dev/null @@ -1,140 +0,0 @@ - -BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS - ! - ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, m - double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin - - three_e_diag_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_diag_parrallel_spin_prov ...' - - integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do j = 1, mo_num - do m = j, mo_num - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do i = 1, mo_num - do j = 1, mo_num - do m = 1, j - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) - enddo - enddo - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0, three_e_single_parrallel_spin - - three_e_single_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_single_parrallel_spin_prov ...' - - integral = three_e_single_parrallel_spin(1,1,1,1) - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - - -! --- - -BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0, three_e_double_parrallel_spin - - three_e_double_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_double_parrallel_spin_prov ...' - call wall_time(wall0) - - integral = three_e_double_parrallel_spin(1,1,1,1,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - diff --git a/plugins/local/slater_tc_no_opt/.gitignore b/plugins/local/slater_tc_no_opt/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/slater_tc_no_opt/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/slater_tc_no_opt/NEED b/plugins/local/slater_tc_no_opt/NEED new file mode 100644 index 00000000..a8669866 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/NEED @@ -0,0 +1,8 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat +tc_scf diff --git a/plugins/local/slater_tc_no_opt/README.rst b/plugins/local/slater_tc_no_opt/README.rst new file mode 100644 index 00000000..90679e4c --- /dev/null +++ b/plugins/local/slater_tc_no_opt/README.rst @@ -0,0 +1,4 @@ +================ +slater_tc_no_opt +================ + diff --git a/plugins/local/slater_tc/h_biortho.irp.f b/plugins/local/slater_tc_no_opt/h_biortho.irp.f similarity index 100% rename from plugins/local/slater_tc/h_biortho.irp.f rename to plugins/local/slater_tc_no_opt/h_biortho.irp.f diff --git a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f new file mode 100644 index 00000000..e2c8f982 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f @@ -0,0 +1,193 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + diff --git a/plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f similarity index 100% rename from plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f rename to plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f diff --git a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f similarity index 99% rename from plugins/local/slater_tc/slater_tc_3e_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f index cb33d343..f7919653 100644 --- a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/plugins/local/slater_tc/slater_tc.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f similarity index 82% rename from plugins/local/slater_tc/slater_tc.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f index 27ab47c5..0fcc587f 100644 --- a/plugins/local/slater_tc/slater_tc.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f @@ -1,4 +1,4 @@ -program slater_tc +program slater_tc_no_opt implicit none BEGIN_DOC ! TODO : Put the documentation of the program here diff --git a/plugins/local/slater_tc/slater_tc_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f similarity index 80% rename from plugins/local/slater_tc/slater_tc_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f index caf7d665..b06fd12f 100644 --- a/plugins/local/slater_tc/slater_tc_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f @@ -61,7 +61,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) else if (degree == 1) then call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2) then @@ -76,7 +76,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, else if((degree == 1) .and. (elec_num .gt. 2) .and. three_e_4_idx_term) then call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) else if((degree == 0) .and. (elec_num .gt. 2) .and. three_e_3_idx_term) then - call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) + call diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) endif endif @@ -95,75 +95,6 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) - - BEGIN_DOC - ! - ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS - ! - END_DOC - - use bitmasks - - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - double precision, intent(out) :: hmono,htwoe,htot - integer :: occ(Nint*bit_kind_size,2) - integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk - double precision :: get_mo_two_e_integral_tc_int - integer(bit_kind) :: key_i_core(Nint,2) - - PROVIDE mo_bi_ortho_tc_two_e - - hmono = 0.d0 - htwoe = 0.d0 - htot = 0.d0 - - call bitstring_to_list_ab(key_i, occ, Ne, Nint) - - do ispin = 1, 2 - do i = 1, Ne(ispin) - ii = occ(i,ispin) - hmono += mo_bi_ortho_tc_one_e(ii,ii) - enddo - enddo - - ! alpha/beta two-body - ispin = 1 - jspin = 2 - do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) - ii = occ(i,ispin) - do j = 1, Ne(jspin) ! electron 2 - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) - enddo - enddo - - ! alpha/alpha two-body - do i = 1, Ne(ispin) - ii = occ(i,ispin) - do j = i+1, Ne(ispin) - jj = occ(j,ispin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - ! beta/beta two-body - do i = 1, Ne(jspin) - ii = occ(i,jspin) - do j = i+1, Ne(jspin) - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - htot = hmono + htwoe - -end - -! --- - subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC diff --git a/src/determinants/slater_rules_general.irp.f b/src/determinants/slater_rules_general.irp.f new file mode 100644 index 00000000..e987c846 --- /dev/null +++ b/src/determinants/slater_rules_general.irp.f @@ -0,0 +1,192 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end From b749796d931401f2c7e966e2c7eeedfff2f4477c Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:33:29 +0200 Subject: [PATCH 08/64] still not compiling --- .../tc_bi_ortho/dressing_vectors_lr.irp.f | 8 ++++---- .../local/tc_bi_ortho/e_corr_bi_ortho.irp.f | 18 +++++++++--------- plugins/local/tc_bi_ortho/print_tc_wf.irp.f | 6 +++--- .../local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 2 +- plugins/local/tc_bi_ortho/tc_som.irp.f | 4 ++-- plugins/local/tc_bi_ortho/tc_utils.irp.f | 10 +++++----- .../local/tc_bi_ortho/test_normal_order.irp.f | 8 ++++---- .../local/tc_bi_ortho/test_tc_bi_ortho.irp.f | 10 +++++----- plugins/local/tc_bi_ortho/test_tc_fock.irp.f | 4 ++-- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f index 0aff9980..135f9d17 100644 --- a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f index 6d5c3b21..4abdc25b 100644 --- a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f index 2b88bc5b..ab5ce371 100644 --- a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f @@ -61,12 +61,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4c3c0788..5cbf26d2 100644 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -14,7 +14,7 @@ call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 diff --git a/plugins/local/tc_bi_ortho/tc_som.irp.f b/plugins/local/tc_bi_ortho/tc_som.irp.f index 1d11c81b..6bdcc1f0 100644 --- a/plugins/local/tc_bi_ortho/tc_som.irp.f +++ b/plugins/local/tc_bi_ortho/tc_som.irp.f @@ -49,8 +49,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/plugins/local/tc_bi_ortho/tc_utils.irp.f b/plugins/local/tc_bi_ortho/tc_utils.irp.f index 43a6865e..2aa148a3 100644 --- a/plugins/local/tc_bi_ortho/tc_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_utils.irp.f @@ -25,7 +25,7 @@ subroutine write_tc_energy() E_2e_tmp(i) = 0.d0 E_3e_tmp(i) = 0.d0 do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC_tmp(i) = E_TC_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htot E_1e_tmp(i) = E_1e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hmono E_2e_tmp(i) = E_2e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htwoe @@ -70,7 +70,7 @@ subroutine write_tc_energy() E_3e = 0.d0 do i = 1, N_det do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot E_1e = E_1e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * hmono E_2e = E_2e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htwoe @@ -109,8 +109,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo @@ -132,7 +132,7 @@ subroutine write_tc_gs_var_HF() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) SIGMA_TC = SIGMA_TC + htot * htot enddo diff --git a/plugins/local/tc_bi_ortho/test_normal_order.irp.f b/plugins/local/tc_bi_ortho/test_normal_order.irp.f index 0cf27396..7b4c558f 100644 --- a/plugins/local/tc_bi_ortho/test_normal_order.irp.f +++ b/plugins/local/tc_bi_ortho/test_normal_order.irp.f @@ -54,7 +54,7 @@ subroutine test if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree_tmp *= phase @@ -66,7 +66,7 @@ subroutine test if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree_tmp *= phase @@ -109,7 +109,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -145,7 +145,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f index 369efd15..559c0200 100644 --- a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -88,7 +88,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -124,7 +124,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -171,7 +171,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -208,7 +208,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -250,7 +250,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f index 85f3ed97..b33b2e93 100644 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f @@ -64,7 +64,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -130,7 +130,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij ! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' From 366afb2933baba919db1ad85b7eee965ea56d0c6 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 6 May 2024 18:53:20 +0200 Subject: [PATCH 09/64] compiling after some cleaning --- plugins/local/old_delta_tc_qmc/NEED | 1 + plugins/local/old_delta_tc_qmc/README.rst | 4 + .../compute_deltamu_right.irp.f | 0 .../dressing_vectors_lr.irp.f | 0 .../old_delta_tc_qmc/old_delta_tc_qmc.irp.f | 7 + plugins/local/slater_tc/h_mat_triple.irp.f | 198 ++++++++++++++++++ .../local/slater_tc_no_opt/h_mat_triple.irp.f | 193 ----------------- .../test_tc_bi_ortho.irp.f | 0 plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f | 129 ------------ plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f | 36 ---- .../local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 145 ------------- plugins/local/tc_bi_ortho/test_s2_tc.irp.f | 170 --------------- 12 files changed, 210 insertions(+), 673 deletions(-) create mode 100644 plugins/local/old_delta_tc_qmc/NEED create mode 100644 plugins/local/old_delta_tc_qmc/README.rst rename plugins/local/{tc_bi_ortho => old_delta_tc_qmc}/compute_deltamu_right.irp.f (100%) rename plugins/local/{tc_bi_ortho => old_delta_tc_qmc}/dressing_vectors_lr.irp.f (100%) create mode 100644 plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f create mode 100644 plugins/local/slater_tc/h_mat_triple.irp.f delete mode 100644 plugins/local/slater_tc_no_opt/h_mat_triple.irp.f rename plugins/local/{tc_bi_ortho => slater_tc_no_opt}/test_tc_bi_ortho.irp.f (100%) delete mode 100644 plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f delete mode 100644 plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f delete mode 100644 plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_s2_tc.irp.f diff --git a/plugins/local/old_delta_tc_qmc/NEED b/plugins/local/old_delta_tc_qmc/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/old_delta_tc_qmc/README.rst b/plugins/local/old_delta_tc_qmc/README.rst new file mode 100644 index 00000000..1d56f96c --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/README.rst @@ -0,0 +1,4 @@ +================ +old_delta_tc_qmc +================ + diff --git a/plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f b/plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f rename to plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f rename to plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f diff --git a/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f new file mode 100644 index 00000000..5ff08bd6 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f @@ -0,0 +1,7 @@ +program old_delta_tc_qmc + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/plugins/local/slater_tc/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f new file mode 100644 index 00000000..9cb4b60a --- /dev/null +++ b/plugins/local/slater_tc/h_mat_triple.irp.f @@ -0,0 +1,198 @@ +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(3,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(3,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(3,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(3,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) + else + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) + else + print*,'PB !!' + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + endif + hthree *= phase + htot = hthree + end + diff --git a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f b/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f deleted file mode 100644 index e2c8f982..00000000 --- a/plugins/local/slater_tc_no_opt/h_mat_triple.irp.f +++ /dev/null @@ -1,193 +0,0 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f rename to plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f diff --git a/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f b/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f deleted file mode 100644 index 8940a4f6..00000000 --- a/plugins/local/tc_bi_ortho/pt2_tc_cisd.irp.f +++ /dev/null @@ -1,129 +0,0 @@ -program pt2_tc_cisd - - BEGIN_DOC - ! - ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together - ! with the energy. Saves the left-right wave functions at the end. - ! - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - print*, ' nb of states = ', N_states - print*, ' nb of det = ', N_det - call routine_diag() - - call routine -end - -subroutine routine - implicit none - integer :: i,h1,p1,h2,p2,s1,s2,degree - double precision :: h0i,hi0,e00,ei,delta_e - double precision :: norm,e_corr,coef,e_corr_pos,e_corr_neg,e_corr_abs - - integer :: exc(0:2,2,2) - double precision :: phase - double precision :: eh1,ep1,eh2,ep2 - - norm = 0.d0 - e_corr = 0.d0 - e_corr_abs = 0.d0 - e_corr_pos = 0.d0 - e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) - do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) - call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) - call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - eh1 = Fock_matrix_tc_diag_mo_tot(h1) - ep1 = Fock_matrix_tc_diag_mo_tot(p1) - delta_e = eh1 - ep1 - if (degree==2)then - eh2 = Fock_matrix_tc_diag_mo_tot(h2) - ep2 = Fock_matrix_tc_diag_mo_tot(p2) - delta_e += eh2 - ep2 - endif -! delta_e = e00 - ei - coef = hi0/delta_e - norm += coef*coef - e_corr = coef* h0i - if(e_corr.lt.0.d0)then - e_corr_neg += e_corr - elseif(e_corr.gt.0.d0)then - e_corr_pos += e_corr - endif - e_corr_abs += dabs(e_corr) - enddo - print*,'e_corr_abs = ',e_corr_abs - print*,'e_corr_pos = ',e_corr_pos - print*,'e_corr_neg = ',e_corr_neg - print*,'norm = ',dsqrt(norm) - -end - -subroutine routine_diag() - - implicit none - integer :: i, j, k - double precision :: dE - - ! provide eigval_right_tc_bi_orth - ! provide overlap_bi_ortho - ! provide htilde_matrix_elmt_bi_ortho - - if(N_states .eq. 1) then - - print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs - print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs - print*,'Left/right eigenvectors' - do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) - enddo - - else - - print*,'eigval_right_tc_bi_orth : ' - do i = 1, N_states - print*, i, eigval_right_tc_bi_orth(i) - enddo - - print*,'' - print*,'******************************************************' - print*,'TC Excitation energies (au) (eV)' - do i = 2, N_states - dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) - print*, i, dE, dE/0.0367502d0 - enddo - print*,'' - - endif - -end - - - diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f deleted file mode 100644 index d4c8c55d..00000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f +++ /dev/null @@ -1,36 +0,0 @@ - -! --- - -program tc_cisd_sc2 - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call test - -end - -! --- - -subroutine test() - implicit none -! double precision, allocatable :: dressing_dets(:),e_corr_dets(:) -! allocate(dressing_dets(N_det),e_corr_dets(N_det)) -! e_corr_dets = 0.d0 -! call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - provide eigval_tc_cisd_sc2_bi_ortho -end diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f deleted file mode 100644 index 5cbf26d2..00000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ /dev/null @@ -1,145 +0,0 @@ - BEGIN_PROVIDER [ double precision, reigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)] - implicit none - integer :: it,n_real,degree,i,istate - double precision :: e_before, e_current,thr, hmono,htwoe,hthree,accu - double precision, allocatable :: e_corr_dets(:),h0j(:), h_sc2(:,:), dressing_dets(:) - double precision, allocatable :: leigvec_tc_bi_orth_tmp(:,:),reigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) - allocate(leigvec_tc_bi_orth_tmp(N_det,N_det),reigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - allocate(e_corr_dets(N_det),h0j(N_det),h_sc2(N_det,N_det),dressing_dets(N_det)) - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) - dressing_dets = 0.d0 - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) - endif - enddo - reigvec_tc_bi_orth_tmp = 0.d0 - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - do i = 1, N_det - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo - E_before = eigval_tmp(1) - print*,'Starting from ',E_before - - e_current = 10.d0 - thr = 1.d-5 - it = 0 - dressing_dets = 0.d0 - double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav_slow - external htcdag_bi_ortho_calc_tdav_slow - logical :: converged - do while (dabs(E_before-E_current).gt.thr) - it += 1 - E_before = E_current -! h_sc2 = htilde_matrix_elmt_bi_ortho - call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - do i = 1, N_det -! print*,'dressing_dets(i) = ',dressing_dets(i) - h_sc2(i,i) += dressing_dets(i) - enddo - print*,'********************' - print*,'iteration ',it -! call non_hrmt_real_diag(N_det,h_sc2,& -! leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& -! n_real,eigval_right_tmp) -! print*,'eigval_right_tmp(1)',eigval_right_tmp(1) - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - print*,'outside Davidson' - print*,'eigval_tmp(1) = ',eigval_tmp(1) - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1) - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo -! E_current = eigval_right_tmp(1) - E_current = eigval_tmp(1) - print*,'it, E(SC)^2 = ',it,E_current - enddo - eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_right_tmp(1:N_states) - reigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - leigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = leigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - -END_PROVIDER - -subroutine get_cisd_sc2_dressing(dets,e_corr_dets,ndet,dressing_dets) - implicit none - use bitmasks - integer, intent(in) :: ndet - integer(bit_kind), intent(in) :: dets(N_int,2,ndet) - double precision, intent(in) :: e_corr_dets(ndet) - double precision, intent(out) :: dressing_dets(ndet) - integer, allocatable :: degree(:),hole(:,:),part(:,:),spin(:,:) - integer(bit_kind), allocatable :: hole_part(:,:,:) - integer :: i,j,k, exc(0:2,2,2),h1,p1,h2,p2,s1,s2 - integer(bit_kind) :: xorvec(2,N_int) - - double precision :: phase - dressing_dets = 0.d0 - allocate(degree(ndet),hole(2,ndet),part(2,ndet), spin(2,ndet),hole_part(N_int,2,ndet)) - do i = 2, ndet - call get_excitation_degree(HF_bitmask,dets(1,1,i),degree(i),N_int) - do j = 1, N_int - hole_part(j,1,i) = xor( HF_bitmask(j,1), dets(j,1,i)) - hole_part(j,2,i) = xor( HF_bitmask(j,2), dets(j,2,i)) - enddo - if(degree(i) == 1)then - call get_single_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - else if(degree(i) == 2)then - call get_double_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - endif - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - hole(1,i) = h1 - hole(2,i) = h2 - part(1,i) = p1 - part(2,i) = p2 - spin(1,i) = s1 - spin(2,i) = s2 - enddo - - integer :: same - if(elec_alpha_num+elec_beta_num<3)return - do i = 2, ndet - do j = i+1, ndet - same = 0 - if(degree(i) == degree(j) .and. degree(i)==1)cycle - do k = 1, N_int - xorvec(k,1) = iand(hole_part(k,1,i),hole_part(k,1,j)) - xorvec(k,2) = iand(hole_part(k,2,i),hole_part(k,2,j)) - same += popcnt(xorvec(k,1)) + popcnt(xorvec(k,2)) - enddo -! print*,'i,j',i,j -! call debug_det(dets(1,1,i),N_int) -! call debug_det(hole_part(1,1,i),N_int) -! call debug_det(dets(1,1,j),N_int) -! call debug_det(hole_part(1,1,j),N_int) -! print*,'same = ',same - if(same.eq.0)then - dressing_dets(i) += e_corr_dets(j) - dressing_dets(j) += e_corr_dets(i) - endif - enddo - enddo - -end diff --git a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f b/plugins/local/tc_bi_ortho/test_s2_tc.irp.f deleted file mode 100644 index 7c70b119..00000000 --- a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f +++ /dev/null @@ -1,170 +0,0 @@ - -! --- - -program test_tc - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call routine_h_triple_left - call routine_h_triple_right -! call routine_test_s2_davidson - -end - -subroutine routine_h_triple_right - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Right ' - do i = 1, sze - u_0(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - -subroutine routine_h_triple_left - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking the Left ' - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - - -subroutine routine_test_s2_davidson - implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) - integer :: i,istate - logical :: converged - external H_tc_s2_dagger_u_0_opt - external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - enddo - ! Preparing the left-eigenvector - print*,'Computing the left-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - integer :: n_it_max - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - integer :: sze,N_st - logical :: do_right - sze = N_det - N_st = 1 - do_right = .False. - allocate(s_0_new(N_det,1),v_0_new(N_det,1)) - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - double precision :: accu_e_0, accu_s_0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - - ! Preparing the right-eigenvector - print*,'Computing the right-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) - sze = N_det - N_st = 1 - do_right = .True. - v_0_new = 0.d0 - s_0_new = 0.d0 - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - -end From 17ae4d8fe2f103bac46205380ae0e6a33736de71 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 18:27:09 +0200 Subject: [PATCH 10/64] added tc_progs --- .../local/cipsi_tc_bi_ortho/selection.irp.f | 4 +- ..._bi_ortho.irp.f => diagonalize_tc_h.irp.f} | 0 plugins/local/tc_bi_ortho/test_natorb.irp.f | 64 ------- .../local/tc_bi_ortho/test_normal_order.irp.f | 173 ------------------ plugins/local/tc_bi_ortho/test_tc_fock.irp.f | 171 ----------------- plugins/local/tc_progs/NEED | 1 + .../print_he_tc_energy.irp.f | 0 .../print_tc_dump.irp.f | 0 .../print_tc_energy.irp.f | 0 .../print_tc_spin_dens.irp.f | 0 .../print_tc_var.irp.f | 0 .../print_tc_wf.irp.f | 0 .../save_bitcpsileft_for_qmcchem.irp.f | 0 .../save_tc_bi_ortho_nat.irp.f | 0 .../select_dets_bi_ortho.irp.f | 0 .../tc_bi_ortho_prop.irp.f | 0 .../{tc_bi_ortho => tc_progs}/tc_som.irp.f | 0 .../test_tc_two_rdm.irp.f | 0 18 files changed, 3 insertions(+), 410 deletions(-) rename plugins/local/tc_bi_ortho/{tc_bi_ortho.irp.f => diagonalize_tc_h.irp.f} (100%) delete mode 100644 plugins/local/tc_bi_ortho/test_natorb.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_normal_order.irp.f delete mode 100644 plugins/local/tc_bi_ortho/test_tc_fock.irp.f create mode 100644 plugins/local/tc_progs/NEED rename plugins/local/{tc_bi_ortho => tc_progs}/print_he_tc_energy.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_dump.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_energy.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_spin_dens.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_var.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/print_tc_wf.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/save_bitcpsileft_for_qmcchem.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/save_tc_bi_ortho_nat.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/select_dets_bi_ortho.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/tc_bi_ortho_prop.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/tc_som.irp.f (100%) rename plugins/local/{tc_bi_ortho => tc_progs}/test_tc_two_rdm.irp.f (100%) diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 12163e06..0b4345d5 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -892,8 +892,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_opt_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/plugins/local/tc_bi_ortho/tc_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_bi_ortho.irp.f rename to plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f diff --git a/plugins/local/tc_bi_ortho/test_natorb.irp.f b/plugins/local/tc_bi_ortho/test_natorb.irp.f deleted file mode 100644 index 5b8801f7..00000000 --- a/plugins/local/tc_bi_ortho/test_natorb.irp.f +++ /dev/null @@ -1,64 +0,0 @@ - -! --- - -program test_natorb - - BEGIN_DOC - ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call routine() - ! call test() - -end - -! --- - -subroutine routine() - - implicit none - double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:) - allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num)) - double precision, allocatable :: eigval_ref(:),leigvec_ref(:,:),reigvec_ref(:,:) - allocate(eigval_ref(mo_num),leigvec_ref(mo_num,mo_num),reigvec_ref(mo_num,mo_num)) - - double precision :: thr_deg - integer :: i,n_real,j - print*,'fock_matrix' - do i = 1, mo_num - fock_diag(i) = Fock_matrix_mo(i,i) - print*,i,fock_diag(i) - enddo - thr_deg = 1.d-6 - mat_ref = -one_e_dm_mo - print*,'diagonalization by block' - call diag_mat_per_fock_degen(fock_diag,mat_ref,mo_num,thr_deg,leigvec,reigvec,eigval) - call non_hrmt_bieig( mo_num, mat_ref& - , leigvec_ref, reigvec_ref& - , n_real, eigval_ref) - print*,'TEST ***********************************' - double precision :: accu_l, accu_r - do i = 1, mo_num - accu_l = 0.d0 - accu_r = 0.d0 - do j = 1, mo_num - accu_r += reigvec_ref(j,i) * reigvec(j,i) - accu_l += leigvec_ref(j,i) * leigvec(j,i) - enddo - print*,i - write(*,'(I3,X,100(F16.10,X))')i,eigval(i),eigval_ref(i),accu_l,accu_r - enddo -end diff --git a/plugins/local/tc_bi_ortho/test_normal_order.irp.f b/plugins/local/tc_bi_ortho/test_normal_order.irp.f deleted file mode 100644 index 7b4c558f..00000000 --- a/plugins/local/tc_bi_ortho/test_normal_order.irp.f +++ /dev/null @@ -1,173 +0,0 @@ - -! --- - -program test_normal_order - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call test() - -end - -! --- - -subroutine test - implicit none - use bitmasks ! you need to include the bitmasks_module.f90 features - integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) - integer :: exc(0:2,2,2) - integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp - integer, allocatable :: occ(:,:) - allocate( occ(N_int*bit_kind_size,2) ) - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - allocate(det_i(N_int,2)) - s1 = 1 - s2 = 2 - accu = 0.d0 - do h1 = 1, elec_beta_num - do p1 = elec_alpha_num+1, mo_num - do h2 = 1, elec_beta_num - do p2 = elec_beta_num+1, mo_num - hthree = 0.d0 - - det_i = ref_bitmask - s1 = 1 - s2 = 2 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - det_i = ref_bitmask - s1 = 2 - s2 = 1 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - - -! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then -! print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) -! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - accu += dabs(hthree-normal) - enddo - enddo - enddo - enddo -print*,'accu opposite spin = ',accu -stop - -! p2=6 -! p1=5 -! h2=2 -! h1=1 - -s1 = 1 -s2 = 1 -accu = 0.d0 -do h1 = 1, elec_alpha_num - do p1 = elec_alpha_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_alpha_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - integer :: hh1, pp1, hh2, pp2, ss1, ss2 - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase - normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) -! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin alpha = ',accu - - -s1 = 2 -s2 = 2 -accu = 0.d0 -do h1 = 1, elec_beta_num - do p1 = elec_beta_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_beta_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_opt_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin beta = ',accu - - -end - - diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f deleted file mode 100644 index b33b2e93..00000000 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ /dev/null @@ -1,171 +0,0 @@ - -! --- - -program test_tc_fock - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - !call routine_1 - !call routine_2 -! call routine_3() - - call routine_tot - -end - -! --- - -subroutine routine_3() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1 - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - - err_tot = 0.d0 - - do s1 = 1, 2 - - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - - do i = 1, elec_num_tab(s1) - do a = elec_num_tab(s1)+1, mo_num ! virtual - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - print*, ' excited det' - call debug_det(det_i, N_int) - - call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - if(dabs(hthree).lt.1.d-10)cycle - ref = hthree - if(s1 == 1)then - new = fock_a_tot_3e_bi_orth(a,i) - else if(s1 == 2)then - new = fock_b_tot_3e_bi_orth(a,i) - endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo - enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 - -! --- -subroutine routine_tot() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1,other_spin(2) - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - other_spin(1) = 2 - other_spin(2) = 1 - - err_tot = 0.d0 - -! do s1 = 1, 2 - s1 = 2 - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - -! do i = 1, elec_num_tab(s1) -! do a = elec_num_tab(s1)+1, mo_num ! virtual - do i = 1, elec_beta_num - do a = elec_beta_num+1, mo_num! virtual - print*,i,a - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - - call htilde_mu_mat_opt_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - print*,htilde_ij -! if(dabs(htilde_ij).lt.1.d-10)cycle - print*, ' excited det' - call debug_det(det_i, N_int) - - if(s1 == 1)then - new = Fock_matrix_tc_mo_alpha(a,i) - else - new = Fock_matrix_tc_mo_beta(a,i) - endif - ref = htilde_ij -! if(s1 == 1)then -! new = fock_a_tot_3e_bi_orth(a,i) -! else if(s1 == 2)then -! new = fock_b_tot_3e_bi_orth(a,i) -! endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'---------' - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - print*,hmono, htwoe, hthree - print*,'---------' - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo -! enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 diff --git a/plugins/local/tc_progs/NEED b/plugins/local/tc_progs/NEED new file mode 100644 index 00000000..9deb3db4 --- /dev/null +++ b/plugins/local/tc_progs/NEED @@ -0,0 +1 @@ +tc_bi_ortho diff --git a/plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f b/plugins/local/tc_progs/print_he_tc_energy.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f rename to plugins/local/tc_progs/print_he_tc_energy.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_dump.irp.f b/plugins/local/tc_progs/print_tc_dump.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_dump.irp.f rename to plugins/local/tc_progs/print_tc_dump.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_energy.irp.f b/plugins/local/tc_progs/print_tc_energy.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_energy.irp.f rename to plugins/local/tc_progs/print_tc_energy.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f b/plugins/local/tc_progs/print_tc_spin_dens.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f rename to plugins/local/tc_progs/print_tc_spin_dens.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_var.irp.f b/plugins/local/tc_progs/print_tc_var.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_var.irp.f rename to plugins/local/tc_progs/print_tc_var.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_progs/print_tc_wf.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_wf.irp.f rename to plugins/local/tc_progs/print_tc_wf.irp.f diff --git a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f rename to plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f diff --git a/plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f rename to plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f diff --git a/plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f b/plugins/local/tc_progs/select_dets_bi_ortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f rename to plugins/local/tc_progs/select_dets_bi_ortho.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/plugins/local/tc_progs/tc_bi_ortho_prop.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f rename to plugins/local/tc_progs/tc_bi_ortho_prop.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_som.irp.f b/plugins/local/tc_progs/tc_som.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_som.irp.f rename to plugins/local/tc_progs/tc_som.irp.f diff --git a/plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f b/plugins/local/tc_progs/test_tc_two_rdm.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f rename to plugins/local/tc_progs/test_tc_two_rdm.irp.f From b7787f5e6dce198bee06eb92f69b9904a7448bea Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 19:43:05 +0200 Subject: [PATCH 11/64] trying to speed up the PT2 in TC by transposing the array of tc integrals --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 8 +- .../cipsi_tc_bi_ortho/get_d0_transp.irp.f | 140 +++++++++++ .../local/cipsi_tc_bi_ortho/get_d2_good.irp.f | 3 - .../cipsi_tc_bi_ortho/get_d2_transp.irp.f | 235 ++++++++++++++++++ plugins/local/cipsi_tc_bi_ortho/pt2.irp.f | 1 + .../local/cipsi_tc_bi_ortho/selection.irp.f | 94 +------ .../cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 3 + plugins/local/fci_tc_bi/pt2_tc.irp.f | 2 + .../local/tc_bi_ortho/e_corr_bi_ortho.irp.f | 1 - plugins/local/tc_keywords/EZFIO.cfg | 11 +- 10 files changed, 404 insertions(+), 94 deletions(-) create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 1e127fac..71269fdc 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -259,15 +259,21 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, END_DOC integer :: i,j,k,l + print*,'Providing mo_bi_ortho_tc_two_e_transp' + double precision :: t0,t1 + call wall_time(t0) do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num do l = 1, mo_num - mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e_transp(k,l,i,j) + mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e(k,l,i,j) enddo enddo enddo enddo + call wall_time(t1) + + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_transp (min)', (t1-t0)/60.d0 END_PROVIDER ! --- diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f new file mode 100644 index 00000000..56238e13 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -0,0 +1,140 @@ +subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be okay for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm + double precision :: phase + double precision :: hij,hji + double precision, external :: get_phase_bi + logical :: ok + + integer, parameter :: bant=1 + double precision, allocatable :: hij_cache1(:), hij_cache2(:) + allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + double precision, allocatable :: hji_cache1(:), hji_cache2(:) + allocate (hji_cache1(mo_num),hji_cache2(mo_num)) +! print*,'in get_d0_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle +! call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,p1,h2,h1) + enddo + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache1(p2) * phase + end if + if (hij == (0.d0,0.d0)) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT + enddo + end do + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hji = hji_cache1(p2) * phase + end if + if (hji == (0.d0,0.d0)) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle +! call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1) + hij_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p2,p1) + hji_cache2(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p1,p2) + enddo + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + if (hij == 0.d0) cycle + else +! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) +! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) + hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) + if (hij == 0.d0) cycle + hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + enddo + end do + + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) + if (hji == 0.d0) cycle + else +! hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) - mo_bi_ortho_tc_two_e( p2, p1, puti, putj)) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2 ) - mo_bi_ortho_tc_two_e_transp( puti, putj, p2, p1)) + if (hji == 0.d0) cycle + hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end do + end do + end if + + deallocate(hij_cache1,hij_cache2) +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f index d01ed433..86922ae9 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -25,9 +25,6 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, integer :: bant bant = 1 -! print*, 'in get_d2_new' -! call debug_det(gen,N_int) -! print*,'coefs',coefs(1,:) tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f new file mode 100644 index 00000000..b2a7ea31 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f @@ -0,0 +1,235 @@ + +subroutine get_d2_new_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be correct for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: phase + double precision :: hij,hji + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles + + ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b) + if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles + if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles + mi = mod(ma, 2) + 1 + + if(sp == 3) then ! if one alpha and one beta xhole + !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator) + if(ma == 2) bant = 2 ! if more beta particles than alpha particles + + if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + h1 = h(1, ma) + h2 = h(2, ma) + + !! + do i = 1, 3 ! loop over all 3 combinations of 2 particles with spin ma + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + + ! |G> = |psi_{gen,i}> + ! |G'> = a_{x1} a_{x2} |G> + ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'> + ! |alpha> = t_{x1,x2}^{puti,putj} |G> + ! hij = + ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}> + !todo: = ( - ) * phase + ! += dconjg(c_i) * + ! = ( - ) * phase + ! += * c_i + +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e_transp( h1, h2, p2, p1) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp( p1, p2, h2, h1) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e( p1, p2, h2, h1) + if (hij == 0.d0.or.hji==0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + + if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end do + else ! if 2 alpha and 2 beta particles + h1 = h(1,1) + h2 = h(1,2) + !! + do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + ! hij = +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 ) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2 ) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2 ) + hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + endif + end do + end do + end if + + else ! if holes are (a,a) or (b,b) + if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b) + h1 = h(1, ma) + h2 = h(2, ma) + !! + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e_transp(p1, p2, h2,h1 ) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p1, p2, h2,h1 ) + if (hij == 0.d0.or.hji == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji + enddo + end do + end do + else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1) + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + !! + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2) + hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2 ) + if (hij == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + if (puti < putj) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + endif + end do + else ! tip == 4 (a,a,b,b) + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + !! +! hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = (mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp(p2,p1,h1, h2)) + hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2)) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji* phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end if + end if + end if +end diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index 833cc0ea..ada19c6b 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -67,6 +67,7 @@ subroutine tc_pt2 call pt2_alloc(pt2_data_err, N_states) call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) end diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 0b4345d5..0f785ba2 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -636,10 +636,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do -! print*,'in selection ' do i = 1, N_sel -! call debug_det(det(1,1,i),N_int) -! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif @@ -691,11 +688,19 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif elseif(nt == 3) then call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else + if(transpose_two_e_int)then + call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif endif elseif(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -887,79 +892,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states delta_E = E0(istate) - Hii + E_shift - double precision :: alpha_h_psi_tmp, psi_h_alpha_tmp, error - if(debug_tc_pt2 == 1)then !! Using the old version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors - call htilde_mu_mat_opt_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) - if(degree == 0)then - print*,'problem !!!' - print*,'a determinant is already in the wave function !!' - print*,'it corresponds to the selector number ',iii - call debug_det(det,N_int) - stop - endif -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function - enddo - else if(debug_tc_pt2 == 2)then !! debugging the new version -! psi_h_alpha_tmp = 0.d0 -! alpha_h_psi_tmp = 0.d0 -! do iii = 1, N_det_selectors ! old version -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) -! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function -! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function -! enddo - psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version - alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function - alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function - enddo - if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then - error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) - if(error.gt.1.d-2)then - call debug_det(det, N_int) - print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E - print*,psi_h_alpha , alpha_h_psi - print*,psi_h_alpha_tmp , alpha_h_psi_tmp - print*,'selectors ' - do iii = 1, N_det_selectors ! old version - print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - print*,i_h_alpha,alpha_h_i - call debug_det(psi_selectors(1,1,iii),N_int) - enddo -! print*,'psi_det ' -! do iii = 1, N_det! old version -! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) -! call debug_det(psi_det(1,1,iii),N_int) -! enddo - stop - endif - endif - else - psi_h_alpha = mat_l(istate, p1, p2) - alpha_h_psi = mat_r(istate, p1, p2) - endif + psi_h_alpha = mat_l(istate, p1, p2) + alpha_h_psi = mat_r(istate, p1, p2) val = 4.d0 * psi_h_alpha * alpha_h_psi tmp = dsqrt(delta_E * delta_E + val) -! if (delta_E < 0.d0) then -! tmp = -tmp -! endif e_pert(istate) = 0.25 * val / delta_E -! e_pert(istate) = 0.5d0 * (tmp - delta_E) if(dsqrt(tmp).gt.1.d-4.and.dabs(psi_h_alpha).gt.1.d-4)then coef(istate) = e_pert(istate) / psi_h_alpha else @@ -976,15 +913,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 endif -! if(selection_tc == 1 )then -! if(e_pert(istate).lt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! else if(selection_tc == -1)then -! if(e_pert(istate).gt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! endif enddo diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 99a8de7e..bb5a89a1 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -88,6 +88,9 @@ subroutine run_stochastic_cipsi call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop diff --git a/plugins/local/fci_tc_bi/pt2_tc.irp.f b/plugins/local/fci_tc_bi/pt2_tc.irp.f index 390042bf..3c07e367 100644 --- a/plugins/local/fci_tc_bi/pt2_tc.irp.f +++ b/plugins/local/fci_tc_bi/pt2_tc.irp.f @@ -13,6 +13,8 @@ program tc_pt2_prog pruning = -1.d0 touch pruning + read_wf = .True. + touch read_wf ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error diff --git a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f index 4abdc25b..5a3971c5 100644 --- a/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/plugins/local/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -27,7 +27,6 @@ if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij else -! print*,'coef_pt1, e_pt2',coef_pt1,coef_pt1 * htilde_ij e_pt2_tc_bi_orth_double += coef_pt1 * htilde_ij endif endif diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 1e89eaa4..39968ec8 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -184,12 +184,6 @@ doc: Read/Write normal_two_body_bi_orth from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None -[debug_tc_pt2] -type: integer -doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body -interface: ezfio,provider,ocaml -default: -1 - [only_spin_tc_right] type: logical doc: If |true|, only the right part of WF is used to compute spin dens @@ -268,3 +262,8 @@ doc: Thresholds on the Imag part of TC energy interface: ezfio,provider,ocaml default: 1.e-7 +[transpose_two_e_int] +type: logical +doc: If |true|, you duplicate the two-electron TC integrals with the transpose matrix. Acceleates the PT2. +interface: ezfio,provider,ocaml +default: False From 18fd70f1b88ee4a412a351a92a98f4b1ef1ee3d0 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:18:24 +0200 Subject: [PATCH 12/64] added get_d1_transp.irp.f --- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 350 ++++++++++++++++++ .../local/cipsi_tc_bi_ortho/selection.irp.f | 6 +- 2 files changed, 355 insertions(+), 1 deletion(-) create mode 100644 plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f new file mode 100644 index 00000000..3c6cbf60 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -0,0 +1,350 @@ +subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices should be okay for complex? + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, external :: get_phase_bi + double precision, external :: mo_two_e_integral_complex + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + double precision :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num),phase + double precision, allocatable :: hji_cache(:,:) + double precision :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num) +! PROVIDE mo_integrals_map N_int +! print*,'in get_d1_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + allocate (lbanned(mo_num, 2)) + allocate (hij_cache(mo_num,2)) + allocate (hji_cache(mo_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + enddo + !! + tmp_rowij = 0.d0 + tmp_rowji = 0.d0 + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + + if(ma == 1) then + mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num) + mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l) + mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l) + enddo + enddo + end if + + end if + + !MOVE MI + pfix = p(1,mi) + tmp_rowij = 0.d0 + tmp_rowij2 = 0.d0 + tmp_rowji = 0.d0 + tmp_rowji2 = 0.d0 +! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p2) + enddo + putj = p1 + !! + do puti=1,mo_num !HOT + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if +! + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + + if(mi == 1) then + mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:) + mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:) + mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:) + mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l) + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l) + enddo + enddo + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + enddo + !! + tmp_rowij = 0.d0 + tmp_rowji = 0.d0 + do putj=1,hfix-1 + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + do putj=hfix+1,mo_num + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + + mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1) + mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1) + do l=puti,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l) + mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l) + enddo + enddo + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_rowij = 0.d0 + tmp_rowij2 = 0.d0 + tmp_rowji = 0.d0 + tmp_rowji2 = 0.d0 +! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + enddo + putj = p2 + !! + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1) + mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1) + do l=p2,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l) + enddo + enddo + mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1) + mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1) + do l=p1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l) + enddo + enddo + end if + end if + deallocate(lbanned,hij_cache, hji_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + ! gen is a selector; mask is ionized generator; det is alpha + ! hij is contribution to +! call i_h_j_complex(gen, det, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + ! take conjugate to get contribution to instead of +! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji + enddo + end do + end do +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 0f785ba2..17d34f43 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -694,7 +694,11 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif elseif(nt == 3) then - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif else if(transpose_two_e_int)then call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) From 687259c25feb8ec568b31b89b760d2e08d07ad3a Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:32:48 +0200 Subject: [PATCH 13/64] working on the matrix elements both --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 20 +++ plugins/local/slater_tc/slater_tc_opt.irp.f | 42 ++++++ .../slater_tc/slater_tc_opt_double.irp.f | 60 ++++++++ .../slater_tc/slater_tc_opt_single.irp.f | 142 ++++++++++++++++++ 4 files changed, 264 insertions(+) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 71269fdc..e27fdb7f 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -332,3 +332,23 @@ END_PROVIDER ! --- + BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals_transp , (mo_num,mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals_transp, (mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! tc_2e_3idx_coulomb_integrals_transp (j,k,i) = + ! tc_2e_3idx_exchange_integrals_transp(j,k,i) = + END_DOC + implicit none + integer :: i, j, k + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + tc_2e_3idx_coulomb_integrals_transp(j, k,i) = mo_bi_ortho_tc_two_e_transp(j ,k ,j ,i ) + tc_2e_3idx_exchange_integrals_transp(j,k,i) = mo_bi_ortho_tc_two_e_transp(k ,j ,j ,i ) + enddo + enddo + enddo + +END_PROVIDER diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 59efc943..9ed2b389 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -181,3 +181,45 @@ end ! --- +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) + + BEGIN_DOC + ! + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + !! + ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS + !! WARNING !! + ! + ! Non hermitian !! + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: htot + integer :: degree + + htot = 0.d0 + + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.gt.2) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, htot) + endif + + if(degree==0) then + htot += nuclear_repulsion + endif + +end + +! --- + diff --git a/plugins/local/slater_tc/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f index 4067473c..181ae11d 100644 --- a/plugins/local/slater_tc/slater_tc_opt_double.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_double.irp.f @@ -505,3 +505,63 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) end +subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono, htwoe_ji, htwoe_ij + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int,phase + + + call get_excitation_degree(key_i, key_j, degree, Nint) + + hmono = 0.d0 + htwoe_ji = 0.d0 + htwoe_ij = 0.d0 + hji = 0.d0 + hij = 0.d0 + + if(degree.ne.2)then + return + endif + integer :: degree_i,degree_j + call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int) + call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int) + call get_double_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) + + if(s1.ne.s2)then + ! opposite spin two-body + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + else + ! same spin two-body + ! direct terms + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + ! exchange terms + htwoe_ji -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + htwoe_ij -= mo_bi_ortho_tc_two_e_transp(p1,p2,h2,h1) + endif + htwoe_ji *= phase + hji = htwoe_ji + htwoe_ij *= phase + hij = htwoe_ij + +end diff --git a/plugins/local/slater_tc/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f index e57cb05c..3f4e17e2 100644 --- a/plugins/local/slater_tc/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -618,3 +618,145 @@ subroutine get_single_excitation_from_fock_tc_no_3e(Nint, key_i, key_j, h, p, sp end + +subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + + double precision :: hmono, htwoe + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int, phase + double precision :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13 + integer :: other_spin(2) + integer(bit_kind) :: key_j_core(Nint,2), key_i_core(Nint,2) + + other_spin(1) = 2 + other_spin(2) = 1 + + hmono = 0.d0 + htwoe = 0.d0 + hji = 0.d0 + hji = 0.d0 + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.ne.1)then + return + endif + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + call get_single_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hmono, htwoe, hji,hij) + +end + +! --- + +subroutine get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h, p, spin, phase, hji,hij) + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer, intent(in) :: h, p, spin + double precision, intent(in) :: phase + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono_ji,htwoe_ji + double precision :: hmono_ij,htwoe_ij + + integer(bit_kind) :: differences(Nint,2) + integer(bit_kind) :: hole(Nint,2) + integer(bit_kind) :: partcl(Nint,2) + integer :: occ_hole(Nint*bit_kind_size,2) + integer :: occ_partcl(Nint*bit_kind_size,2) + integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) + integer :: i0,i + double precision :: buffer_c_ji(mo_num), buffer_x_ji(mo_num) + double precision :: buffer_c_ij(mo_num), buffer_x_ij(mo_num) + + do i = 1, mo_num + buffer_c_ji(i) = tc_2e_3idx_coulomb_integrals(i,p,h) + buffer_x_ji(i) = tc_2e_3idx_exchange_integrals(i,p,h) + buffer_c_ij(i) = tc_2e_3idx_coulomb_integrals_transp(i,p,h) + buffer_x_ij(i) = tc_2e_3idx_exchange_integrals_transp(i,p,h) + enddo + + do i = 1, Nint + differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1)) + differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2)) + hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1)) + hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2)) + partcl(i,1) = iand(differences(i,1),key_i(i,1)) + partcl(i,2) = iand(differences(i,2),key_i(i,2)) + enddo + + call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, Nint) + call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, Nint) + hmono_ji = mo_bi_ortho_tc_one_e(p,h) + htwoe_ji = fock_op_2_e_tc_closed_shell(p,h) + hmono_ij = mo_bi_ortho_tc_one_e(h,p) + htwoe_ij = fock_op_2_e_tc_closed_shell(h,p) + + ! holes :: direct terms + do i0 = 1, n_occ_ab_hole(1) + i = occ_hole(i0,1) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_hole(2) + i = occ_hole(i0,2) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + + ! holes :: exchange terms + do i0 = 1, n_occ_ab_hole(spin) + i = occ_hole(i0,spin) + htwoe_ji += buffer_x_ji(i) + htwoe_ij += buffer_x_ij(i) + enddo + + ! particles :: direct terms + do i0 = 1, n_occ_ab_partcl(1) + i = occ_partcl(i0,1) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_partcl(2) + i = occ_partcl(i0,2) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + + ! particles :: exchange terms + do i0 = 1, n_occ_ab_partcl(spin) + i = occ_partcl(i0,spin) + htwoe_ji -= buffer_x_ji(i) + htwoe_ij -= buffer_x_ij(i) + enddo + htwoe_ji = htwoe_ji * phase + hmono_ji = hmono_ji * phase + hji = htwoe_ji + hmono_ji + + htwoe_ij = htwoe_ij * phase + hmono_ij = hmono_ij * phase + hij = htwoe_ij + hmono_ij + +end + From 42fdb3c4350c0452a7169614ff9dba4e0e381f62 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 7 May 2024 20:52:10 +0200 Subject: [PATCH 14/64] it works with new routines for pt2 tc --- .../cipsi_tc_bi_ortho/get_d0_transp.irp.f | 54 ++++--------------- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 4 +- plugins/local/slater_tc/slater_tc_opt.irp.f | 17 +++--- .../slater_tc/slater_tc_opt_single.irp.f | 4 +- 4 files changed, 25 insertions(+), 54 deletions(-) diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f index 56238e13..f149e7c6 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -45,33 +45,16 @@ subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this ! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) hij = hij_cache1(p2) * phase + hji = hji_cache1(p2) * phase end if - if (hij == (0.d0,0.d0)) cycle + if (hij == 0.d0.or.hji == 0.d0) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT - enddo - end do - !!!!!!!!!! - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this -! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hji = hji_cache1(p2) * phase - end if - if (hji == (0.d0,0.d0)) cycle - !DIR$ LOOP COUNT AVG(4) - do k=1,N_states mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do @@ -98,40 +81,25 @@ subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this ! call i_h_j_complex(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij) - if (hij == 0.d0) cycle + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + if (hij == 0.d0.or.hji == 0.d0) cycle else ! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) ! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) - if (hij == 0.d0) cycle - hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e_transp(puti, putj, p2, p1)) + if (hij == 0.d0.or.hji == 0.d0) cycle + phase = get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hij = (hij) * phase + hji = (hji) * phase end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij - enddo - end do - - !!!!!!!!!! - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji) - if (hji == 0.d0) cycle - else -! hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) - mo_bi_ortho_tc_two_e( p2, p1, puti, putj)) - hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2 ) - mo_bi_ortho_tc_two_e_transp( puti, putj, p2, p1)) - if (hji == 0.d0) cycle - hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - !DIR$ LOOP COUNT AVG(4) - do k=1,N_states mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do + end do end if diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f index 3c6cbf60..84a1ce24 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -335,8 +335,8 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, ! gen is a selector; mask is ionized generator; det is alpha ! hij is contribution to ! call i_h_j_complex(gen, det, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij) - call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det, gen, N_int, hij,hji) +! call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) !DIR$ LOOP COUNT AVG(4) do k=1,N_states ! take conjugate to get contribution to instead of diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 9ed2b389..5651a299 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -181,7 +181,7 @@ end ! --- -subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, hji,hij) BEGIN_DOC ! @@ -199,24 +199,27 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, htot) implicit none integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) - double precision, intent(out) :: htot + double precision, intent(out) :: hji,hij integer :: degree - htot = 0.d0 + hji = 0.d0 + hij = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot) + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,hji) + hij = hji else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , htot) + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , hji,hij) else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, htot) + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) endif if(degree==0) then - htot += nuclear_repulsion + hji += nuclear_repulsion + hij += nuclear_repulsion endif end diff --git a/plugins/local/slater_tc/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f index 3f4e17e2..47bcbe34 100644 --- a/plugins/local/slater_tc/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -652,7 +652,7 @@ subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hj hmono = 0.d0 htwoe = 0.d0 hji = 0.d0 - hji = 0.d0 + hij = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.ne.1)then return @@ -661,7 +661,7 @@ subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hj call get_single_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) - call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hmono, htwoe, hji,hij) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hji,hij) end From a38bf00975365cc755fc7c8c24e9e74c02cd2a00 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 8 May 2024 17:26:48 +0200 Subject: [PATCH 15/64] updated default keywords in tc_keywords and ao_twoe_e_ints --- .../cipsi_tc_bi_ortho/get_d1_transp.irp.f | 34 ++++++++++++------- plugins/local/cipsi_tc_bi_ortho/pt2.irp.f | 3 ++ .../local/cipsi_tc_bi_ortho/selection.irp.f | 6 ++-- .../cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 +- .../local/tc_bi_ortho/diagonalize_tc_h.irp.f | 34 +++++++++---------- plugins/local/tc_keywords/EZFIO.cfg | 4 +-- src/ao_two_e_ints/EZFIO.cfg | 6 ++-- 7 files changed, 49 insertions(+), 40 deletions(-) diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f index 84a1ce24..a3d7b076 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -16,7 +16,7 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, logical :: ok logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, istate integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm integer, parameter :: turn2(2) = (/2,1/) @@ -65,10 +65,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo enddo !! - tmp_rowij = 0.d0 - tmp_rowji = 0.d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle @@ -119,13 +121,15 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, !MOVE MI pfix = p(1,mi) - tmp_rowij = 0.d0 - tmp_rowij2 = 0.d0 - tmp_rowji = 0.d0 - tmp_rowji2 = 0.d0 ! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) ! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) do mm = 1, mo_num + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) @@ -200,10 +204,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1, N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo enddo !! - tmp_rowij = 0.d0 - tmp_rowji = 0.d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle @@ -246,10 +252,6 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, pfix = p(1,mi) p1 = p(1,ma) p2 = p(2,ma) - tmp_rowij = 0.d0 - tmp_rowij2 = 0.d0 - tmp_rowji = 0.d0 - tmp_rowji2 = 0.d0 ! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) ! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) do mm = 1, mo_num @@ -257,6 +259,12 @@ subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + do istate = 1,N_states + tmp_rowij (istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji (istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo enddo putj = p2 !! diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index ada19c6b..22381991 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -65,6 +65,9 @@ subroutine tc_pt2 call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 17d34f43..72ccf9c4 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -691,19 +691,19 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(transpose_two_e_int)then call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d2_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif elseif(nt == 3) then if(transpose_two_e_int)then call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d1_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif else if(transpose_two_e_int)then call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) else - call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) endif endif elseif(nt == 4) then diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index bb5a89a1..e363830d 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -89,7 +89,7 @@ subroutine run_stochastic_cipsi call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) if(transpose_two_e_int)then - provide mo_bi_ortho_tc_two_e_transp + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop diff --git a/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f index 398e96db..03388898 100644 --- a/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f +++ b/plugins/local/tc_bi_ortho/diagonalize_tc_h.irp.f @@ -35,8 +35,8 @@ program tc_bi_ortho print*, ' nb of det = ', N_det call routine_diag() - call write_tc_energy() - call save_tc_bi_ortho_wavefunction() +! call write_tc_energy() +! call save_tc_bi_ortho_wavefunction() end @@ -76,28 +76,26 @@ subroutine routine_diag() PROVIDE noL_2e endif - PROVIDE htilde_matrix_elmt_bi_ortho - return if(N_states .eq. 1) then print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs - print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs +! print*,'e_tc_left_right = ',e_tc_left_right +! print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 +! print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth +! print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single +! print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double +! print*,'***' +! print*,'e_corr_bi_orth = ',e_corr_bi_orth +! print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj +! print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs +! print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth +! print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth +! print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs +! print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs print*,'Left/right eigenvectors' do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + write(*,'(I6,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) enddo else diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 39968ec8..d764224a 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: False [three_e_3_idx_term] type: logical @@ -50,7 +50,7 @@ default: False type: logical doc: If |true|, standard normal-ordering for L (to be used with three_body_h_tc |false|) interface: ezfio,provider,ocaml -default: False +default: True [core_tc_op] type: logical diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index ff932b0c..c2e083a3 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -25,16 +25,16 @@ default: 1.e-12 [do_direct_integrals] type: logical -doc: Compute integrals on the fly (very slow, only for debugging) +doc: Compute integrals on the fly (Useful only for Cholesky decomposition) interface: ezfio,provider,ocaml -default: False +default: True ezfio_name: direct [do_ao_cholesky] type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: False +default: True [io_ao_two_e_integrals_erf] type: Disk_access From 2af293fd291481896cd5114df6f653ca0f04f797 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 8 May 2024 17:38:54 +0200 Subject: [PATCH 16/64] minor modifs in BH jastrows --- .../local/non_h_ints_mu/jast_deriv_utils_vect.irp.f | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index db06e835..09bb6528 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -340,8 +340,8 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) endif tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) - tmp1 = tmp1 * g12_power(opA) - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp !tmp1 = 0.d0 !if(mpA .gt. 0) then @@ -356,9 +356,12 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) ! tmp2 = tmp2 + dble(opA) * g12**dble(opA-1) * (f1A**dble(mpA) * f2A**dble(npA) + f1A**dble(npA) * f2A**dble(mpA)) !endif - gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) - grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) - gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) +! gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) +! grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) +! gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) enddo ! p enddo ! i_nucl enddo ! jpoint From 812e75982b96959485b8f7ccd333ef3a1b1b570b Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 10 May 2024 17:23:51 +0200 Subject: [PATCH 17/64] minor modifs in plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f --- .../non_h_ints_mu/jast_deriv_utils_vect.irp.f | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index 09bb6528..2c41b535 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -867,19 +867,20 @@ subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - tmp1 = 1.d0 / (1.d0 + alpha * dist) - fct = alpha * dist * tmp1 - - if(dist .lt. 1d-10) then - grad1_fct(1) = 0.d0 - grad1_fct(2) = 0.d0 - grad1_fct(3) = 0.d0 - else + if(dist .ge. 1d-10) then + tmp1 = 1.d0 / (1.d0 + alpha * dist) + + fct = alpha * dist * tmp1 tmp2 = alpha * tmp1 * tmp1 / dist grad1_fct(1) = tmp2 * (r1(1) - r2(1)) grad1_fct(2) = tmp2 * (r1(2) - r2(2)) grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 endif return From 8eea5d7f7f142103998d8bfa1b3bcc630935f69b Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 15 May 2024 15:41:35 +0200 Subject: [PATCH 18/64] fixed a bug in cholesk_ao_transp --- .../tuto_plugins/tuto_I/test_cholesky.irp.f | 53 +++++++++++++++++++ src/ao_two_e_ints/cholesky.irp.f | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f diff --git a/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f new file mode 100644 index 00000000..d09d100a --- /dev/null +++ b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f @@ -0,0 +1,53 @@ +program my_program_to_print_stuffs + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + integer :: i,j,k,l,m + double precision :: integral, accu, accu_tot, integral_cholesky + double precision :: get_ao_two_e_integral, get_two_e_integral ! declaration of the functions + print*,'AO integrals, physicist notations : ' + accu_tot = 0.D0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integral_cholesky = 0.D0 + do m = 1, cholesky_ao_num + integral_cholesky += cholesky_ao_transp(m,i,k) * cholesky_ao_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo + print*,'accu_tot',accu_tot + + print*,'MO integrals, physicist notations : ' + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + integral = get_two_e_integral(i, j, k, l, mo_integrals_map) + accu = 0.D0 + integral_cholesky = 0.D0 + do m = 1, cholesky_mo_num + integral_cholesky += cholesky_mo_transp(m,i,k) * cholesky_mo_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo +end diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 33304026..5fbd166c 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, integer :: i,j,k do j=1,ao_num do i=1,ao_num - do k=1,ao_num + do k=1,cholesky_ao_num cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) enddo enddo From c6a61639445229eca3ecb2e32556ddef646064d6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 16 May 2024 17:57:00 +0200 Subject: [PATCH 19/64] added f_hf with cholesky by default --- src/dft_utils_in_r/mo_in_r.irp.f | 2 +- src/mu_of_r/f_cholesky.irp.f | 221 +++++++++++++++++++++++++++ src/mu_of_r/mu_of_r_conditions.irp.f | 46 +++++- 3 files changed, 264 insertions(+), 5 deletions(-) create mode 100644 src/mu_of_r/f_cholesky.irp.f diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index 192cb25a..ad931402 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -48,7 +48,7 @@ integer :: i,j do i = 1, n_points_final_grid do j = 1, mo_num - mos_in_r_array_transp(i,j) = mos_in_r_array(j,i) + mos_in_r_array_transp(i,j) = mos_in_r_array_omp(j,i) enddo enddo END_PROVIDER diff --git a/src/mu_of_r/f_cholesky.irp.f b/src/mu_of_r/f_cholesky.irp.f new file mode 100644 index 00000000..1ad4ce36 --- /dev/null +++ b/src/mu_of_r/f_cholesky.irp.f @@ -0,0 +1,221 @@ +BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_orb_r1(1,itmp) = i + list_couple_orb_r1(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_orb_r2(1,itmp) = i + list_couple_orb_r2(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, n_couple_orb_r1] + implicit none + BEGIN_DOC + ! number of couples of alpha occupied times any basis orbital + END_DOC + n_couple_orb_r1 = n_occ_val_orb_for_hf(1) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [integer, n_couple_orb_r2] + implicit none + BEGIN_DOC + ! number of couples of beta occupied times any basis orbital + END_DOC + n_couple_orb_r2 = n_occ_val_orb_for_hf(2) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r1 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r1(:,:),mo_chol_r1(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r1,mo_b_r1 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r1(n_couple_orb_r1,n_points_final_grid)) + allocate(mo_chol_r1(cholesky_mo_num,n_couple_orb_r1)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r1(itmp,ipoint) = mo_i_r1 * mo_b_r1 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r1(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) + allocate(test(cholesky_mo_num,n_points_final_grid)) + test = 0.d0 + do ipoint = 1, n_points_final_grid + do itmp = 1, n_couple_orb_r1 + i = list_couple_orb_r1(1,itmp) + m = list_couple_orb_r1(2,itmp) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + do mm = 1, cholesky_mo_num + test(mm,ipoint) += mo_i_r1 * mo_b_r1 * mo_chol_r1(mm,itmp) + enddo + enddo + enddo + double precision :: accu + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do mm = 1, cholesky_mo_num + accu += dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint) ) + if(dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then + print*,'problem ! ',dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)) & + , mos_times_cholesky_r1(mm,ipoint) , test(mm,ipoint) + endif + enddo + enddo + print*,'accu = ',accu + + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r2 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r2(:,:),mo_chol_r2(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r2,mo_b_r2 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r2(n_couple_orb_r2,n_points_final_grid)) + allocate(mo_chol_r2(cholesky_mo_num,n_couple_orb_r2)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r2(itmp,ipoint) = mo_i_r2 * mo_b_r2 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r2(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) + allocate(test(cholesky_mo_num,n_points_final_grid)) + test = 0.d0 + do ipoint = 1, n_points_final_grid + do itmp = 1, n_couple_orb_r2 + i = list_couple_orb_r2(1,itmp) + m = list_couple_orb_r2(2,itmp) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + do mm = 1, cholesky_mo_num + test(mm,ipoint) += mo_i_r2 * mo_b_r2 * mo_chol_r2(mm,itmp) + enddo + enddo + enddo + double precision :: accu + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do mm = 1, cholesky_mo_num + accu += dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint) ) + if(dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then + print*,'problem ! ',dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)) & + , mos_times_cholesky_r2(mm,ipoint) , test(mm,ipoint) + endif + enddo + enddo + print*,'accu = ',accu + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 2.D0 * u_dot_v(mos_times_cholesky_r2(1,ipoint),mos_times_cholesky_r1(1,ipoint),cholesky_mo_num) + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] + implicit none + integer :: ipoint,i,ii + double precision :: dm_a, dm_b + do ipoint = 1, n_points_final_grid + dm_a = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + dm_a += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + dm_b = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + dm_b += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b + enddo +END_PROVIDER + diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 6b49b9df..5b4d4b83 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -61,7 +61,7 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + PROVIDE f_hf_cholesky on_top_hf_grid print*,'providing mu_of_r_hf ...' call wall_time(wall0) sqpi = dsqrt(dacos(-1.d0)) @@ -69,10 +69,10 @@ !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & - !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_hf_cholesky,on_top_hf_grid,sqpi) do ipoint = 1, n_points_final_grid - f_hf = f_psi_hf_ab(ipoint) - on_top = on_top_hf_mu_r(ipoint) + f_hf = f_hf_cholesky(ipoint) + on_top = on_top_hf_grid(ipoint) if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then w_hf = 1.d+10 else @@ -85,6 +85,44 @@ print*,'Time to provide mu_of_r_hf = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + print*,'providing mu_of_r_hf_old ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + provide f_psi_hf_ab + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_old,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_psi_hf_ab(ipoint) + on_top = on_top_hf_mu_r(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_old(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_old = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_psi_cas, (n_points_final_grid,N_states) ] implicit none BEGIN_DOC From ce042fbd787a21a600830596fa3caa5f7aa2cdb1 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:01:28 +0200 Subject: [PATCH 20/64] basis set correction with cholesky works for hf --- .../local/basis_correction/51.basis_c.bats | 8 -- .../{01.convert.bats => convert_bats_old} | 0 src/hartree_fock/10.hf.bats | 13 -- src/mu_of_r/basis_def.irp.f | 45 +++++++ .../{f_cholesky.irp.f => f_hf_cholesky.irp.f} | 121 +++++++++--------- 5 files changed, 104 insertions(+), 83 deletions(-) rename src/ezfio_files/{01.convert.bats => convert_bats_old} (100%) rename src/mu_of_r/{f_cholesky.irp.f => f_hf_cholesky.irp.f} (67%) diff --git a/plugins/local/basis_correction/51.basis_c.bats b/plugins/local/basis_correction/51.basis_c.bats index 914b482b..1e20bae3 100644 --- a/plugins/local/basis_correction/51.basis_c.bats +++ b/plugins/local/basis_correction/51.basis_c.bats @@ -37,14 +37,6 @@ function run_sd() { eq $energy1 $1 $thresh } -@test "O2 CAS" { - qp set_file o2_cas.gms.ezfio - qp set_mo_class -c "[1-2]" -a "[3-10]" -d "[11-46]" - run -149.72435425 3.e-4 10000 - qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" - run_md -0.1160222327 1.e-6 -} - @test "LiF RHF" { qp set_file lif.ezfio diff --git a/src/ezfio_files/01.convert.bats b/src/ezfio_files/convert_bats_old similarity index 100% rename from src/ezfio_files/01.convert.bats rename to src/ezfio_files/convert_bats_old diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index b496a089..214dfa86 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -115,9 +115,6 @@ rm -rf $EZFIO run hco.ezfio -113.1841002944744 } -@test "HBO" { # 0.805600 1.4543s - run hbo.ezfio -100.018582259096 -} @test "H2S" { # 1.655600 4.21402s run h2s.ezfio -398.6944130421982 @@ -127,9 +124,6 @@ rm -rf $EZFIO run h3coh.ezfio -114.9865030596373 } -@test "H2O" { # 1.811100 1.84387s - run h2o.ezfio -0.760270218692179E+02 -} @test "H2O2" { # 2.217000 8.50267s run h2o2.ezfio -150.7806608469964 @@ -187,13 +181,6 @@ rm -rf $EZFIO run oh.ezfio -75.42025413469165 } -@test "[Cu(NH3)4]2+" { # 59.610100 4.18766m - [[ -n $TRAVIS ]] && skip - qp set_file cu_nh3_4_2plus.ezfio - qp set scf_utils thresh_scf 1.e-10 - run cu_nh3_4_2plus.ezfio -1862.97590358903 -} - @test "SO2" { # 71.894900 3.22567m [[ -n $TRAVIS ]] && skip run so2.ezfio -41.55800401346361 diff --git a/src/mu_of_r/basis_def.irp.f b/src/mu_of_r/basis_def.irp.f index fff9f581..e433f4d8 100644 --- a/src/mu_of_r/basis_def.irp.f +++ b/src/mu_of_r/basis_def.irp.f @@ -114,3 +114,48 @@ BEGIN_PROVIDER [double precision, basis_mos_in_r_array, (n_basis_orb,n_points_fi enddo enddo END_PROVIDER + +! BEGIN_PROVIDER [integer, n_docc_val_orb_for_cas] +!&BEGIN_PROVIDER [integer, n_max_docc_val_orb_for_cas] +! implicit none +! BEGIN_DOC +! ! Number of DOUBLY OCCUPIED VALENCE ORBITALS for the CAS wave function +! ! +! ! This determines the size of the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! integer :: i +! n_docc_val_orb_for_cas = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! n_docc_val_orb_for_cas +=1 +! endif +! enddo +! n_max_docc_val_orb_for_cas = maxval(n_docc_val_orb_for_cas) +! +!END_PROVIDER +! +!BEGIN_PROVIDER [integer, list_doc_valence_orb_for_cas, (n_max_docc_val_orb_for_cas)] +! implicit none +! BEGIN_DOC +! ! List of OCCUPIED valence orbitals for each spin to build the f_{HF}(r_1,r_2) function +! ! +! ! This corresponds to ALL OCCUPIED orbitals in the HF wave function, except those defined as "core" +! ! +! ! This determines the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! j = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! j +=1 +! list_doc_valence_orb_for_cas(j) = i +! endif +! enddo +! +!END_PROVIDER + diff --git a/src/mu_of_r/f_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f similarity index 67% rename from src/mu_of_r/f_cholesky.irp.f rename to src/mu_of_r/f_hf_cholesky.irp.f index 1ad4ce36..84097f09 100644 --- a/src/mu_of_r/f_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] +BEGIN_PROVIDER [integer, list_couple_hf_orb_r1, (2,n_couple_orb_r1)] implicit none integer :: ii,i,mm,m,itmp itmp = 0 @@ -7,14 +7,14 @@ BEGIN_PROVIDER [integer, list_couple_orb_r1, (2,n_couple_orb_r1)] do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) itmp += 1 - list_couple_orb_r1(1,itmp) = i - list_couple_orb_r1(2,itmp) = m + list_couple_hf_orb_r1(1,itmp) = i + list_couple_hf_orb_r1(2,itmp) = m enddo enddo END_PROVIDER -BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] +BEGIN_PROVIDER [integer, list_couple_hf_orb_r2, (2,n_couple_orb_r2)] implicit none integer :: ii,i,mm,m,itmp itmp = 0 @@ -23,8 +23,8 @@ BEGIN_PROVIDER [integer, list_couple_orb_r2, (2,n_couple_orb_r2)] do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) itmp += 1 - list_couple_orb_r2(1,itmp) = i - list_couple_orb_r2(2,itmp) = m + list_couple_hf_orb_r2(1,itmp) = i + list_couple_hf_orb_r2(2,itmp) = m enddo enddo END_PROVIDER @@ -87,31 +87,6 @@ BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_poi enddo call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) - allocate(test(cholesky_mo_num,n_points_final_grid)) - test = 0.d0 - do ipoint = 1, n_points_final_grid - do itmp = 1, n_couple_orb_r1 - i = list_couple_orb_r1(1,itmp) - m = list_couple_orb_r1(2,itmp) - mo_i_r1 = mos_in_r_array_omp(i,ipoint) - mo_b_r1 = mos_in_r_array_omp(m,ipoint) - do mm = 1, cholesky_mo_num - test(mm,ipoint) += mo_i_r1 * mo_b_r1 * mo_chol_r1(mm,itmp) - enddo - enddo - enddo - double precision :: accu - accu = 0.d0 - do ipoint = 1, n_points_final_grid - do mm = 1, cholesky_mo_num - accu += dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint) ) - if(dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then - print*,'problem ! ',dabs(mos_times_cholesky_r1(mm,ipoint) - test(mm,ipoint)) & - , mos_times_cholesky_r1(mm,ipoint) , test(mm,ipoint) - endif - enddo - enddo - print*,'accu = ',accu END_PROVIDER @@ -157,53 +132,72 @@ BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_poi enddo call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) - allocate(test(cholesky_mo_num,n_points_final_grid)) - test = 0.d0 - do ipoint = 1, n_points_final_grid - do itmp = 1, n_couple_orb_r2 - i = list_couple_orb_r2(1,itmp) - m = list_couple_orb_r2(2,itmp) - mo_i_r2 = mos_in_r_array_omp(i,ipoint) - mo_b_r2 = mos_in_r_array_omp(m,ipoint) - do mm = 1, cholesky_mo_num - test(mm,ipoint) += mo_i_r2 * mo_b_r2 * mo_chol_r2(mm,itmp) - enddo - enddo - enddo - double precision :: accu - accu = 0.d0 - do ipoint = 1, n_points_final_grid - do mm = 1, cholesky_mo_num - accu += dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint) ) - if(dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)).gt.1.d-10)then - print*,'problem ! ',dabs(mos_times_cholesky_r2(mm,ipoint) - test(mm,ipoint)) & - , mos_times_cholesky_r2(mm,ipoint) , test(mm,ipoint) - endif - enddo - enddo - print*,'accu = ',accu END_PROVIDER BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint,m,k !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) !! = \sum_A V_AR G_AR !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI - double precision :: u_dot_v - do ipoint = 1, n_points_final_grid - f_hf_cholesky(ipoint) = 2.D0 * u_dot_v(mos_times_cholesky_r2(1,ipoint),mos_times_cholesky_r1(1,ipoint),cholesky_mo_num) - enddo + double precision :: u_dot_v,wall0,wall1 + if(elec_alpha_num == elec_beta_num)then + provide mos_times_cholesky_r1 + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.d0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r1(m,ipoint) * mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r1 + else + provide mos_times_cholesky_r2 mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r2,mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.D0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r2(m,ipoint)*mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r2 mos_times_cholesky_r1 + endif END_PROVIDER BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] implicit none integer :: ipoint,i,ii - double precision :: dm_a, dm_b + double precision :: dm_a, dm_b,wall0,wall1 + print*,'providing on_top_hf_grid ...' + provide mos_in_r_array_omp + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,dm_a,dm_b,ii,i) & + !$OMP ShARED (n_points_final_grid,n_occ_val_orb_for_hf,mos_in_r_array_omp,list_valence_orb_for_hf,on_top_hf_grid) do ipoint = 1, n_points_final_grid dm_a = 0.d0 do ii = 1, n_occ_val_orb_for_hf(1) @@ -217,5 +211,8 @@ BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] enddo on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide on_top_hf_grid = ',wall1-wall0 END_PROVIDER From 112f113ccb3f363262930b53e21aed010a29f746 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:26:30 +0200 Subject: [PATCH 21/64] fixed forgotten stuffs in normal_order_old/NEED --- plugins/local/normal_order_old/NEED | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED index 8b137891..e8c8c478 100644 --- a/plugins/local/normal_order_old/NEED +++ b/plugins/local/normal_order_old/NEED @@ -1 +1 @@ - +tc_scf From 6fb0f2a58e803ea02a03fe01b4ce9daa6b2fba91 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 21 May 2024 12:53:55 +0200 Subject: [PATCH 22/64] modified scripts/get_fci_tc_conv.sh according to new printing --- plugins/local/tc_bi_ortho/print_tc_wf.irp.f | 2 +- scripts/get_fci_tc_conv.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f index 2b88bc5b..4d9f7c48 100644 --- a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f +++ b/plugins/local/tc_bi_ortho/print_tc_wf.irp.f @@ -37,7 +37,7 @@ subroutine write_l_r_wf integer :: i print*,'Writing the left-right wf' do i = 1, N_det - write(i_unit_output,*)i, psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(i,1) & + write(i_unit_output,'(I8,X,10(F16.10,X))')i, psi_coef_sorted_tc(i,1),psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(1,1)& , psi_l_coef_sorted_bi_ortho_left(i)/psi_l_coef_sorted_bi_ortho_left(1) & , psi_r_coef_sorted_bi_ortho_right(i)/psi_r_coef_sorted_bi_ortho_right(1) enddo diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh index 643f3ac0..f0c99baf 100755 --- a/scripts/get_fci_tc_conv.sh +++ b/scripts/get_fci_tc_conv.sh @@ -1,2 +1,2 @@ file=$1 -grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc +grep "Ndet,E,E+PT2,pt2_minus,pt2_plus,pt2_abs=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From 3600c3c5ca92c6c62a0bbbb6cc1d01ec595e148c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 22 May 2024 17:02:26 +0200 Subject: [PATCH 23/64] removed stupid dead link for PYSCF_EOMCC.py --- scripts/PYSCF_EOMCC.py | 1 - 1 file changed, 1 deletion(-) delete mode 120000 scripts/PYSCF_EOMCC.py diff --git a/scripts/PYSCF_EOMCC.py b/scripts/PYSCF_EOMCC.py deleted file mode 120000 index 8ad341da..00000000 --- a/scripts/PYSCF_EOMCC.py +++ /dev/null @@ -1 +0,0 @@ -/home_lct/eginer/qp2/plugins/qp_plugins_lct/dev/fcidump_for_vbarb/PYSCF_EOMCC.py \ No newline at end of file From 70f8019960140b965476e1efa88ee8e03850b0d9 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 10:02:46 +0200 Subject: [PATCH 24/64] removed stupid print in tc_keywords/EZFIO.cfg --- plugins/local/tc_keywords/EZFIO.cfg | 1 - src/mu_of_r/f_hf_cholesky.irp.f | 75 +++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index b7ce0b19..f3bd75c8 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -273,4 +273,3 @@ type: logical doc: If |true|, you duplicate the two-electron TC integrals with the transpose matrix. Acceleates the PT2. interface: ezfio,provider,ocaml default: False ->>>>>>> 8c4183cf6e38711b097df202d1f430b76823aeff diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 84097f09..101f9cc9 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -187,6 +187,81 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] endif END_PROVIDER +BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) +! !$OMP PARALLEL DO & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint,m) & +! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_sparse_cholesky(ipoint) = 0.d0 + do p = 1, cholesky_mo_num + accu_1 = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + f_hf_sparse_cholesky(ipoint) += accu_1 * accu_1 + enddo + f_hf_sparse_cholesky(ipoint) *= 2.D0 + enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + else + call wall_time(wall0) +! !$OMP PARALLEL DO & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint,m) & +! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_sparse_cholesky(ipoint) = 0.d0 + do p = 1, cholesky_mo_num + accu_2 = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_2 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + accu_1 = accu_2 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + enddo + enddo + f_hf_sparse_cholesky(ipoint) += accu_1 * accu_2 + enddo + f_hf_sparse_cholesky(ipoint) *= 2.D0 + enddo +! !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + endif +END_PROVIDER + BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] implicit none integer :: ipoint,i,ii From 49a96d4400c640928ba213f33e0493eadb4457ad Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 10:17:31 +0200 Subject: [PATCH 25/64] added f_hf_cholesky without big storage in memory --- .../basis_correction/test_chol_bas.irp.f | 18 ++++++++++++ src/mu_of_r/f_hf_cholesky.irp.f | 28 +++++++++++-------- 2 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 plugins/local/basis_correction/test_chol_bas.irp.f diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f new file mode 100644 index 00000000..ae47ec09 --- /dev/null +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -0,0 +1,18 @@ +program pouet + implicit none + call test +end +subroutine test + implicit none +! provide mos_times_cholesky_r1 +! provide mos_times_cholesky_r2 + integer :: ipoint + double precision :: accu,weight + accu = 0.d0 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) +! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight + accu += dabs(f_hf_sparse_cholesky(ipoint) - f_hf_cholesky(ipoint)) * weight + enddo + print*,'accu = ',accu +end diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 101f9cc9..b937addf 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -146,9 +146,9 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1 if(elec_alpha_num == elec_beta_num)then - provide mos_times_cholesky_r1 print*,'providing f_hf_cholesky ...' call wall_time(wall0) + provide mos_times_cholesky_r1 !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,m) & @@ -167,6 +167,8 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] print*,'Time to provide f_hf_cholesky = ',wall1-wall0 free mos_times_cholesky_r1 else + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) provide mos_times_cholesky_r2 mos_times_cholesky_r1 !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & @@ -198,10 +200,11 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 if(elec_alpha_num == elec_beta_num)then call wall_time(wall0) -! !$OMP PARALLEL DO & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (ipoint,m) & -! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & + !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid f_hf_sparse_cholesky(ipoint) = 0.d0 do p = 1, cholesky_mo_num @@ -219,16 +222,17 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] enddo f_hf_sparse_cholesky(ipoint) *= 2.D0 enddo -! !$OMP END PARALLEL DO + !$OMP END PARALLEL DO call wall_time(wall1) print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 - else + else call wall_time(wall0) -! !$OMP PARALLEL DO & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (ipoint,m) & -! !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & + !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid f_hf_sparse_cholesky(ipoint) = 0.d0 do p = 1, cholesky_mo_num @@ -256,7 +260,7 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] enddo f_hf_sparse_cholesky(ipoint) *= 2.D0 enddo -! !$OMP END PARALLEL DO + !$OMP END PARALLEL DO call wall_time(wall1) print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 endif From 70745cbeaaf59900c3ce4e1df042ef88ff1ecb11 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 May 2024 14:45:33 +0200 Subject: [PATCH 26/64] added sparse cholesky mu_of_r --- external/irpf90 | 2 +- .../basis_correction/basis_correction.irp.f | 4 -- .../basis_correction/print_routine.irp.f | 2 +- .../basis_correction/test_chol_bas.irp.f | 2 +- src/mu_of_r/f_hf_cholesky.irp.f | 52 ++++++++++++------- src/mu_of_r/mu_of_r_conditions.irp.f | 44 ++++++++++++++-- 6 files changed, 76 insertions(+), 30 deletions(-) diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..4ab1b175 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/plugins/local/basis_correction/basis_correction.irp.f b/plugins/local/basis_correction/basis_correction.irp.f index a7ea7244..f17b5d5b 100644 --- a/plugins/local/basis_correction/basis_correction.irp.f +++ b/plugins/local/basis_correction/basis_correction.irp.f @@ -7,10 +7,6 @@ program basis_correction touch read_wf no_core_density = .True. touch no_core_density - if(io_mo_two_e_integrals .ne. "Read")then - provide ao_two_e_integrals_in_map - endif - provide mo_two_e_integrals_in_map call print_basis_correction end diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index 96faba30..b3b38673 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -22,7 +22,7 @@ subroutine print_basis_correction print*, '****************************************' print*, '****************************************' print*, 'mu_of_r_potential = ',mu_of_r_potential - if(mu_of_r_potential.EQ."hf")then + if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then print*, '' print*,'Using a HF-like two-body density to define mu(r)' print*,'This assumes that HF is a qualitative representation of the wave function ' diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f index ae47ec09..076d888c 100644 --- a/plugins/local/basis_correction/test_chol_bas.irp.f +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -12,7 +12,7 @@ subroutine test do ipoint = 1, n_points_final_grid weight = final_weight_at_r_vector(ipoint) ! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight - accu += dabs(f_hf_sparse_cholesky(ipoint) - f_hf_cholesky(ipoint)) * weight + accu += dabs(f_hf_cholesky_sparse(ipoint) - f_hf_cholesky(ipoint)) * weight enddo print*,'accu = ',accu end diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index b937addf..17f0229a 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -189,7 +189,7 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] endif END_PROVIDER -BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] implicit none integer :: ipoint,m,mm,i,ii,p !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ @@ -198,43 +198,55 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] !! = \sum_A V_AR G_AR !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp if(elec_alpha_num == elec_beta_num)then call wall_time(wall0) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & - !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO do ipoint = 1, n_points_final_grid - f_hf_sparse_cholesky(ipoint) = 0.d0 - do p = 1, cholesky_mo_num - accu_1 = 0.d0 + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 do ii = 1, n_occ_val_orb_for_hf(1) i = list_valence_orb_for_hf(ii,1) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) += mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - f_hf_sparse_cholesky(ipoint) += accu_1 * accu_1 - enddo - f_hf_sparse_cholesky(ipoint) *= 2.D0 + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) += accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL call wall_time(wall1) - print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 else call wall_time(wall0) !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_sparse_cholesky,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo,n_basis_orb) do ipoint = 1, n_points_final_grid - f_hf_sparse_cholesky(ipoint) = 0.d0 + f_hf_cholesky_sparse(ipoint) = 0.d0 do p = 1, cholesky_mo_num accu_2 = 0.d0 do ii = 1, n_occ_val_orb_for_hf(2) @@ -256,13 +268,13 @@ BEGIN_PROVIDER [ double precision, f_hf_sparse_cholesky, (n_points_final_grid)] accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) enddo enddo - f_hf_sparse_cholesky(ipoint) += accu_1 * accu_2 + f_hf_cholesky_sparse(ipoint) += accu_1 * accu_2 enddo - f_hf_sparse_cholesky(ipoint) *= 2.D0 + f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo !$OMP END PARALLEL DO call wall_time(wall1) - print*,'Time to provide f_hf_sparse_cholesky = ',wall1-wall0 + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 endif END_PROVIDER diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 5b4d4b83..f2bb7145 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -13,7 +13,6 @@ integer :: ipoint,istate double precision :: wall0,wall1 print*,'providing mu_of_r ...' -! PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals call wall_time(wall0) if (read_mu_of_r) then @@ -26,6 +25,10 @@ do ipoint = 1, n_points_final_grid if(mu_of_r_potential.EQ."hf")then mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + else if(mu_of_r_potential.EQ."hf_old")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + else if(mu_of_r_potential.EQ."hf_sparse")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) else @@ -61,11 +64,10 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE f_hf_cholesky on_top_hf_grid print*,'providing mu_of_r_hf ...' call wall_time(wall0) + PROVIDE f_hf_cholesky on_top_hf_grid sqpi = dsqrt(dacos(-1.d0)) - provide f_psi_hf_ab !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & @@ -85,6 +87,42 @@ print*,'Time to provide mu_of_r_hf = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_sparse, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + print*,'providing mu_of_r_hf_sparse ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + PROVIDE f_hf_cholesky_sparse on_top_hf_grid + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_sparse,f_hf_cholesky_sparse,on_top_hf_grid,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_hf_cholesky_sparse(ipoint) + on_top = on_top_hf_grid(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_sparse(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_sparse = ',wall1-wall0 + END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] implicit none BEGIN_DOC From 1e886ac128187624af9efac5dd0cbe29e594ff5b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 27 May 2024 10:21:29 +0200 Subject: [PATCH 27/64] implemented the f_hf_sparse for open systems in basis set correction --- src/dft_utils_func/on_top_from_ueg.irp.f | 1 - src/mo_two_e_ints/cholesky.irp.f | 4 +++ src/mu_of_r/f_hf_cholesky.irp.f | 45 +++++++++++++++--------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/dft_utils_func/on_top_from_ueg.irp.f b/src/dft_utils_func/on_top_from_ueg.irp.f index 4e28ad89..711ffc39 100644 --- a/src/dft_utils_func/on_top_from_ueg.irp.f +++ b/src/dft_utils_func/on_top_from_ueg.irp.f @@ -32,7 +32,6 @@ double precision function g0_UEG_mu_inf(rho_a,rho_b) C = 0.08193d0 D = -0.01277d0 E = 0.001859d0 - x = -d2*rs if (dabs(rho) > 1.d-20) then rs = (3d0 / (4d0*pi*rho))**(1d0/3d0) ! JT: serious bug fixed 20/03/19 x = -d2*rs diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 349f13b9..0d0989d7 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -34,8 +34,10 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, END_DOC double precision, allocatable :: X(:,:,:) + double precision :: wall0, wall1 integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) if (ierr /= 0) then @@ -46,6 +48,8 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) deallocate(X) + call wall_time(wall1) + print*,'Time for AO->MO Cholesky vectors = ',wall1-wall0 END_PROVIDER diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 17f0229a..472abb1c 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -199,7 +199,7 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 double precision :: thresh_1,thresh_2 - double precision, allocatable :: accu_vec(:) + double precision, allocatable :: accu_vec(:),delta_vec(:) thresh_2 = ao_cholesky_threshold * 100.d0 thresh_1 = dsqrt(thresh_2) provide cholesky_mo_transp @@ -223,12 +223,12 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] mo_b_r1 = mos_in_r_array_omp(m,ipoint) if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle do p = 1, cholesky_mo_num - accu_vec(p) += mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) enddo enddo enddo do p = 1, cholesky_mo_num - f_hf_cholesky_sparse(ipoint) += accu_vec(p) * accu_vec(p) + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) enddo f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo @@ -240,39 +240,50 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 else call wall_time(wall0) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (accu_2,accu_1,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & - !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp) & - !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo,n_basis_orb) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO do ipoint = 1, n_points_final_grid f_hf_cholesky_sparse(ipoint) = 0.d0 - do p = 1, cholesky_mo_num - accu_2 = 0.d0 + accu_vec = 0.d0 do ii = 1, n_occ_val_orb_for_hf(2) i = list_valence_orb_for_hf(ii,2) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_2 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - accu_1 = accu_2 - do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) i = list_valence_orb_for_hf(ii,1) mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) mo_b_r1 = mos_in_r_array_omp(m,ipoint) - accu_1 += mo_i_r1 * mo_b_r1 * cholesky_mo(m,i,p) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo enddo enddo - f_hf_cholesky_sparse(ipoint) += accu_1 * accu_2 - enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo f_hf_cholesky_sparse(ipoint) *= 2.D0 enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL call wall_time(wall1) print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 endif From 4d18a0124e9a3cd088ce19e0c56801d7e4fb478d Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 27 May 2024 10:23:47 +0200 Subject: [PATCH 28/64] changed the default in mu_of_r_potential --- src/mu_of_r/EZFIO.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mu_of_r/EZFIO.cfg b/src/mu_of_r/EZFIO.cfg index a66b00ef..7a39b012 100644 --- a/src/mu_of_r/EZFIO.cfg +++ b/src/mu_of_r/EZFIO.cfg @@ -6,9 +6,9 @@ size: (becke_numerical_grid.n_points_final_grid,determinants.n_states) [mu_of_r_potential] type: character*(32) -doc: type of potential for the mu(r) interaction: can be [ hf| cas_full | cas_truncated | pure_act] +doc: type of potential for the mu(r) interaction: can be [ hf| hf_sparse | cas_full | cas_truncated | pure_act] interface: ezfio, provider, ocaml -default: hf +default: hf_sparse [io_mu_of_r] type: Disk_access From 09d9a814d2646cc599bdbdcb0886461fd8ed6688 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 May 2024 11:38:57 +0200 Subject: [PATCH 29/64] Added stop in (T) --- plugins/local/non_h_ints_mu/deb_mos.irp.f | 101 ++++++++++++++++++++++ plugins/local/non_h_ints_mu/qmckl.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 17 +++- src/utils_cc/EZFIO.cfg | 6 ++ 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 plugins/local/non_h_ints_mu/deb_mos.irp.f diff --git a/plugins/local/non_h_ints_mu/deb_mos.irp.f b/plugins/local/non_h_ints_mu/deb_mos.irp.f new file mode 100644 index 00000000..26344786 --- /dev/null +++ b/plugins/local/non_h_ints_mu/deb_mos.irp.f @@ -0,0 +1,101 @@ + +! --- + +program deb_mos + + implicit none + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + + call print_mos() + +end + +! --- + +subroutine print_mos() + + implicit none + integer :: i, ipoint + double precision :: r(3) + double precision :: mo_val, mo_der(3), mo_lap + + PROVIDE final_grid_points mos_in_r_array mos_grad_in_r_array mos_lapl_in_r_array + +! do ipoint = 1, n_points_final_grid +! r(:) = final_grid_points(:,ipoint) +! print*, r +! enddo +double precision :: accu_vgl(5) +double precision :: accu_vgl_nrm(5) + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(1111, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + write(1111, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(2222, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array_qmckl (i,ipoint) + mo_der(:) = mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap = mos_lapl_in_r_array_qmckl(i,ipoint) + write(2222, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + accu_vgl = 0.d0 + accu_vgl_nrm = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + accu_vgl_nrm(1) += dabs(mo_val) + accu_vgl_nrm(2) += dabs(mo_der(1)) + accu_vgl_nrm(3) += dabs(mo_der(2)) + accu_vgl_nrm(4) += dabs(mo_der(3)) + accu_vgl_nrm(5) += dabs(mo_lap) + + mo_val -= mos_in_r_array_qmckl (i,ipoint) + mo_der(:) -= mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap -= mos_lapl_in_r_array_qmckl(i,ipoint) + accu_vgl(1) += dabs(mo_val) + accu_vgl(2) += dabs(mo_der(1)) + accu_vgl(3) += dabs(mo_der(2)) + accu_vgl(4) += dabs(mo_der(3)) + accu_vgl(5) += dabs(mo_lap) + enddo + + enddo + accu_vgl(:) *= 1.d0 / accu_vgl_nrm(:) + print *, accu_vgl + + return +end + +! --- + diff --git a/plugins/local/non_h_ints_mu/qmckl.irp.f b/plugins/local/non_h_ints_mu/qmckl.irp.f index 4d419e24..de440f14 100644 --- a/plugins/local/non_h_ints_mu/qmckl.irp.f +++ b/plugins/local/non_h_ints_mu/qmckl.irp.f @@ -158,7 +158,7 @@ END_PROVIDER double precision, allocatable :: vgl(:,:,:) allocate( vgl(mo_num,5,n_points_final_grid)) - rc = qmckl_get_mo_basis_mo_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) + rc = qmckl_get_mo_basis_mo_vgl(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) if (rc /= QMCKL_SUCCESS) then print *, irp_here, 'qmckl error in get_mo_vgl' rc = qmckl_check(qmckl_ctx, rc) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1093c59d..485382e2 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -217,11 +217,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' ======================= ============== ==========' + double precision :: t_error call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 - !$OMP PARALLEL & - !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + t_error = huge(1.d0) + + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & !$OMP DEFAULT(SHARED) NUM_THREADS(nthreads_pt2) do kiter=1,Nabc @@ -328,15 +331,23 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (norm > 0.d0) then energy_stoch = ET / norm variance = ET2 / norm - energy_stoch*energy_stoch + if (norm > 1.d0) then + t_error = dsqrt(variance/(norm-1.d0)) + else + t_error = dsqrt(variance) + endif endif energy = energy_det + energy_stoch - print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, t_error, 100.*real(Ncomputed)/real(Nabc) + endif !$OMP END MASTER + if (t_error < cc_par_t_stop) exit if (imin > Nabc) exit enddo + !$OMP TASKWAIT !$OMP END PARALLEL print '(A)', ' ======================= ============== ========== ' diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index fb6d9034..7d50d66a 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -58,6 +58,12 @@ doc: If true, the CCSD(T) will be computed. interface: ezfio,ocaml,provider default: False +[cc_par_t_stop] +type: double precision +doc: Stops the calculation when the statistical error bar is below the given value. +interface: ezfio,ocaml,provider +default: 1.e-5 + [cc_dev] type: logical doc: Only for dev purposes. From ed8cfdc599bdbeb6b66fd444cef6dc0ac2756d55 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 May 2024 12:58:07 +0200 Subject: [PATCH 30/64] Added print of the error bar in (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 14 ++++++++------ src/ccsd/ccsd_t_space_orb_stoch.irp.f | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b48ca7da..b8cfab2a 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -192,7 +192,7 @@ subroutine run_ccsd_space_orb deallocate(H_vv,H_oo,H_vo,r1,r2,tau) ! CCSD(T) - double precision :: e_t + double precision :: e_t, e_t_err e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then @@ -210,22 +210,24 @@ subroutine run_ccsd_space_orb !print*,'' ! New + e_t = uncorr_energy + energy ! For print in (T) call + e_t_err = 0.d0 + print*,'Computing (T) correction...' call wall_time(ta) ! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) - e_t = uncorr_energy + energy ! For print in next call call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & - ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) call wall_time(tb) print*,'Time: ',tb-ta, ' s' print*,'' - write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + write(*,'(A15,F18.12,A7,F18.12)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' E(T) = ', e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' Correlation = ', energy + e_t, ' Ha +/- ', e_t_err print*,'' endif diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 485382e2..851b6a9f 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,5 @@ ! Main -subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy,t_error) implicit none @@ -7,7 +7,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(inout) :: energy + double precision, intent(inout) :: energy, t_error double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) @@ -217,7 +217,6 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' ======================= ============== ==========' - double precision :: t_error call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 From b773a361b204a1f8424ae26bc2ac1a1a9d424a9f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:07:29 +0200 Subject: [PATCH 31/64] fixed uninitialized variable in cholesky' --- src/ao_two_e_ints/cholesky.irp.f | 38 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5fbd166c..09d86679 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -22,6 +22,9 @@ END_PROVIDER ! = (ik|jl) = sum_a (ik|a).(a|jl) ! ! Last dimension of cholesky_ao is cholesky_ao_num + ! + ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html + ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 END_DOC integer :: rank, ndim @@ -86,20 +89,25 @@ END_PROVIDER call print_memory_usage() allocate(L(ndim,1)) +!print *, 'allocate : (L(ndim,1))', memory_of_double(ndim) print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' - print *, '============ =============' - print *, ' Rank Threshold' - print *, '============ =============' + print *, '============ ============ =============' + print *, ' Rank Block size Threshold' + print *, '============ ============ =============' rank = 0 allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) +!print *, 'allocate : (D(ndim))', memory_of_int(ndim) +!print *, 'allocate : (Lset(ndim))', memory_of_int(ndim) +!print *, 'allocate : (Dset(ndim))', memory_of_int(ndim) +!print *, 'allocate : (3,addr(ndim))', memory_of_int(3*ndim) ! 1. k=0 @@ -151,9 +159,10 @@ END_PROVIDER ! a. i = i+1 - s = 0.01d0 ! Inrease s until the arrays fit in memory + s = 0.01d0 + block_size = max(N,24) do while (.True.) ! b. @@ -168,6 +177,7 @@ END_PROVIDER endif enddo + call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) @@ -176,23 +186,28 @@ END_PROVIDER if (mem > qp_max_mem) then s = s*2.d0 + block_size = block_size / 2 else exit endif if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() - print *, 'Not enough memory. Reduce cholesky threshold' + print *, 'Required peak memory: ', mem, 'Gb' + call total_memory(mem) + print *, 'Already used memory: ', mem, 'Gb' + print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif enddo ! d., e. - block_size = max(N,24) L_old => L allocate(L(ndim,rank+nq), stat=ierr) +!print *, 'allocate : L(ndim,rank+nq)', memory_of_double(ndim*(rank+nq)) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' @@ -210,6 +225,8 @@ END_PROVIDER deallocate(L_old) allocate(Delta(np,nq), stat=ierr) +!print *, 'allocate : Delta(np,nq)', memory_of_double(np*nq) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' @@ -217,6 +234,8 @@ END_PROVIDER endif allocate(Ltmp_p(np,block_size), stat=ierr) +!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double(np*block_size) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' @@ -224,6 +243,8 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) +!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double(nq*block_size) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' @@ -232,6 +253,7 @@ END_PROVIDER allocate(computed(nq)) +!print *, 'allocate : computed(nq)', memory_of_int(nq) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) @@ -353,7 +375,7 @@ END_PROVIDER enddo - print '(I10, 4X, ES12.3)', rank, Qmax + print '(I10, 4X, I10, 4X, ES12.3)', rank, block_size, Qmax deallocate(computed) deallocate(Delta) @@ -380,6 +402,8 @@ END_PROVIDER enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) +!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', ao_num*ao_num*(rank*1_8) * 8_8 / 1024_8**3 + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': Allocation failed' From 0dca6cfde4ba658518637c2f42ecb2e45c04de6a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:20:04 +0200 Subject: [PATCH 32/64] block size in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 09d86679..5a44571c 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -186,7 +186,7 @@ END_PROVIDER if (mem > qp_max_mem) then s = s*2.d0 - block_size = block_size / 2 + block_size = max(block_size / 2, 1) else exit endif From 0a3d462510a6146da21770ff6fff2f7a1794a0fb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:30:48 +0200 Subject: [PATCH 33/64] Clean up openmp in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5a44571c..cfd57050 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -255,7 +255,7 @@ END_PROVIDER allocate(computed(nq)) !print *, 'allocate : computed(nq)', memory_of_int(nq) - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) !$OMP DO do q=1,nq @@ -351,7 +351,7 @@ END_PROVIDER ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f From b743201efe692294db887f175dceb02a81f73422 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 May 2024 20:50:30 +0200 Subject: [PATCH 34/64] Use integer*8 in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 152 ++++++++++++++++++------------- src/utils/memory.irp.f | 20 ++++ 2 files changed, 109 insertions(+), 63 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index cfd57050..daa29079 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -27,19 +27,22 @@ END_PROVIDER ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 END_DOC - integer :: rank, ndim + integer*8 :: ndim8 + integer :: rank double precision :: tau double precision, pointer :: L(:,:), L_old(:,:) double precision :: s - double precision, parameter :: dscale = 1.d0 + double precision :: dscale double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + integer, allocatable :: addr1(:,:), addr2(:,:) + integer**, allocatable :: Lset(:), Dset(:), addr3(:,:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2 + integer*8 :: i8, j8, p8, qj8 integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f @@ -47,15 +50,15 @@ END_PROVIDER logical, external :: ao_two_e_integral_zero double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr + integer :: block_size, iblock double precision :: mem - double precision, external :: memory_of_double, memory_of_int + double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen - integer :: iunit + integer :: iunit, ierr - ndim = ao_num*ao_num + ndim8 = ao_num*ao_num*1_8 deallocate(cholesky_ao) if (read_ao_cholesky) then @@ -83,13 +86,13 @@ END_PROVIDER tau = ao_cholesky_threshold - mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) call check_mem(mem, irp_here) call print_memory_usage() - allocate(L(ndim,1)) -!print *, 'allocate : (L(ndim,1))', memory_of_double(ndim) + allocate(L(ndim8,1)) +print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) print *, '' print *, 'Cholesky decomposition of AO integrals' @@ -102,36 +105,36 @@ END_PROVIDER rank = 0 - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(3,ndim) ) -!print *, 'allocate : (D(ndim))', memory_of_int(ndim) -!print *, 'allocate : (Lset(ndim))', memory_of_int(ndim) -!print *, 'allocate : (Dset(ndim))', memory_of_int(ndim) -!print *, 'allocate : (3,addr(ndim))', memory_of_int(3*ndim) + allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) + allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8), ) +print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) +print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) ! 1. k=0 do j=1,ao_num do i=1,ao_num k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j + addr1(k) = i + addr2(k) = j + addr3(k) = (i-1)*ao_num + j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + do i8=1,ndim8 + D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & + addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + do i8=1,ndim8 + D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & + addr2(i8), addr2(i8), & ao_integrals_map) enddo !$OMP END PARALLEL DO @@ -140,12 +143,21 @@ END_PROVIDER Dmax = maxval(D) ! 2. - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - endif + np = huge(1_4) + dscale = 1.d0 + do while (np == huge(1_4)) + np=0 + do p8=1,ndim8 + if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + np = np+1 + Lset(np) = p8 + if (np == huge(1_4)) then + ! Overflow detected + dscale = dscale*0.5d0 + exit + endif + endif + enddo enddo ! 3. @@ -155,7 +167,7 @@ END_PROVIDER i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) + do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4)) ) ! a. i = i+1 @@ -181,7 +193,8 @@ END_PROVIDER call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (rank+nq)* memory_of_double8(ndim8) +&! L(ndim8,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then @@ -205,19 +218,19 @@ END_PROVIDER ! d., e. L_old => L - allocate(L(ndim,rank+nq), stat=ierr) -!print *, 'allocate : L(ndim,rank+nq)', memory_of_double(ndim*(rank+nq)) + allocate(L(ndim8,rank+nq), stat=ierr) +print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) if (ierr /= 0) then call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + print *, irp_here, ': allocation failed : (L(ndim8,rank+nq))' stop -1 endif !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - do j=1,ndim - L(j,k) = L_old(j,k) + do j8=1,ndim8 + L(j8,k) = L_old(j8,k) enddo enddo !$OMP END PARALLEL DO @@ -225,7 +238,7 @@ END_PROVIDER deallocate(L_old) allocate(Delta(np,nq), stat=ierr) -!print *, 'allocate : Delta(np,nq)', memory_of_double(np*nq) +print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) if (ierr /= 0) then call print_memory_usage() @@ -234,7 +247,7 @@ END_PROVIDER endif allocate(Ltmp_p(np,block_size), stat=ierr) -!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double(np*block_size) +print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8) if (ierr /= 0) then call print_memory_usage() @@ -243,7 +256,7 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) -!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double(nq*block_size) +print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8) if (ierr /= 0) then call print_memory_usage() @@ -253,8 +266,9 @@ END_PROVIDER allocate(computed(nq)) -!print *, 'allocate : computed(nq)', memory_of_int(nq) +print *, 'allocate : computed(nq)', memory_of_int(nq) +print *, 'p1' !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) !$OMP DO @@ -296,7 +310,7 @@ END_PROVIDER iblock = 0 do j=1,nq - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + if ( (Qmax <= Dmin).or.(N+j*1_8 > ndim8) ) exit ! i. rank = N+j @@ -308,28 +322,28 @@ END_PROVIDER ! ii. do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then + qj8 = Dset(dj) + if (D(qj8) == Qmax) then exit endif enddo - L(1:ndim, rank) = 0.d0 + L(1:ndim8, rank) = 0.d0 if (.not.computed(dj)) then m = dj !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then if (do_direct_integrals) then Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) else Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) endif endif enddo @@ -391,18 +405,28 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - endif + np = huge(1_4) + dscale = 1.d0 + do while (np == huge(1_4)) + np=0 + do p8=1,ndim8 + if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + np = np+1 + Lset(np) = p8 + if (np == huge(1_4)) then + ! Overflow detected + dscale = dscale*0.5d0 + exit + endif + endif + enddo enddo + enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', ao_num*ao_num*(rank*1_8) * 8_8 / 1024_8**3 +print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -411,7 +435,9 @@ END_PROVIDER endif !$OMP PARALLEL DO PRIVATE(k) do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + do j=1,ao_num + call dcopy(ao_num, L((j-1)*ao_num+1,k), 1, cholesky_ao(1,j,k), 1) + enddo enddo !$OMP END PARALLEL DO deallocate(L) diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index e69bf71e..043562db 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -79,6 +79,26 @@ IRP_ENDIF call unlock_io() end function +double precision function memory_of_double8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 8.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_double8 = dble(n) * f +end function + +double precision function memory_of_int8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 4.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_int8 = dble(n) * f +end function + double precision function memory_of_double(n) implicit none BEGIN_DOC From 38d386d36c78ff87a6c1062d065ab2b12fa4dcc9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 2 Jun 2024 19:03:05 +0200 Subject: [PATCH 35/64] Debug cholesky --- config/gfortran_debug_mkl.cfg | 63 ++++++++++++ src/ao_two_e_ints/cholesky.irp.f | 169 +++++++++++++++++++------------ 2 files changed, 166 insertions(+), 66 deletions(-) create mode 100644 config/gfortran_debug_mkl.cfg diff --git a/config/gfortran_debug_mkl.cfg b/config/gfortran_debug_mkl.cfg new file mode 100644 index 00000000..1dc3f2b2 --- /dev/null +++ b/config/gfortran_debug_mkl.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -g -ffree-line-length-none -I . -fPIC -std=legacy +LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +#FCFLAGS : -g -msse4.2 -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan +FCFLAGS : -g -mavx -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow -finit-real=nan + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index daa29079..3d0baa48 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,21 +29,20 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank - double precision :: tau + double precision :: tau, tau2 double precision, pointer :: L(:,:), L_old(:,:) - double precision :: s - double precision :: dscale + double precision :: dscale, dscale_tmp double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: addr1(:,:), addr2(:,:) - integer**, allocatable :: Lset(:), Dset(:), addr3(:,:) + integer, allocatable :: addr1(:), addr2(:) + integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2 integer*8 :: i8, j8, p8, qj8 - integer :: N, np, nq + integer :: N, np, nq, npmax double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral @@ -53,6 +52,7 @@ END_PROVIDER integer :: block_size, iblock double precision :: mem + double precision, external :: memory_of_double, memory_of_int double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen @@ -61,6 +61,9 @@ END_PROVIDER ndim8 = ao_num*ao_num*1_8 deallocate(cholesky_ao) + +! TODO : Save L() to disk + if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -85,6 +88,7 @@ END_PROVIDER endif tau = ao_cholesky_threshold + tau2 = tau*tau mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) call check_mem(mem, irp_here) @@ -92,25 +96,25 @@ END_PROVIDER call print_memory_usage() allocate(L(ndim8,1)) -print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) +!print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' - print *, '============ ============ =============' - print *, ' Rank Block size Threshold' - print *, '============ ============ =============' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' rank = 0 allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) - allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8), ) -print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) -print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) + allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) +!print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) +!print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) ! 1. k=0 @@ -124,14 +128,14 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) do i8=1,ndim8 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(guided) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) do i8=1,ndim8 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & addr2(i8), addr2(i8), & @@ -143,17 +147,22 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) Dmax = maxval(D) ! 2. - np = huge(1_4) + npmax = huge(1_4)*1_8 + np = npmax dscale = 1.d0 - do while (np == huge(1_4)) + dscale_tmp = Dmax + do while (np == npmax) np=0 do p8=1,ndim8 - if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + if ( dscale_tmp*D(p8) > tau2 ) then np = np+1 Lset(np) = p8 - if (np == huge(1_4)) then + if (np == npmax) then ! Overflow detected - dscale = dscale*0.5d0 + dscale = dscale*0.1d0 + dscale_tmp = dscale*dscale*Dmax +!print *, 'Overflow detected ' +!print *, 'dscale = ', dscale exit endif endif @@ -167,7 +176,7 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4)) ) + do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4))) ) ! a. i = i+1 @@ -191,15 +200,13 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) call total_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double8(ndim8) -&! L(ndim8,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + mem = mem & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double8(ndim8) &! L(ndim8,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then s = s*2.d0 - block_size = max(block_size / 2, 1) else exit endif @@ -219,7 +226,7 @@ print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) L_old => L allocate(L(ndim8,rank+nq), stat=ierr) -print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) +!print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) if (ierr /= 0) then call print_memory_usage() @@ -227,7 +234,7 @@ print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) stop -1 endif - !$OMP PARALLEL DO PRIVATE(k,j) + !$OMP PARALLEL DO PRIVATE(k,j8) do k=1,rank do j8=1,ndim8 L(j8,k) = L_old(j8,k) @@ -238,7 +245,7 @@ print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) deallocate(L_old) allocate(Delta(np,nq), stat=ierr) -print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) +!print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) if (ierr /= 0) then call print_memory_usage() @@ -247,7 +254,7 @@ print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) endif allocate(Ltmp_p(np,block_size), stat=ierr) -print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8) +!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size if (ierr /= 0) then call print_memory_usage() @@ -256,7 +263,7 @@ print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8 endif allocate(Ltmp_q(nq,block_size), stat=ierr) -print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8) +!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8), nq, block_size if (ierr /= 0) then call print_memory_usage() @@ -266,34 +273,47 @@ print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8 allocate(computed(nq)) -print *, 'allocate : computed(nq)', memory_of_int(nq) +!print *, 'allocate : computed(nq)', memory_of_int(nq) -print *, 'p1' +!print *, 'N, rank, block_size', N, rank, block_size +!print *, 'p1' !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) +!print *, 'computed' + !$OMP DO + do q=1,nq + computed(q) = .False. + enddo + !$OMP ENDDO NOWAIT + +!print *, 'Delta' !$OMP DO do q=1,nq do j=1,np Delta(j,q) = 0.d0 enddo - computed(q) = .False. enddo !$OMP ENDDO NOWAIT - !$OMP DO +!print *, 'Ltmp_p' do k=1,N + !$OMP DO do p=1,np Ltmp_p(p,k) = L(Lset(p),k) enddo + !$OMP END DO NOWAIT + + !$OMP DO do q=1,nq Ltmp_q(q,k) = L(Dset(q),k) enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT !$OMP BARRIER !$OMP END PARALLEL +!print *, 'p2', np, nq, N if (N>0) then call dgemm('N','T', np, nq, N, -1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -315,8 +335,10 @@ print *, 'p1' rank = N+j if (iblock == block_size) then +!print *, 'dgemm' call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 endif @@ -328,36 +350,47 @@ print *, 'p1' endif enddo - L(1:ndim8, rank) = 0.d0 + do i8=1,ndim8 + L(i8, rank) = 0.d0 + enddo if (.not.computed(dj)) then m = dj - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) - do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& - addr2(Lset(k)), addr2(Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& - addr1(Dset(m)), addr2(Dset(m))) - else - Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& - addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) - endif - endif - enddo - !$OMP END PARALLEL DO + if (do_direct_integrals) then + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) + endif + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + endif + enddo + !$OMP END PARALLEL DO + endif computed(dj) = .True. endif iblock = iblock+1 +!print *, iblock do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo ! iv. if (iblock > 1) then +!print *, 'dgemv', iblock call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif @@ -365,6 +398,7 @@ print *, 'p1' ! iii. f = 1.d0/dsqrt(Qmax) +!print *, 'p4' !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np @@ -379,7 +413,6 @@ print *, 'p1' Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -389,12 +422,12 @@ print *, 'p1' enddo - print '(I10, 4X, I10, 4X, ES12.3)', rank, block_size, Qmax + print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(computed) - deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) + deallocate(computed) + deallocate(Delta) ! i. N = rank @@ -405,17 +438,21 @@ print *, 'p1' Dmax = max(Dmax, D(Lset(p))) enddo - np = huge(1_4) + np = npmax dscale = 1.d0 - do while (np == huge(1_4)) + dscale_tmp = Dmax + do while (np == npmax) np=0 do p8=1,ndim8 - if ( dscale*dscale*Dmax*D(p8) > tau*tau ) then + if ( dscale_tmp*D(p8) > tau2 ) then np = np+1 Lset(np) = p8 - if (np == huge(1_4)) then + if (np == npmax) then ! Overflow detected dscale = dscale*0.5d0 + dscale_tmp = dscale*dscale*Dmax +!print *, 'Overflow detected ' +!print *, 'dscale = ', dscale exit endif endif @@ -426,7 +463,7 @@ print *, 'p1' enddo allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) +!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -436,7 +473,7 @@ print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num* !$OMP PARALLEL DO PRIVATE(k) do k=1,rank do j=1,ao_num - call dcopy(ao_num, L((j-1)*ao_num+1,k), 1, cholesky_ao(1,j,k), 1) + cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) enddo enddo !$OMP END PARALLEL DO From ff59e9efcc2ab1e4ffaa352e86bbb14491ce1531 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 2 Jun 2024 19:16:56 +0200 Subject: [PATCH 36/64] added print in src/ao_two_e_ints/cholesky.irp.f --- src/ao_two_e_ints/cholesky.irp.f | 4 ++++ src/mu_of_r/f_hf_cholesky.irp.f | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 5fbd166c..1d8b8948 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -51,7 +51,9 @@ END_PROVIDER integer, external :: getUnitAndOpen integer :: iunit + double precision :: wall0,wall1 + call wall_time(wall0) ndim = ao_num*ao_num deallocate(cholesky_ao) @@ -409,6 +411,8 @@ END_PROVIDER print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' + call wall_time(wall1) + print*,'Time to provide AO cholesky vectors = ',wall1-wall0 END_PROVIDER diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f index 472abb1c..5dd69eb6 100644 --- a/src/mu_of_r/f_hf_cholesky.irp.f +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -220,10 +220,10 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] if(dabs(mo_i_r1).lt.thresh_1)cycle do mm = 1, n_basis_orb ! electron 1 m = list_basis(mm) - mo_b_r1 = mos_in_r_array_omp(m,ipoint) - if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + mo_b_r1 = mos_in_r_array_omp(m,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle do p = 1, cholesky_mo_num - accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_mo_transp(p,m,i) enddo enddo enddo From c1ca673a6fd39ef574bb5a41b420a18a45f85d58 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 11:24:25 +0200 Subject: [PATCH 37/64] Added omp_lib.F file --- etc/paths.rc | 9 +++++++++ src/ezfio_files/omp_lib.F | 4 ++++ 2 files changed, 13 insertions(+) create mode 100644 src/ezfio_files/omp_lib.F diff --git a/etc/paths.rc b/etc/paths.rc index dc1741e8..843ec383 100644 --- a/etc/paths.rc +++ b/etc/paths.rc @@ -28,6 +28,15 @@ function qp_prepend_export () { fi } +function qp_append_export () { + eval "value_1="\${$1}"" + if [[ -z $value_1 ]] ; then + echo "${2}:" + else + echo "${value_1}:${2}" + fi +} + export PYTHONPATH=$(qp_prepend_export "PYTHONPATH" "${QP_EZFIO}/Python":"${QP_PYTHON}") export PATH=$(qp_prepend_export "PATH" "${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml) diff --git a/src/ezfio_files/omp_lib.F b/src/ezfio_files/omp_lib.F new file mode 100644 index 00000000..b3df8e0a --- /dev/null +++ b/src/ezfio_files/omp_lib.F @@ -0,0 +1,4 @@ + module omp_lib +#include + end module + From 2a9b8c56a121fbe9880b9d6bebe344e78ded6355 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 11:32:56 +0200 Subject: [PATCH 38/64] omp_lib was a bad idea... --- src/ezfio_files/omp_lib.F | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/ezfio_files/omp_lib.F diff --git a/src/ezfio_files/omp_lib.F b/src/ezfio_files/omp_lib.F deleted file mode 100644 index b3df8e0a..00000000 --- a/src/ezfio_files/omp_lib.F +++ /dev/null @@ -1,4 +0,0 @@ - module omp_lib -#include - end module - From c95a0b2d87f34e07e52f535e1548081c97eba0eb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 16:15:09 +0200 Subject: [PATCH 39/64] Disk-based cholesky --- src/ao_two_e_ints/cholesky.irp.f | 180 ++++++++++++------------ src/ao_two_e_ints/two_e_integrals.irp.f | 4 +- src/ezfio_files/get_unit_and_open.irp.f | 8 +- 3 files changed, 96 insertions(+), 96 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index b98dfd5b..f689a65e 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -16,6 +16,7 @@ END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + use mmap_module implicit none BEGIN_DOC ! Cholesky vectors in AO basis: (ik|a): @@ -30,19 +31,19 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), L_old(:,:) + double precision, pointer :: L(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2 - integer*8 :: i8, j8, p8, qj8 - integer :: N, np, nq, npmax + integer*8 :: i8, j8, p8, qj8, rank_max, np8 + integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral @@ -61,12 +62,12 @@ END_PROVIDER ndim8 = ao_num*ao_num*1_8 double precision :: wall0,wall1 + type(c_ptr) :: c_pointer(2) + integer :: fd(2) + call wall_time(wall0) deallocate(cholesky_ao) - -! TODO : Save L() to disk - if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -81,6 +82,16 @@ END_PROVIDER PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) + rank_max = min(ndim8,274877906944_8/1_8/ndim8) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) +!print *, 'rank_max/ndim8', dble(rank_max) / dble(ndim8) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -98,9 +109,6 @@ END_PROVIDER call print_memory_usage() - allocate(L(ndim8,1)) -!print *, 'allocate : (L(ndim8,1))', memory_of_double8(ndim8) - print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' @@ -112,7 +120,7 @@ END_PROVIDER rank = 0 - allocate( D(ndim8), Lset(ndim8), Dset(ndim8) ) + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) !print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) !print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) @@ -132,44 +140,52 @@ END_PROVIDER if (do_direct_integrals) then !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) - do i8=1,ndim8 + do i8=ndim8,1,-1 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) - do i8=1,ndim8 + do i8=ndim8,1,-1 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & addr2(i8), addr2(i8), & ao_integrals_map) enddo !$OMP END PARALLEL DO endif + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = dabs(D_sorted(:)) - Dmax = maxval(D) + Dmax = D_sorted(1) ! 2. - npmax = huge(1_4)*1_8 - np = npmax - dscale = 1.d0 - dscale_tmp = Dmax - do while (np == npmax) - np=0 + dscale = tau2/Dmax + do i8=1,ndim8 + if (D_sorted(i8) <= dscale) exit + enddo + + + mem = qp_max_mem+1 + do while ( (mem > qp_max_mem).and.(i8>1_8) ) + dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) + dscale_tmp = dscale*dscale*Dmax +! print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) + np8=0_8 do p8=1,ndim8 if ( dscale_tmp*D(p8) > tau2 ) then - np = np+1 - Lset(np) = p8 - if (np == npmax) then - ! Overflow detected - dscale = dscale*0.1d0 - dscale_tmp = dscale*dscale*Dmax -!print *, 'Overflow detected ' -!print *, 'dscale = ', dscale - exit - endif + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + i8 = i8*3_8/4_8 + if (np8 > huge(1_4)/64_8) cycle + np = np8 +! print *, 'np = ', np + call resident_memory(mem) + mem = mem & + + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) enddo ! 3. @@ -179,7 +195,7 @@ END_PROVIDER i = 0 ! 5. - do while ( (Dmax > tau).and.(rank < min(ndim8,huge(1_4))) ) + do while ( (Dmax > tau).and.(rank*1_8 < min(ndim8,rank_max)) ) ! a. i = i+1 @@ -202,12 +218,12 @@ END_PROVIDER enddo - call total_memory(mem) + call resident_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double8(ndim8) &! L(ndim8,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) +!print *, 'mem = ', mem if (mem > qp_max_mem) then s = s*2.d0 else @@ -217,7 +233,7 @@ END_PROVIDER if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() print *, 'Required peak memory: ', mem, 'Gb' - call total_memory(mem) + call resident_memory(mem) print *, 'Already used memory: ', mem, 'Gb' print *, 'Not enough memory. Reduce cholesky threshold' stop -1 @@ -227,26 +243,6 @@ END_PROVIDER ! d., e. - L_old => L - allocate(L(ndim8,rank+nq), stat=ierr) -!print *, 'allocate : L(ndim8,rank+nq)', memory_of_double8(ndim8*(rank+nq)) - - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim8,rank+nq))' - stop -1 - endif - - !$OMP PARALLEL DO PRIVATE(k,j8) - do k=1,rank - do j8=1,ndim8 - L(j8,k) = L_old(j8,k) - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(L_old) - allocate(Delta(np,nq), stat=ierr) !print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) @@ -280,39 +276,29 @@ END_PROVIDER !print *, 'N, rank, block_size', N, rank, block_size !print *, 'p1' - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) - -!print *, 'computed' - !$OMP DO + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(p,q,j) do q=1,nq computed(q) = .False. - enddo - !$OMP ENDDO NOWAIT - -!print *, 'Delta' - !$OMP DO - do q=1,nq do j=1,np Delta(j,q) = 0.d0 enddo enddo - !$OMP ENDDO NOWAIT + !$OMP END PARALLEL DO -!print *, 'Ltmp_p' + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) do k=1,N !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo !$OMP END DO NOWAIT !$OMP DO do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) + Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT enddo - !$OMP BARRIER !$OMP END PARALLEL @@ -338,7 +324,7 @@ END_PROVIDER rank = N+j if (iblock == block_size) then -!print *, 'dgemm' +!print *, 'dgemm', np, nq call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -401,7 +387,6 @@ END_PROVIDER ! iii. f = 1.d0/dsqrt(Qmax) -!print *, 'p4' !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np @@ -441,30 +426,42 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - np = npmax - dscale = 1.d0 - dscale_tmp = Dmax - do while (np == npmax) - np=0 + mem = qp_max_mem+1 + do while ( (mem > qp_max_mem).and.(i8>1_8) ) + dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) + dscale_tmp = dscale*dscale*Dmax +!print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) + np8=0_8 do p8=1,ndim8 if ( dscale_tmp*D(p8) > tau2 ) then - np = np+1 - Lset(np) = p8 - if (np == npmax) then - ! Overflow detected - dscale = dscale*0.5d0 - dscale_tmp = dscale*dscale*Dmax -!print *, 'Overflow detected ' -!print *, 'dscale = ', dscale - exit - endif + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + i8 = i8*3_8/4_8 + if (np8 > huge(1_4)/64_8) cycle + np = np8 +!print *, 'np = ', np + call resident_memory(mem) + mem = mem & + + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) enddo + if (np == 0) then + call print_memory_usage() + print *, 'Required peak memory: ', mem, 'Gb' + call resident_memory(mem) + print *, 'Already used memory: ', mem, 'Gb' + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif enddo + + print *, '============ =============' + print *, '' + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) !print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) @@ -473,18 +470,19 @@ END_PROVIDER print *, irp_here, ': Allocation failed' stop -1 endif - !$OMP PARALLEL DO PRIVATE(k) + + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) enddo enddo !$OMP END PARALLEL DO - deallocate(L) - cholesky_ao_num = rank - print *, '============ =============' - print *, '' + call munmap( (/ ndim8, ndim8 /), 8, fd(1), c_pointer(1) ) + + cholesky_ao_num = rank if (write_ao_cholesky) then print *, 'Writing Cholesky vectors to disk...' diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index b55b5f0d..d12f3d45 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,8 +460,8 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(guided) - do i=1,ao_num + !$OMP SCHEDULE(dynamic) + do i=ao_num,1,-1 do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) ao_two_e_integral_schwartz(k,i) = ao_two_e_integral_schwartz(i,k) diff --git a/src/ezfio_files/get_unit_and_open.irp.f b/src/ezfio_files/get_unit_and_open.irp.f index 6440579f..d6a7efac 100644 --- a/src/ezfio_files/get_unit_and_open.irp.f +++ b/src/ezfio_files/get_unit_and_open.irp.f @@ -47,11 +47,13 @@ integer function getUnitAndOpen(f,mode) endif open(unit=getUnitAndOpen,file=f,status='OLD',action='READ',form='UNFORMATTED') else if (mode.eq.'W') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='UNFORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='UNFORMATTED') + else if (mode.eq.'A') then + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='UNFORMATTED') else if (mode.eq.'w') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='FORMATTED') else if (mode.eq.'a') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',position='APPEND',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='FORMATTED') else if (mode.eq.'x') then open(unit=getUnitAndOpen,file=new_f,form='FORMATTED') endif From b9f041e5e587e0ce96e845fd437f3fc5abeb3272 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jun 2024 19:31:39 +0200 Subject: [PATCH 40/64] More I/O in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 198 +++++++++++++++---------------- 1 file changed, 94 insertions(+), 104 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f689a65e..34b91f0f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,12 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:) + double precision, pointer :: L(:,:), Delta(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) @@ -52,7 +52,7 @@ END_PROVIDER double precision, external :: ao_two_e_integral integer :: block_size, iblock - double precision :: mem + double precision :: mem, mem0 double precision, external :: memory_of_double, memory_of_int double precision, external :: memory_of_double8, memory_of_int8 @@ -64,8 +64,11 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) + logical :: delta_on_disk call wall_time(wall0) + + ! Will be reallocated at the end deallocate(cholesky_ao) if (read_ao_cholesky) then @@ -82,11 +85,11 @@ END_PROVIDER PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) + call resident_memory(mem0) + rank_max = min(ndim8,274877906944_8/1_8/ndim8) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) -!print *, 'rank_max/ndim8', dble(rank_max) / dble(ndim8) - ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') @@ -161,32 +164,16 @@ END_PROVIDER Dmax = D_sorted(1) ! 2. - dscale = tau2/Dmax - do i8=1,ndim8 - if (D_sorted(i8) <= dscale) exit - enddo - - - mem = qp_max_mem+1 - do while ( (mem > qp_max_mem).and.(i8>1_8) ) - dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) - dscale_tmp = dscale*dscale*Dmax -! print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) - np8=0_8 - do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then - np8 = np8+1_8 - Lset(np8) = p8 - endif - enddo - i8 = i8*3_8/4_8 - if (np8 > huge(1_4)/64_8) cycle - np = np8 -! print *, 'np = ', np - call resident_memory(mem) - mem = mem & - + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) + dscale = 1.d0 + dscale_tmp = dscale*dscale*Dmax + np8=0_8 + do p8=1,ndim8 + if ( dscale_tmp*D(p8) > tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 + endif enddo + np = np8 ! 3. N = 0 @@ -218,13 +205,11 @@ END_PROVIDER enddo - call resident_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + mem = mem0 & + + np*memory_of_double(nq) !print *, 'mem = ', mem - if (mem > qp_max_mem) then + if (mem > 300.d0) then ! 300GB max for Delta s = s*2.d0 else exit @@ -239,18 +224,33 @@ END_PROVIDER stop -1 endif + if (s > 0.1d0) then + exit + endif + enddo ! d., e. + mem = mem0 & + + memory_of_int(nq) &! computed(nq) + + np*memory_of_int(nq) &! computed(nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - allocate(Delta(np,nq), stat=ierr) -!print *, 'allocate : Delta(np,nq)', memory_of_double8(np*nq*1_8) - - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 + if (mem > qp_max_mem) then + call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') + close(iunit,status='delete') + delta_on_disk = .True. + else + allocate(Delta(np,nq)) + delta_on_disk = .False. endif + print *, delta_on_disk + + allocate(Delta_col(np)) allocate(Ltmp_p(np,block_size), stat=ierr) !print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size @@ -272,40 +272,38 @@ END_PROVIDER allocate(computed(nq)) -!print *, 'allocate : computed(nq)', memory_of_int(nq) !print *, 'N, rank, block_size', N, rank, block_size -!print *, 'p1' - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(p,q,j) - do q=1,nq - computed(q) = .False. - do j=1,np - Delta(j,q) = 0.d0 - enddo - enddo - !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) do k=1,N !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo !$OMP END DO NOWAIT !$OMP DO do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) + computed(q) = .False. + Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT enddo !$OMP BARRIER !$OMP END PARALLEL -!print *, 'p2', np, nq, N if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 0.d0, Delta, np) + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) SCHEDULE(static,1) + do q=1,nq + do j=1,np + Delta(j,q) = 0.d0 + enddo + enddo + !$OMP END PARALLEL DO endif ! f. @@ -324,10 +322,8 @@ END_PROVIDER rank = N+j if (iblock == block_size) then -!print *, 'dgemm', np, nq call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) iblock = 0 endif @@ -343,43 +339,51 @@ END_PROVIDER L(i8, rank) = 0.d0 enddo + iblock = iblock+1 + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO + if (.not.computed(dj)) then m = dj if (do_direct_integrals) then - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) - do k=np,1,-1 + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then - Delta(k,m) = Delta(k,m) + & + Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) endif enddo !$OMP END PARALLEL DO - else - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,16) - do k=np,1,-1 + else + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then - Delta(k,m) = Delta(k,m) + & + Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) endif enddo !$OMP END PARALLEL DO endif + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. endif - iblock = iblock+1 -!print *, iblock - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - ! iv. if (iblock > 1) then -!print *, 'dgemv', iblock call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif @@ -412,10 +416,15 @@ END_PROVIDER print '(I10, 4X, ES12.3)', rank, Qmax + deallocate(Delta_col) deallocate(Ltmp_p) deallocate(Ltmp_q) deallocate(computed) - deallocate(Delta) + if (delta_on_disk) then + call munmap( (/ np*1_8, nq*1_8 /), 8, fd(2), c_pointer(2) ) + else + deallocate(Delta) + endif ! i. N = rank @@ -426,35 +435,16 @@ END_PROVIDER Dmax = max(Dmax, D(Lset(p))) enddo - mem = qp_max_mem+1 - do while ( (mem > qp_max_mem).and.(i8>1_8) ) - dscale = min(1.d0,dsqrt(tau2/(D_sorted(i8)*Dmax))) - dscale_tmp = dscale*dscale*Dmax -!print *, 'dscale = ', dscale, dble(i8)/dble(ndim8) - np8=0_8 - do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then - np8 = np8+1_8 - Lset(np8) = p8 - endif - enddo - i8 = i8*3_8/4_8 - if (np8 > huge(1_4)/64_8) cycle - np = np8 -!print *, 'np = ', np - call resident_memory(mem) - mem = mem & - + 0.1d0*np*memory_of_double(np) ! Delta(np,nq) + dscale = 1.d0 + dscale_tmp = dscale*dscale*Dmax + np8=0_8 + do p8=1,ndim8 + if ( dscale_tmp*D(p8) > tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 + endif enddo - - if (np == 0) then - call print_memory_usage() - print *, 'Required peak memory: ', mem, 'Gb' - call resident_memory(mem) - print *, 'Already used memory: ', mem, 'Gb' - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif + np = np8 enddo @@ -480,7 +470,7 @@ END_PROVIDER enddo !$OMP END PARALLEL DO - call munmap( (/ ndim8, ndim8 /), 8, fd(1), c_pointer(1) ) + call munmap( (/ ndim8, rank_max /), 8, fd(1), c_pointer(1) ) cholesky_ao_num = rank From 19286bede43e5b01b5997ccf5e709a2ade9a4456 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jun 2024 02:51:12 +0200 Subject: [PATCH 41/64] Initialization --- src/ao_two_e_ints/cholesky.irp.f | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 34b91f0f..a7b2389f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -248,7 +248,7 @@ END_PROVIDER allocate(Delta(np,nq)) delta_on_disk = .False. endif - print *, delta_on_disk +!print *, delta_on_disk allocate(Delta_col(np)) @@ -275,7 +275,7 @@ END_PROVIDER !print *, 'N, rank, block_size', N, rank, block_size - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q,j) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO do p=1,np @@ -356,6 +356,8 @@ END_PROVIDER Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) + else + Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO @@ -367,6 +369,8 @@ END_PROVIDER Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + else + Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO From 36a2f0b46ff535607764b662e8013cba49347ff8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jun 2024 03:16:55 +0200 Subject: [PATCH 42/64] Fixed cholesky --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index a7b2389f..09131b5d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -272,9 +272,10 @@ END_PROVIDER allocate(computed(nq)) + computed(:) = .False. + !print *, 'N, rank, block_size', N, rank, block_size - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO @@ -285,7 +286,6 @@ END_PROVIDER !$OMP DO do q=1,nq - computed(q) = .False. Ltmp_q(q,k) = L(Dset(q),k) enddo !$OMP END DO NOWAIT From 2241096a6485a71406c12e5c2ae3165a4a838aeb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jun 2024 13:53:30 +0200 Subject: [PATCH 43/64] Trying to improve mmap performance --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- .../dav_diag_dressed_ext_rout.irp.f | 1 - ...diag_dressed_ext_rout_nonsym_B1space.irp.f | 2 -- .../dav_double_dress_ext_rout.irp.f | 1 - .../dav_dressed_ext_rout.irp.f | 1 - src/dav_general_mat/dav_ext_rout.irp.f | 1 - .../dav_ext_rout_nonsym_B1space.irp.f | 2 -- src/dav_general_mat/dav_general.irp.f | 6 +++--- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 4 ++-- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/utils/fortran_mmap.c | 13 +++++++++---- src/utils/map_functions.irp.f | 12 ++++++------ src/utils/memory.irp.f | 2 +- src/utils/mmap.f90 | 19 +++++++++++-------- 16 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 09131b5d..3cd400f8 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -88,7 +88,7 @@ END_PROVIDER call resident_memory(mem0) rank_max = min(ndim8,274877906944_8/1_8/ndim8) - call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes @@ -237,7 +237,7 @@ END_PROVIDER + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then - call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., c_pointer(2)) + call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) ! Deleting the file while it is open makes the file invisible on the filesystem, ! and automatically deleted, even if the program crashes diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 0dc939cb..f57b7f92 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization with ONE DIAGONAL DRESSING OPERATOR diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f index 1a8269f4..c8848998 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 24f4fa10..1ff6632c 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies,sze,N_st,N_st_diag,converged,hcalc) - use mmap_module BEGIN_DOC ! Generic Davidson diagonalization with TWO DRESSING VECTORS ! diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index cedaaf0a..ca59a688 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_diag,dressing_state,dressing_vec,idress,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization. diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index deb7e3a9..ad60b2a8 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index d89aaadb..ca0a835e 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index 9940bf1e..a277d9ef 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -1,6 +1,6 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,h_mat) - use mmap_module +! use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization with specific diagonal elements of the H matrix @@ -160,9 +160,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! type(c_ptr) :: ptr_w, ptr_s ! integer :: fd_s, fd_w ! call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 8, fd_w, .False., ptr_w) +! 8, fd_w, .False., .True., ptr_w) ! call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 4, fd_s, .False., ptr_s) +! 4, fd_s, .False., .True., ptr_s) ! call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) ! call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) ! else diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index b7179c18..15bf256d 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -228,7 +228,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index fa8aff80..656dd1d9 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -229,7 +229,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index fd967ecc..fb04b29b 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -270,9 +270,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 4, fd_s, .False., ptr_s) + 4, fd_s, .False., .True., ptr_s) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) else diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 96ca84ab..86df3a19 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -251,7 +251,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index e8d85a2f..fdf7fb6f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -7,7 +7,7 @@ #include -void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) +void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, int single_node) { int fd; int result; @@ -21,7 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); + map = mmap(NULL, bytes, PROT_READ, MAP_PRIVATE, fd, 0); } else { @@ -39,7 +39,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error calling lseek() to stretch the file"); exit(EXIT_FAILURE); } - + result = write(fd, "", 1); if (result != 1) { close(fd); @@ -48,7 +48,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_HUGETLB , fd, 0); + } else { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); + } } if (map == MAP_FAILED) { diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index 97d0e8bf..e3a62b07 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -21,13 +21,13 @@ subroutine map_save_to_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) - call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ n_elements /)) if (.not.associated(map%consolidated_key)) then @@ -85,15 +85,15 @@ subroutine map_load_from_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 - call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) l = 0_8 diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 043562db..e2e8dd76 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] character*(128) :: env integer, external :: get_total_available_memory - qp_max_mem = get_total_available_memory() + qp_max_mem = max(get_total_available_memory() - 1,3) call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 41e60224..723cb771 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -7,12 +7,13 @@ module mmap_module ! File descriptors ! ---------------- - type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only) bind(c,name='mmap_fortran') + type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only, single_node) bind(c,name='mmap_fortran') use iso_c_binding character(c_char), intent(in) :: filename(*) integer(c_size_t), intent(in), value :: length integer(c_int), intent(out) :: fd integer(c_int), intent(in), value :: read_only + integer(c_int), intent(in), value :: single_node end function subroutine c_munmap_fortran(length, fd, map) bind(c,name='munmap_fortran') @@ -33,31 +34,33 @@ module mmap_module contains - subroutine mmap(filename, shape, bytes, fd, read_only, map) + subroutine mmap(filename, shape, bytes, fd, read_only, single_node, map) use iso_c_binding implicit none character*(*), intent(in) :: filename ! Name of the mapped file integer*8, intent(in) :: shape(:) ! Shape of the array to map integer, intent(in) :: bytes ! Number of bytes per element logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node integer, intent(out) :: fd ! File descriptor type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length integer(c_int) :: fd_ - integer :: i + integer :: i, read_only_, single_node_ + + read_only_ = 0 + single_node_ = 0 + if (read_only_) read_only_ = 1 + if (single_node_) single_node_ = 1 length = int(bytes,8) do i=1,size(shape) length = length * shape(i) enddo - if (read_only) then - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) - else - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 0) - endif + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only, single_node) fd = fd_ end subroutine From 38aa8ef547c2b4d68b6caf2d7cca40a3590f69ed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jun 2024 16:36:45 +0200 Subject: [PATCH 44/64] Update ccsd_space_orb_sub.irp.f --- src/ccsd/ccsd_space_orb_sub.irp.f | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b48ca7da..555a2552 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,6 +18,8 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa + call set_multiple_levels_omp(.False.) + if (do_ao_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao From 6ae162b6c93d0bb31749086736dea13af9edfb56 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 14:33:13 +0200 Subject: [PATCH 45/64] Disk-based cholesky OK --- src/ao_two_e_ints/cholesky.irp.f | 13 ++++++++++--- src/utils/fortran_mmap.c | 5 ++--- src/utils/mmap.f90 | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 3cd400f8..6778d5c7 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -66,6 +66,10 @@ END_PROVIDER integer :: fd(2) logical :: delta_on_disk + PROVIDE nproc + PROVIDE nucl_coord ao_two_e_integral_schwartz + call set_multiple_levels_omp(.False.) + call wall_time(wall0) ! Will be reallocated at the end @@ -87,7 +91,7 @@ END_PROVIDER call resident_memory(mem0) - rank_max = min(ndim8,274877906944_8/1_8/ndim8) + rank_max = min(ndim8,(qp_max_mem*1024_8*1024_8*1024_8/8_8)/ndim8) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) ! Deleting the file while it is open makes the file invisible on the filesystem, @@ -209,7 +213,7 @@ END_PROVIDER + np*memory_of_double(nq) !print *, 'mem = ', mem - if (mem > 300.d0) then ! 300GB max for Delta + if (mem > qp_max_mem/2) then s = s*2.d0 else exit @@ -231,9 +235,12 @@ END_PROVIDER enddo ! d., e. - mem = mem0 & + mem = mem0 & + memory_of_int(nq) &! computed(nq) + np*memory_of_int(nq) &! computed(nq) + + memory_of_double(np) &! Delta_col(np) + + 7*memory_of_double(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem) then diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index fdf7fb6f..711a9c34 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -49,10 +49,9 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_HUGETLB , fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK, fd, 0); } else { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } } diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 723cb771..af3fe6ed 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,9 +46,9 @@ module mmap_module type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length - integer(c_int) :: fd_ + integer(c_int) :: fd_, read_only_, single_node_ - integer :: i, read_only_, single_node_ + integer :: i read_only_ = 0 single_node_ = 0 @@ -60,7 +60,7 @@ module mmap_module length = length * shape(i) enddo - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only, single_node) + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only_, single_node_) fd = fd_ end subroutine From af8973770e9265f6b8f997edde1873b97c1a48da Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 14:39:34 +0200 Subject: [PATCH 46/64] Typo in mmap --- src/utils/mmap.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index af3fe6ed..e342b422 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -52,8 +52,8 @@ module mmap_module read_only_ = 0 single_node_ = 0 - if (read_only_) read_only_ = 1 - if (single_node_) single_node_ = 1 + if (read_only) read_only_ = 1 + if (single_node) single_node_ = 1 length = int(bytes,8) do i=1,size(shape) From f58df5e81669226e728f744308593db5a5e4cad0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:09:53 +0200 Subject: [PATCH 47/64] Added do_mo_cholesky --- src/ao_two_e_ints/EZFIO.cfg | 2 +- src/ao_two_e_ints/cholesky.irp.f | 6 +- src/mo_two_e_ints/EZFIO.cfg | 6 ++ src/mo_two_e_ints/cholesky.irp.f | 117 +++++++++++++++------- src/mo_two_e_ints/four_idx_novvvv.irp.f | 9 -- src/mo_two_e_ints/integrals_3_index.irp.f | 2 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 2 +- src/trexio/import_trexio_integrals.irp.f | 5 + 8 files changed, 97 insertions(+), 52 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index c2e083a3..a985149e 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -6,7 +6,7 @@ default: None [io_ao_cholesky] type: Disk_access -doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +doc: Read/Write |AO| Cholesky integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6778d5c7..a1cd8e5b 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -76,7 +76,7 @@ END_PROVIDER deallocate(cholesky_ao) if (read_ao_cholesky) then - print *, 'Reading Cholesky vectors from disk...' + print *, 'Reading Cholesky AO vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') read(iunit) rank allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) @@ -486,7 +486,7 @@ END_PROVIDER cholesky_ao_num = rank if (write_ao_cholesky) then - print *, 'Writing Cholesky vectors to disk...' + print *, 'Writing Cholesky AO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') write(iunit) rank write(iunit) cholesky_ao @@ -499,7 +499,7 @@ END_PROVIDER print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' call wall_time(wall1) - print*,'Time to provide AO cholesky vectors = ',wall1-wall0 + print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' END_PROVIDER diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 088a2416..49a2952c 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -1,3 +1,9 @@ +[io_mo_cholesky] +type: Disk_access +doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [io_mo_two_e_integrals] type: Disk_access doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 0d0989d7..d3affd68 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,55 +1,98 @@ +BEGIN_PROVIDER [ logical, do_mo_cholesky ] + implicit none + BEGIN_DOC + ! If True, use Cholesky vectors for MO integrals + END_DOC + do_mo_cholesky = do_ao_cholesky +END_PROVIDER + BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis END_DOC - cholesky_mo_num = cholesky_ao_num -END_PROVIDER - -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] - implicit none - BEGIN_DOC - ! Cholesky vectors in MO basis - END_DOC - - integer :: k, i, j - - call set_multiple_levels_omp(.False.) - !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_mo_num - do j=1,mo_num - do i=1,mo_num - cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) - enddo - enddo - enddo - !$OMP END PARALLEL DO - + integer, external :: getUnitAndOpen + integer :: iunit + if (read_mo_cholesky) then + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) cholesky_mo_num + close(iunit) + else + cholesky_mo_num = cholesky_ao_num + endif END_PROVIDER +!BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] +! implicit none +! BEGIN_DOC +! ! Cholesky vectors in MO basis +! END_DOC +! +! integer :: k, i, j +! +! call set_multiple_levels_omp(.False.) +! !$OMP PARALLEL DO PRIVATE(k) +! do k=1,cholesky_mo_num +! do j=1,mo_num +! do i=1,mo_num +! cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO +! +!END_PROVIDER +! BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC - ! Cholesky vectors in MO basis + ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) END_DOC double precision, allocatable :: X(:,:,:) double precision :: wall0, wall1 - integer :: ierr - print *, 'AO->MO Transformation of Cholesky vectors' - call wall_time(wall0) + integer, external :: getUnitAndOpen + integer :: iunit, ierr, rank - allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': Allocation failed' - endif - call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) - call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) - deallocate(X) - call wall_time(wall1) - print*,'Time for AO->MO Cholesky vectors = ',wall1-wall0 + if (read_mo_cholesky) then + print *, 'Reading Cholesky MO vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) rank + if (cholesky_mo_num /= rank) then + stop 'inconsistent rank' + endif + read(iunit) cholesky_mo_transp + close(iunit) + else + print *, 'AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) + call wall_time(wall1) + print*,'Time to provide MO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + + + if (write_mo_cholesky) then + print *, 'Writing Cholesky MO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') + write(iunit) rank + write(iunit) cholesky_mo_transp + close(iunit) + call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') + endif + endif END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f index 2be09689..80af35dc 100644 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ b/src/mo_two_e_ints/four_idx_novvvv.irp.f @@ -1,12 +1,3 @@ -!BEGIN_PROVIDER [ logical, no_vvvv_integrals ] -! implicit none -! BEGIN_DOC -! If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -! END_DOC -! -! no_vvvv_integrals = .False. -!END_PROVIDER - BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] implicit none BEGIN_DOC diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index eb05da84..c0dab506 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -10,7 +10,7 @@ double precision :: get_two_e_integral double precision :: integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0e77b6a2..cb3f4bc6 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1362,7 +1362,7 @@ END_PROVIDER double precision :: get_two_e_integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:) allocate (buffer(cholesky_mo_num,mo_num)) do k=1,cholesky_mo_num diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 8c6b79d7..5a6b3c03 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -41,6 +41,11 @@ subroutine run(f) integer , allocatable :: Vi(:,:) double precision :: s +! TODO: +! - If Cholesky AO in trexio file, read cholesky ao vectors +! - If Cholesky MO in trexio file, read cholesky mo vectors +! - If Cholesky MO not in trexio file, force do_cholesky_mo to False + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) if (rc /= TREXIO_SUCCESS) then From e55390c70c98d9c475bde599ab3621246cac88af Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:11:10 +0200 Subject: [PATCH 48/64] Type error in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6778d5c7..ffb37565 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -239,7 +239,7 @@ END_PROVIDER + memory_of_int(nq) &! computed(nq) + np*memory_of_int(nq) &! computed(nq) + memory_of_double(np) &! Delta_col(np) - + 7*memory_of_double(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + + 7*memory_of_double8(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) From b080a7a5e9adecf0544cc34b65dbf967872a77e3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:34:14 +0200 Subject: [PATCH 49/64] Clean Cholesky MO --- src/mo_two_e_ints/EZFIO.cfg | 6 -- src/mo_two_e_ints/cholesky.irp.f | 6 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 39 +++++------ src/mo_two_e_ints/no_vvvv.irp.f | 88 ------------------------- 4 files changed, 25 insertions(+), 114 deletions(-) delete mode 100644 src/mo_two_e_ints/no_vvvv.irp.f diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 49a2952c..c967969f 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -17,12 +17,6 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_mo -[no_vvvv_integrals] -type: logical -doc: If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -interface: ezfio,provider,ocaml -default: false - [io_mo_two_e_integrals_erf] type: Disk_access doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index d3affd68..5d34fb33 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -68,7 +68,11 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, read(iunit) cholesky_mo_transp close(iunit) else + print *, '' print *, 'AO->MO Transformation of Cholesky vectors' + print *, '-----------------------------------------' + print *, '' + call wall_time(wall0) allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) @@ -87,7 +91,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, if (write_mo_cholesky) then print *, 'Writing Cholesky MO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') - write(iunit) rank + write(iunit) cholesky_mo_num write(iunit) cholesky_mo_transp close(iunit) call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index cb3f4bc6..4b9bf97f 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -39,29 +39,16 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] return endif - if (.not. do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map - endif - - print *, '' - print *, 'AO -> MO integrals transformation' - print *, '---------------------------------' - print *, '' - call wall_time(wall_1) call cpu_time(cpu_1) - if(no_vvvv_integrals)then - call four_idx_novvvv_old + if (do_mo_cholesky) then + call add_integrals_to_map_cholesky else - if (do_ao_cholesky) then - call add_integrals_to_map_cholesky + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm - else - call add_integrals_to_map(full_ijkl_bitmask_4) - endif + call add_integrals_to_map(full_ijkl_bitmask_4) endif endif @@ -92,8 +79,15 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'DGEMM-based AO->MO Transformation' + print *, '---------------------------------' + print *, '' + if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' + print *, irp_here, ': Integer overflow in ao_num**3. Set do_ao_cholesky=.True.' endif allocate (a1(ao_num,ao_num,ao_num,ao_num)) @@ -213,6 +207,12 @@ subroutine add_integrals_to_map(mask_ijkl) PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'Sparse AO->MO Transformation' + print *, '----------------------------' + print *, '' + !Get list of MOs for i,j,k and l !------------------------------- @@ -469,6 +469,7 @@ subroutine add_integrals_to_map_cholesky integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) + PROVIDE cholesky_mo_transp call set_multiple_levels_omp(.False.) !$OMP PARALLEL DEFAULT(SHARED) & diff --git a/src/mo_two_e_ints/no_vvvv.irp.f b/src/mo_two_e_ints/no_vvvv.irp.f deleted file mode 100644 index 48a7f5e2..00000000 --- a/src/mo_two_e_ints/no_vvvv.irp.f +++ /dev/null @@ -1,88 +0,0 @@ - -subroutine four_idx_novvvv_old - use map_module - use bitmasks - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! -! if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) -! endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! -! if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) -end From ca98a6b529b3aa47d125e27d4b8031ac88a55c41 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 16:46:25 +0200 Subject: [PATCH 50/64] Fixed previous commit --- src/mo_two_e_ints/cholesky.irp.f | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 5d34fb33..971ab38d 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -22,27 +22,27 @@ BEGIN_PROVIDER [ integer, cholesky_mo_num ] endif END_PROVIDER -!BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] -! implicit none -! BEGIN_DOC -! ! Cholesky vectors in MO basis -! END_DOC -! -! integer :: k, i, j -! -! call set_multiple_levels_omp(.False.) -! !$OMP PARALLEL DO PRIVATE(k) -! do k=1,cholesky_mo_num -! do j=1,mo_num -! do i=1,mo_num -! cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO -! -!END_PROVIDER -! +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: k, i, j + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC From 7e1ed69eef611e6ff0336814c8ffded0eb9cf323 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jun 2024 18:03:51 +0200 Subject: [PATCH 51/64] Starting Cholesky transition --- src/mo_two_e_ints/map_integrals.irp.f | 38 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 856 +----------------------- 2 files changed, 57 insertions(+), 837 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index ada256a2..290fdeab 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -98,7 +98,10 @@ double precision function get_two_e_integral(i,j,k,l,map) integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + integer :: kk + + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + if (use_banned_excitation) then if (banned_excitation(i,k)) then get_two_e_integral = 0.d0 @@ -109,22 +112,43 @@ double precision function get_two_e_integral(i,j,k,l,map) return endif endif + + ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then - !DIR$ FORCEINLINE - call two_e_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - get_two_e_integral = dble(tmp) + +! if (iand(ii, -128) /= 0) then + if (.True.) then + ! Integral is not in the cache + + if (do_mo_cholesky) then + + get_two_e_integral = 0.d0 + do kk=1,cholesky_mo_num + get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k) * cholesky_mo_transp(kk,j,l) + enddo + + else + ! Integrals is in the map + + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + get_two_e_integral = dble(tmp) + endif + else + ! Integrals is in the cache + ii_8 = int(l,8)-mo_integrals_cache_min_8 ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) get_two_e_integral = mo_integrals_cache(ii_8) + endif end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 4b9bf97f..d44bb38a 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1,3 +1,26 @@ +! 1,2-index integrals are always taken from: +! - mo_two_e_integrals_jj_exchange +! - mo_two_e_integrals_jj_anti +! - mo_two_e_integrals_jj +! +! 3-index integrals are always taken from: +! - big_array_exchange_integrals +! - big_array_coulomb_integrals +! +! If (do_mo_cholesky): +! - Integrals with four 4 active orbitals are stored in the cache map, +! all other integrals are used from cholesky vectors +! - 1,2,3-index arrays are built from cholesky vectors +! Else: +! - All integrals are stored in the map or cache map +! - 1,2,3-index arrays are built from the map +! +! TODO: +! - build cache map from cholesky vectors +! - get_mo_integrals using cholesky +! - get_mo_integralss using cholesky +! - get_mo_integralss in PT2 + subroutine mo_two_e_integrals_index(i,j,k,l,i1) use map_module implicit none @@ -453,6 +476,9 @@ subroutine add_integrals_to_map(mask_ijkl) end + + + subroutine add_integrals_to_map_cholesky use bitmasks implicit none @@ -516,837 +542,7 @@ subroutine add_integrals_to_map_cholesky end -subroutine add_integrals_to_map_three_indices(mask_ijk) - use bitmasks - implicit none - BEGIN_DOC - ! Adds integrals to the MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijk(N_int,3) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k - integer :: m - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int ) - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,1)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,2)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,3)) - enddo - if(j==0)then - return - endif - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = l0 - j = list_ijkl(j0,2) - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then !min(k,j1-i1) - exit - endif - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - if(i==k .and. j==l .and. i.ne.j)then - buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0 - endif - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = k0 - j = list_ijkl(k0,2) - i0 = l0 - i = list_ijkl(i0,2) - if (k==l) then - cycle - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - -end - - -subroutine add_integrals_to_map_no_exit_34(mask_ijkl) - use bitmasks - implicit none - - BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k, n_l - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) - call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - - !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - !IRP_IF COARRAY - ! if (mod(l1-this_image(),num_images()) /= 0 ) then - ! cycle - ! endif - !IRP_ENDIF - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_l - l = list_ijkl(l0,4) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - j1 = shiftr((l*l-l),1) - do j0 = 1, n_j - j = list_ijkl(j0,2) - if (j > l) then - exit - endif - j1 += 1 - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if(i> k)then - exit - endif - - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - !IRP_IF COARRAY - ! print*, 'Communicating the map' - ! call communicate_mo_integrals() - !IRP_ENDIF - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - - -end - - - - BEGIN_PROVIDER [ double precision, mo_two_e_integral_jj_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integral_jj_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_anti_from_ao(i,j) = J_ij - K_ij - END_DOC - - integer :: i,j,p,q,r,s - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integral_jj_from_ao = 0.d0 - mo_two_e_integrals_jj_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(mo_num,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integral_jj_from_ao,mo_two_e_integrals_jj_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num), & - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i=1,mo_num - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i=1,mo_num - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i=1,mo_num - do j=1,mo_num - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integral_jj_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_jj_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_jj_anti_from_ao = mo_two_e_integral_jj_from_ao - mo_two_e_integrals_jj_exchange_from_ao - - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integrals_vv_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_anti_from_ao(i,j) = J_ij - K_ij - ! but only for the virtual orbitals - END_DOC - - integer :: i,j,p,q,r,s - integer :: i0,j0 - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integrals_vv_from_ao = 0.d0 - mo_two_e_integrals_vv_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(n_virt_orb,mo_num,list_virt,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integrals_vv_from_ao,mo_two_e_integrals_vv_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num),& - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i0=1,n_virt_orb - i= list_virt(i0) - do j0=1,n_virt_orb - j = list_virt(j0) - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integrals_vv_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_vv_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_vv_anti_from_ao = mo_two_e_integrals_vv_from_ao - mo_two_e_integrals_vv_exchange_from_ao - ! print*, '**********' - ! do i0 =1, n_virt_orb - ! i = list_virt(i0) - ! print*, mo_two_e_integrals_vv_from_ao(i,i) - ! enddo - ! print*, '**********' - - -END_PROVIDER BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj, (mo_num,mo_num) ] From bd534589e123c939de8ddd3147837b97837581dc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jun 2024 17:36:14 +0200 Subject: [PATCH 52/64] Building mo cache from cholesky --- src/mo_two_e_ints/map_integrals.irp.f | 68 ++++++++++++++++--------- src/mo_two_e_ints/mo_bi_integrals.irp.f | 1 - 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 290fdeab..e99e89fb 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -59,29 +59,50 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*12 integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) - !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) - !DIR$ FORCEINLINE - call map_get(mo_integrals_map,idx,integral) + if (do_mo_cholesky) then + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE (k,l,ii) + do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 ii = l-mo_integrals_cache_min_8 ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) - mo_integrals_cache(ii) = integral + ii = shiftl(ii,14) + call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & + mo_integrals_cache_max-mo_integrals_cache_min+1, & + cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & + cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & + mo_integrals_cache(ii), 128) + enddo + enddo + !$OMP END PARALLEL DO + + else + !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) + do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + l4 = int(l,4) + do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + k4 = int(k,4) + do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + j4 = int(j,4) + do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 + i4 = int(i,4) + !DIR$ FORCEINLINE + call two_e_integrals_index(i4,j4,k4,l4,idx) + !DIR$ FORCEINLINE + call map_get(mo_integrals_map,idx,integral) + ii = l-mo_integrals_cache_min_8 + ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) + ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) + ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) + mo_integrals_cache(ii) = integral + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif END_PROVIDER @@ -100,7 +121,7 @@ double precision function get_two_e_integral(i,j,k,l,map) real(integral_kind) :: tmp integer :: kk - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky if (use_banned_excitation) then if (banned_excitation(i,k)) then @@ -119,16 +140,13 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) -! if (iand(ii, -128) /= 0) then - if (.True.) then + if (iand(ii, -128) /= 0) then ! Integral is not in the cache if (do_mo_cholesky) then - get_two_e_integral = 0.d0 - do kk=1,cholesky_mo_num - get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k) * cholesky_mo_transp(kk,j,l) - enddo + double precision, external :: ddot + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) else ! Integrals is in the map diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index d44bb38a..6079c9f7 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -16,7 +16,6 @@ ! - 1,2,3-index arrays are built from the map ! ! TODO: -! - build cache map from cholesky vectors ! - get_mo_integrals using cholesky ! - get_mo_integralss using cholesky ! - get_mo_integralss in PT2 From 10fb3a0636d74ef5a3b78dc69bb9cc4d6be63455 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jun 2024 18:23:45 +0200 Subject: [PATCH 53/64] Introducing dgemm and dgemv to get integrals --- src/mo_two_e_ints/map_integrals.irp.f | 35 ++++++++++++++++++++----- src/mo_two_e_ints/mo_bi_integrals.irp.f | 6 +++-- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index e99e89fb..c9fa81c4 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -262,9 +262,25 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - do j=1,sze - call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) - enddo + if (do_mo_cholesky) then + call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & + out_array, sze) +! integer :: i +! do j=1,mo_num +! do i=1,mo_num +! double precision, external :: get_two_e_integral +! print *, i, j, real(out_array(i,j)), real(get_two_e_integral(i,j,k,l,map)) +! enddo +! enddo +! print *, irp_here +! pause + else + do j=1,sze + call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) + enddo + endif end subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) @@ -303,9 +319,16 @@ subroutine get_mo_two_e_integrals_coulomb_ii(k,l,sze,out_val,map) double precision, external :: get_two_e_integral PROVIDE mo_two_e_integrals_in_map - do i=1,sze - out_val(i) = get_two_e_integral(k,i,l,i,map) - enddo + if (do_mo_cholesky) then + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,1), cholesky_mo_num*(mo_num+1), & + cholesky_mo_transp(1,k,l), 1, 0.d0, & + out_val, 1) + else + do i=1,sze + out_val(i) = get_two_e_integral(k,i,l,i,map) + enddo + endif end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 6079c9f7..04e6c3e6 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -65,9 +65,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call cpu_time(cpu_1) if (do_mo_cholesky) then - call add_integrals_to_map_cholesky + PROVIDE cholesky_mo_transp else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + if (do_ao_cholesky) then + call add_integrals_to_map_cholesky + else if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then call four_idx_dgemm else call add_integrals_to_map(full_ijkl_bitmask_4) From 47b80703397ec92dfec008ef2a641efba9c22f44 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 11:53:11 +0200 Subject: [PATCH 54/64] Cache map in integer*4 --- src/mo_two_e_ints/map_integrals.irp.f | 207 ++++++++++++++++++-------- 1 file changed, 145 insertions(+), 62 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index c9fa81c4..9155cd8f 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -34,28 +34,28 @@ end BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] &BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_min_8 ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_max_8 ] +&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_shift] +&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_num,8),mo_integrals_cache_min_8+127_8) - mo_integrals_cache_min = max(1,elec_alpha_num - 63) - mo_integrals_cache_max = min(mo_num,mo_integrals_cache_min+127) + mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + mo_integrals_cache_size = 2**mo_integrals_cache_shift + + mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) + mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*128_8) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_size**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map - integer*8 :: i,j,k,l - integer*4 :: i4,j4,k4,l4 - integer*8 :: ii + integer :: i,j,k,l + integer :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache @@ -63,39 +63,36 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*12 call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE (k,l,ii) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = shiftl(ii,14) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = shiftl(ii,mo_integrals_cache_shift) + ii = shiftl(ii,mo_integrals_cache_shift) call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & mo_integrals_cache_max-mo_integrals_cache_min+1, & cholesky_mo_num, 1.d0, & cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & - mo_integrals_cache(ii), 128) + mo_integrals_cache(ii), mo_integrals_cache_size) enddo enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + do j=mo_integrals_cache_min,mo_integrals_cache_max + do i=mo_integrals_cache_min,mo_integrals_cache_max !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) + call two_e_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) mo_integrals_cache(ii) = integral enddo enddo @@ -116,7 +113,6 @@ double precision function get_two_e_integral(i,j,k,l,map) integer, intent(in) :: i,j,k,l integer(key_kind) :: idx integer :: ii - integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp integer :: kk @@ -140,7 +136,7 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then + if (iand(ii, -mo_integrals_cache_size) /= 0) then ! Integral is not in the cache if (do_mo_cholesky) then @@ -161,11 +157,11 @@ double precision function get_two_e_integral(i,j,k,l,map) else ! Integrals is in the cache - ii_8 = int(l,8)-mo_integrals_cache_min_8 - ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) - get_two_e_integral = mo_integrals_cache(ii_8) + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral = mo_integrals_cache(ii) endif end @@ -197,19 +193,12 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) integer :: i double precision, external :: get_two_e_integral - integer :: ii, ii0 - integer*8 :: ii_8, ii0_8 + integer :: ii real(integral_kind) :: tmp integer(key_kind) :: i1, idx integer(key_kind) :: p,q,r,s,i2 PROVIDE mo_two_e_integrals_in_map mo_integrals_cache -!DEBUG -! do i=1,sze -! out_val(i) = get_two_e_integral(i,j,k,l,map) -! enddo -! return -!DEBUG out_val(1:sze) = 0.d0 if (banned_excitation(j,l)) then @@ -220,9 +209,10 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) ii0 = ior(ii0, k-mo_integrals_cache_min) ii0 = ior(ii0, j-mo_integrals_cache_min) - ii0_8 = int(l,8)-mo_integrals_cache_min_8 - ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8) - ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8) + integer :: ii0, ii0_8, ii_8 + ii0_8 = l-mo_integrals_cache_min + ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), j-mo_integrals_cache_min) q = min(j,l) s = max(j,l) @@ -231,8 +221,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) do i=1,sze if (banned_excitation(i,k)) cycle ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -128) == 0) then - ii_8 = ior( shiftl(ii0_8,7), int(i,8)-mo_integrals_cache_min_8) + if (iand(ii, -mo_integrals_cache_size) == 0) then + ii_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), i-mo_integrals_cache_min) out_val(i) = mo_integrals_cache(ii_8) else p = min(i,k) @@ -246,6 +236,93 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val(i) = dble(tmp) endif enddo + +! if (banned_excitation(j,l)) then +! out_val(1:sze) = 0.d0 +! return +! endif +! +! if (mo_integrals_cache_min > 1) then +! +! if (do_mo_cholesky) then +! +! call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & +! cholesky_mo_transp(1,1,k), cholesky_mo_num, & +! cholesky_mo_transp(1,j,l), 1, 0.d0, & +! out_val, 1) +! +! else +! +! q = min(j,l) +! s = max(j,l) +! q = q+shiftr(s*s-s,1) +! +! do i=1,mo_integrals_cache_min-1 +! if (banned_excitation(i,k)) then +! out_val(i) = 0.d0 +! cycle +! endif +! p = min(i,k) +! r = max(i,k) +! p = p+shiftr(r*r-r,1) +! i1 = min(p,q) +! i2 = max(p,q) +! idx = i1+shiftr(i2*i2-i2,1) +! !DIR$ FORCEINLINE +! call map_get(map,idx,tmp) +! out_val(i) = dble(tmp) +! enddo +! +! endif +! +! endif +! +! +! ii = l-mo_integrals_cache_min +! ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) +! ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) +! ii = shiftl(ii, mo_integrals_cache_shift) +! do i=mo_integrals_cache_min, mo_integrals_cache_max +! ii = ii+1 +! out_val(i) = mo_integrals_cache(ii) +! enddo +! +! +! if (mo_integrals_cache_max < mo_num) then +! +! if (do_mo_cholesky) then +! +! call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & +! cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & +! cholesky_mo_transp(1,j,l), 1, 0.d0, & +! out_val(mo_integrals_cache_max+1), 1) +! +! else +! +! q = min(j,l) +! s = max(j,l) +! q = q+shiftr(s*s-s,1) +! +! do i=mo_integrals_cache_max+1,mo_num +! if (banned_excitation(i,k)) then +! out_val(i) = 0.d0 +! cycle +! endif +! p = min(i,k) +! r = max(i,k) +! p = p+shiftr(r*r-r,1) +! i1 = min(p,q) +! i2 = max(p,q) +! idx = i1+shiftr(i2*i2-i2,1) +! !DIR$ FORCEINLINE +! call map_get(map,idx,tmp) +! out_val(i) = dble(tmp) +! enddo +! +! endif +! +! endif + end subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) @@ -267,15 +344,6 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & out_array, sze) -! integer :: i -! do j=1,mo_num -! do i=1,mo_num -! double precision, external :: get_two_e_integral -! print *, i, j, real(out_array(i,j)), real(get_two_e_integral(i,j,k,l,map)) -! enddo -! enddo -! print *, irp_here -! pause else do j=1,sze call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) @@ -297,9 +365,20 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) integer :: j PROVIDE mo_two_e_integrals_in_map - do j=1,sze - call get_mo_two_e_integrals(k,j,l,sze,out_array(1,j),map) - enddo + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num*mo_num, 1.d0, & + cholesky_mo_transp(1,1,1), cholesky_mo_num, & + cholesky_mo_transp(1,k,l), 1, 0.d0, & + out_array, 1) + + else + + do j=1,sze + call get_mo_two_e_integrals(k,j,l,sze,out_array(1,j),map) + enddo + + endif end @@ -320,14 +399,18 @@ subroutine get_mo_two_e_integrals_coulomb_ii(k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map if (do_mo_cholesky) then + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & cholesky_mo_transp(1,1,1), cholesky_mo_num*(mo_num+1), & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_val, 1) + else + do i=1,sze out_val(i) = get_two_e_integral(k,i,l,i,map) enddo + endif end From a4516fb8f96acabf86e5effa497fd7b0035cc0cb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 12:12:14 +0200 Subject: [PATCH 55/64] Accelerated cache-map access --- src/mo_two_e_ints/map_integrals.irp.f | 188 ++++++++++---------------- 1 file changed, 74 insertions(+), 114 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 9155cd8f..fb155073 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -200,128 +200,88 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - out_val(1:sze) = 0.d0 if (banned_excitation(j,l)) then - return + out_val(1:sze) = 0.d0 + return endif - ii0 = l-mo_integrals_cache_min - ii0 = ior(ii0, k-mo_integrals_cache_min) - ii0 = ior(ii0, j-mo_integrals_cache_min) + if (mo_integrals_cache_min > 1) then - integer :: ii0, ii0_8, ii_8 - ii0_8 = l-mo_integrals_cache_min - ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii0_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), j-mo_integrals_cache_min) + if (do_mo_cholesky) then - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) - do i=1,sze - if (banned_excitation(i,k)) cycle - ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -mo_integrals_cache_size) == 0) then - ii_8 = ior( shiftl(ii0_8,mo_integrals_cache_shift), i-mo_integrals_cache_min) - out_val(i) = mo_integrals_cache(ii_8) else - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) - endif - enddo -! if (banned_excitation(j,l)) then -! out_val(1:sze) = 0.d0 -! return -! endif -! -! if (mo_integrals_cache_min > 1) then -! -! if (do_mo_cholesky) then -! -! call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & -! cholesky_mo_transp(1,1,k), cholesky_mo_num, & -! cholesky_mo_transp(1,j,l), 1, 0.d0, & -! out_val, 1) -! -! else -! -! q = min(j,l) -! s = max(j,l) -! q = q+shiftr(s*s-s,1) -! -! do i=1,mo_integrals_cache_min-1 -! if (banned_excitation(i,k)) then -! out_val(i) = 0.d0 -! cycle -! endif -! p = min(i,k) -! r = max(i,k) -! p = p+shiftr(r*r-r,1) -! i1 = min(p,q) -! i2 = max(p,q) -! idx = i1+shiftr(i2*i2-i2,1) -! !DIR$ FORCEINLINE -! call map_get(map,idx,tmp) -! out_val(i) = dble(tmp) -! enddo -! -! endif -! -! endif -! -! -! ii = l-mo_integrals_cache_min -! ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) -! ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) -! ii = shiftl(ii, mo_integrals_cache_shift) -! do i=mo_integrals_cache_min, mo_integrals_cache_max -! ii = ii+1 -! out_val(i) = mo_integrals_cache(ii) -! enddo -! -! -! if (mo_integrals_cache_max < mo_num) then -! -! if (do_mo_cholesky) then -! -! call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & -! cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & -! cholesky_mo_transp(1,j,l), 1, 0.d0, & -! out_val(mo_integrals_cache_max+1), 1) -! -! else -! -! q = min(j,l) -! s = max(j,l) -! q = q+shiftr(s*s-s,1) -! -! do i=mo_integrals_cache_max+1,mo_num -! if (banned_excitation(i,k)) then -! out_val(i) = 0.d0 -! cycle -! endif -! p = min(i,k) -! r = max(i,k) -! p = p+shiftr(r*r-r,1) -! i1 = min(p,q) -! i2 = max(p,q) -! idx = i1+shiftr(i2*i2-i2,1) -! !DIR$ FORCEINLINE -! call map_get(map,idx,tmp) -! out_val(i) = dble(tmp) -! enddo -! -! endif -! -! endif + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + + if (mo_integrals_cache_max < mo_num) then + + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif end From 82654efdf9c9a19ac593b7bec54f16372fb03460 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 13:06:32 +0200 Subject: [PATCH 56/64] Optimized get_integrals --- src/mo_two_e_ints/cholesky.irp.f | 1 + src/mo_two_e_ints/map_integrals.irp.f | 164 +++++++++++++++++--------- 2 files changed, 107 insertions(+), 58 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 971ab38d..773d240a 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,6 +4,7 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! If True, use Cholesky vectors for MO integrals END_DOC do_mo_cholesky = do_ao_cholesky + do_mo_cholesky = .False. END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_mo_num ] diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index fb155073..571de573 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -32,19 +32,27 @@ subroutine insert_into_mo_integrals_map(n_integrals, & call map_update(mo_integrals_map, buffer_i, buffer_values, n_integrals, thr) end - BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_shift] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_size ] + BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_shift] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + if (qp_max_mem < 1) then + mo_integrals_cache_shift = 5 ! 5 = log(32). + else if (qp_max_mem < 2) then + mo_integrals_cache_shift = 6 ! 6 = log(64). + else + mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 + endif + mo_integrals_cache_size = 2**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) +print *, 'mo_integrals_cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER @@ -136,7 +144,17 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -mo_integrals_cache_size) /= 0) then + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Integrals is in the cache + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral = mo_integrals_cache(ii) + + else + ! Integral is not in the cache if (do_mo_cholesky) then @@ -145,7 +163,6 @@ double precision function get_two_e_integral(i,j,k,l,map) get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) else - ! Integrals is in the map !DIR$ FORCEINLINE call two_e_integrals_index(i,j,k,l,idx) @@ -154,15 +171,6 @@ double precision function get_two_e_integral(i,j,k,l,map) get_two_e_integral = dble(tmp) endif - else - ! Integrals is in the cache - - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) - get_two_e_integral = mo_integrals_cache(ii) - endif end @@ -200,62 +208,105 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 return endif - if (mo_integrals_cache_min > 1) then + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) - if (do_mo_cholesky) then + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Some integrals are in the cache - call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + if (mo_integrals_cache_min > 1) then - else + if (do_mo_cholesky) then - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) - do i=1,mo_integrals_cache_min-1 - if (banned_excitation(i,k)) then - out_val(i) = 0.d0 - cycle - endif - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) - enddo + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif endif - endif + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = shiftl(ii, mo_integrals_cache_shift) - out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + if (mo_integrals_cache_max < mo_num) then - if (mo_integrals_cache_max < mo_num) then + if (do_mo_cholesky) then + + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + else if (do_mo_cholesky) then - call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & - cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val(mo_integrals_cache_max+1), 1) + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) else @@ -263,11 +314,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) s = max(j,l) q = q+shiftr(s*s-s,1) - do i=mo_integrals_cache_max+1,mo_num - if (banned_excitation(i,k)) then - out_val(i) = 0.d0 - cycle - endif + do i=1,sze + if (banned_excitation(i,k)) cycle p = min(i,k) r = max(i,k) p = p+shiftr(r*r-r,1) From 90c3db31036b3aeffa3b94445960f4d092c6f929 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jun 2024 14:38:50 +0200 Subject: [PATCH 57/64] Accelerated cache --- src/mo_two_e_ints/cholesky.irp.f | 2 +- src/mo_two_e_ints/four_idx_novvvv.irp.f | 180 ------------------------ src/mo_two_e_ints/map_integrals.irp.f | 163 ++++++++++++++++----- src/mo_two_e_ints/mo_bi_integrals.irp.f | 4 - 4 files changed, 128 insertions(+), 221 deletions(-) delete mode 100644 src/mo_two_e_ints/four_idx_novvvv.irp.f diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 773d240a..7e2c8b37 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! If True, use Cholesky vectors for MO integrals END_DOC do_mo_cholesky = do_ao_cholesky - do_mo_cholesky = .False. +! do_mo_cholesky = .False. END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_mo_num ] diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f deleted file mode 100644 index 80af35dc..00000000 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ /dev/null @@ -1,180 +0,0 @@ -BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] - implicit none - BEGIN_DOC - ! MO coefficients without virtual MOs - END_DOC - integer :: j,jj - - do j=1,n_core_inact_act_orb - jj = list_core_inact_act(j) - mo_coef_novirt(:,j) = mo_coef(:,jj) - enddo - -END_PROVIDER - -subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) - implicit none - BEGIN_DOC - ! Transform A from the |AO| basis to the |MO| basis excluding virtuals - ! - ! $C^\dagger.A_{ao}.C$ - END_DOC - integer, intent(in) :: LDA_ao,LDA_mo - double precision, intent(in) :: A_ao(LDA_ao,ao_num) - double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) - double precision, allocatable :: T(:,:) - - allocate ( T(ao_num,n_core_inact_act_orb) ) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T - - call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & - 1.d0, A_ao,LDA_ao, & - mo_coef_novirt, size(mo_coef_novirt,1), & - 0.d0, T, size(T,1)) - - call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& - 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & - T, ao_num, & - 0.d0, A_mo, size(A_mo,1)) - - deallocate(T) -end - - -subroutine four_idx_novvvv - print*,'********' - print*,'********' - print*,'********' - print*,'WARNING :: Using four_idx_novvvv, and we are not sure that this routine is not bugged ...' - print*,'********' - print*,'********' - print*,'********' - use map_module - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l,n_integrals - double precision, allocatable :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:) - double precision, external :: get_ao_two_e_integral - integer(key_kind), allocatable :: idx(:) - real(integral_kind), allocatable :: values(:) - - integer :: p,q,r,s - double precision :: c - allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , & - T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & - !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & - !$OMP list_core_inact_act,T2,ao_integrals_map) & - !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & - !$OMP f,f2,d,c) - allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), & - idx(mo_num*mo_num), values(mo_num*mo_num) ) - - ! - !$OMP DO - do s=1,ao_num - do r=1,ao_num - do q=1,ao_num - do p=1,r - f (p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) - f (r,q,p) = f(p,q,r) - enddo - enddo - enddo - do r=1,ao_num - do q=1,ao_num - do p=1,ao_num - f2(p,q,r) = f(p,r,q) - enddo - enddo - enddo - ! f (p,q,r) = - ! f2(p,q,r) = - - do r=1,ao_num - call ao_to_mo_novirt(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1)) - call ao_to_mo_novirt(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1)) - enddo - ! T (i,j,p,q) = - ! T2(i,j,p,q) = - - enddo - !$OMP END DO - - !$OMP DO - do j=1,n_core_inact_act_orb - do i=1,n_core_inact_act_orb - do s=1,ao_num - do r=1,ao_num - f (r,s,1) = T (i,j,r,s) - f2(r,s,1) = T2(i,j,r,s) - enddo - enddo - call ao_to_mo(f ,size(f ,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - - call ao_to_mo(f2,size(f2,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),k,list_core_inact_act(j),l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - enddo - enddo - !$OMP END DO - deallocate(f,f2,d,idx,values) - - !$OMP END PARALLEL - - deallocate(T,T2) - - - call map_sort(mo_integrals_map) - call map_unique(mo_integrals_map) - call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) - -end - -subroutine four_idx_novvvv2 - use bitmasks - implicit none - integer :: i - integer(bit_kind) :: mask_ijkl(N_int,4) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = full_ijkl_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = full_ijkl_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - -end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 571de573..90257bbd 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -48,6 +48,8 @@ end mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 endif +!mo_integrals_cache_shift = 2 ! 5 = log(32). + mo_integrals_cache_size = 2**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) @@ -112,6 +114,24 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz END_PROVIDER +double precision function get_two_e_integral_cache(i,j,k,l) + use map_module + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis taken from the cache + END_DOC + integer, intent(in) :: i,j,k,l + integer :: ii + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + get_two_e_integral_cache = mo_integrals_cache(ii) + +end + + double precision function get_two_e_integral(i,j,k,l,map) use map_module implicit none @@ -123,7 +143,6 @@ double precision function get_two_e_integral(i,j,k,l,map) integer :: ii type(map_type), intent(inout) :: map real(integral_kind) :: tmp - integer :: kk PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky @@ -145,13 +164,9 @@ double precision function get_two_e_integral(i,j,k,l,map) ii = ior(ii, i-mo_integrals_cache_min) if (iand(ii, -mo_integrals_cache_size) == 0) then - ! Integrals is in the cache - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) - get_two_e_integral = mo_integrals_cache(ii) + double precision, external :: get_two_e_integral_cache + get_two_e_integral = get_two_e_integral_cache(i,j,k,l) else @@ -199,7 +214,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) double precision, intent(out) :: out_val(sze) type(map_type), intent(inout) :: map integer :: i - double precision, external :: get_two_e_integral integer :: ii real(integral_kind) :: tmp @@ -256,13 +270,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) endif - - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = shiftl(ii, mo_integrals_cache_shift) - out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + call get_mo_two_e_integrals_cache(j,k,l,sze,out_val) if (mo_integrals_cache_max < mo_num) then @@ -333,6 +341,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) end +subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) + use map_module + implicit none + BEGIN_DOC + ! Returns multiple integrals in the MO basis, all + ! i for j,k,l fixed, all integrals from the cache + END_DOC + integer, intent(in) :: j,k,l, sze + double precision, intent(out) :: out_val(sze) + integer :: ii + + ii = l-mo_integrals_cache_min + ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + +end + subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) use map_module implicit none @@ -347,16 +375,32 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - if (do_mo_cholesky) then - call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & - out_array, sze) + if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max Date: Wed, 12 Jun 2024 14:59:26 +0200 Subject: [PATCH 58/64] Introduce hij_cache in PT2 --- src/cipsi/selection.irp.f | 72 ++++++++++++++------------- src/mo_two_e_ints/map_integrals.irp.f | 16 +++++- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0281a1d4..88a2cbdc 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -178,7 +178,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical, allocatable :: banned(:,:,:), bannedOrb(:,:) double precision, allocatable :: coef_fullminilist_rev(:,:) - double precision, allocatable :: mat(:,:,:) + double precision, allocatable :: mat(:,:,:), hij_cache(:,:,:) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -205,7 +205,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc ! Removed to avoid introducing determinants already presents in the wf !double precision, parameter :: norm_thr = 1.d-16 - allocate (indices(N_det), & + allocate (indices(N_det), hij_cache(mo_num,mo_num,2), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) ! Pre-compute excitation degrees wrt alpha determinants @@ -511,11 +511,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc maskInd = maskInd + 1 if(mod(maskInd, csubset) == (subset-1)) then + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache(1,1,1),mo_integrals_map) + if (sp /= 3) then ! AA or BB + call get_mo_two_e_integrals_ij(h1,h2,mo_num,hij_cache(1,1,2),mo_integrals_map) + endif call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, hij_cache) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if @@ -531,7 +535,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) - deallocate(banned, bannedOrb,mat) + deallocate(banned, bannedOrb, mat, hij_cache) end subroutine BEGIN_TEMPLATE @@ -914,7 +918,7 @@ single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; END_TEMPLATE -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, hij_cache) use bitmasks implicit none BEGIN_DOC @@ -926,6 +930,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere integer, intent(in) :: sp, i_gen, N_sel integer, intent(in) :: interesting(0:N_sel) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) @@ -995,9 +1000,9 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) !, hij_cache) else - call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -1190,6 +1195,8 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1201,12 +1208,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) integer :: bant - double precision, allocatable :: hij_cache(:,:) + double precision, allocatable :: hij_cache1(:,:) double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) PROVIDE mo_integrals_map N_int allocate (lbanned(mo_num, 2)) - allocate (hij_cache(mo_num,2)) + allocate (hij_cache1(mo_num,2)) lbanned = bannedOrb do i=1, p(0,1) @@ -1230,13 +1237,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1248,7 +1255,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1274,15 +1281,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) pfix = p(1,mi) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p1 do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1296,7 +1303,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 ! do puti=1,mo_num !HOT if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states @@ -1327,13 +1334,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) puti = p(i, ma) p1 = p(turn3(1,i), ma) p2 = p(turn3(2,i), ma) - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1342,7 +1349,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1,mo_num if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1364,14 +1371,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,ma) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p2 do puti=1,mo_num if(lbanned(puti,ma)) cycle putj = p2 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1383,7 +1390,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states @@ -1408,7 +1415,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end if end if - deallocate(lbanned,hij_cache) + deallocate(lbanned,hij_cache1) !! MONO if(sp == 3) then @@ -1439,7 +1446,7 @@ end -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1450,6 +1457,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) integer :: i, j, k, s, h1, h2, p1, p2, puti, putj double precision :: hij, phase @@ -1457,8 +1465,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) logical :: ok integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) if(sp == 3) then ! AB @@ -1466,7 +1472,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = p(1,2) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1475,7 +1480,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache(p2,p1,1) * phase end if if (hij == 0.d0) cycle !DIR$ LOOP COUNT AVG(4) @@ -1490,8 +1495,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,sp) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? @@ -1500,7 +1503,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) + hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1512,7 +1515,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end if - deallocate(hij_cache1,hij_cache2) end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 90257bbd..516851b9 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -48,7 +48,7 @@ end mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 endif -!mo_integrals_cache_shift = 2 ! 5 = log(32). +mo_integrals_cache_shift = 2 ! 5 = log(32). mo_integrals_cache_size = 2**mo_integrals_cache_shift @@ -176,6 +176,8 @@ double precision function get_two_e_integral(i,j,k,l,map) double precision, external :: ddot get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) +! double precision, external :: get_from_mo_cholesky_cache +! get_two_e_integral = get_from_mo_cholesky_cache(i,j,k,l,.False.) else @@ -227,6 +229,11 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val(1:sze) = 0.d0 return endif +! +! if (do_mo_cholesky) then +! call get_from_mo_cholesky_caches(j,k,l,out_val) +! return +! endif ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) @@ -239,6 +246,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -276,6 +284,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -311,6 +320,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -425,6 +435,10 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) cholesky_mo_transp(1,1,1), cholesky_mo_num, & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_array, 1) +! +! do j=1,sze +! call get_from_mo_cholesky_caches(k,j,l,out_array(1,j)) +! enddo else From acc0b97fbad1589c5453ec9e077668483b419759 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 13:29:38 +0200 Subject: [PATCH 59/64] Inline past_d1 and past_d2 --- src/cipsi/selection.irp.f | 94 ++++++++++----------------- src/mo_two_e_ints/map_integrals.irp.f | 29 ++++----- 2 files changed, 50 insertions(+), 73 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 88a2cbdc..517220a8 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -560,7 +560,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ double precision, external :: diag_H_mat_elem_fock double precision :: E_shift double precision :: s_weight(N_states,N_states) - PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs + PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs thresh_sym excitation_ref hf_bitmask elec_alpha_num do jstate=1,N_states do istate=1,N_states s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) @@ -746,7 +746,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ do istate=1,N_states delta_E = E0(istate) - Hii + E_shift alpha_h_psi = mat(istate, p1, p2) - if (alpha_h_psi == 0.d0) cycle + if (dabs(alpha_h_psi) < mo_integrals_threshold) cycle val = alpha_h_psi + alpha_h_psi tmp = dsqrt(delta_E * delta_E + val * val) @@ -1000,18 +1000,36 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) !, hij_cache) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) else call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d2(banned, p, sp) + if(sp == 3) then + do j=1,p(0,2) + do ii=1,p(0,1) + banned(p(ii,1), p(j,2),1) = .true. + end do + end do + else + do ii=1,p(0, sp) + do j=1,ii-1 + banned(p(j,sp), p(ii,sp),1) = .true. + banned(p(ii,sp), p(j,sp),1) = .true. + end do + end do + end if else if(nt == 3) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d1(bannedOrb, p) + do ii = 1, p(0, 1) + bannedOrb(p(ii, 1), 1) = .true. + end do + do ii = 1, p(0, 2) + bannedOrb(p(ii, 2), 2) = .true. + end do end if end do @@ -1042,6 +1060,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 + PROVIDE mo_integrals_threshold tip = p(0,1) * p(0,2) ma = sp @@ -1067,7 +1086,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) @@ -1097,7 +1116,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(turn2(i), 1) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij /= 0.d0) then + if (dabs(hij) > mo_integrals_threshold) then hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states @@ -1125,7 +1144,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1147,7 +1166,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) if (puti < putj) then @@ -1184,7 +1203,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end -subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1195,7 +1214,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp -! double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1237,13 +1256,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache1(putj,1) - hij_cache1(putj,2) + hij = hij_cache(hfix,putj,1) - hij_cache(putj,hfix,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1255,7 +1272,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache1(putj,2) - hij_cache1(putj,1) + hij = hij_cache(putj,hfix,1) - hij_cache(hfix,putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1466,6 +1483,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, integer, parameter :: bant=1 + PROVIDE mo_integrals_threshold if(sp == 3) then ! AB h1 = p(1,1) @@ -1482,7 +1500,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) hij = hij_cache(p2,p1,1) * phase end if - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT @@ -1501,10 +1519,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle else hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if !DIR$ LOOP COUNT AVG(4) @@ -1518,46 +1536,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, end -subroutine past_d1(bannedOrb, p) - use bitmasks - implicit none - - logical, intent(inout) :: bannedOrb(mo_num, 2) - integer, intent(in) :: p(0:4, 2) - integer :: i,s - - do s = 1, 2 - do i = 1, p(0, s) - bannedOrb(p(i, s), s) = .true. - end do - end do -end - - -subroutine past_d2(banned, p, sp) - use bitmasks - implicit none - - logical, intent(inout) :: banned(mo_num, mo_num) - integer, intent(in) :: p(0:4, 2), sp - integer :: i,j - - if(sp == 3) then - do j=1,p(0,2) - do i=1,p(0,1) - banned(p(i,1), p(j,2)) = .true. - end do - end do - else - do i=1,p(0, sp) - do j=1,i-1 - banned(p(j,sp), p(i,sp)) = .true. - banned(p(i,sp), p(j,sp)) = .true. - end do - end do - end if -end - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 516851b9..13fcc19a 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -192,19 +192,6 @@ double precision function get_two_e_integral(i,j,k,l,map) end -double precision function mo_two_e_integral(i,j,k,l) - implicit none - BEGIN_DOC - ! Returns one integral in the MO basis - END_DOC - integer, intent(in) :: i,j,k,l - double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - !DIR$ FORCEINLINE - mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - return -end - subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) use map_module implicit none @@ -223,8 +210,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) integer(key_kind) :: p,q,r,s,i2 PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - - if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 return @@ -351,6 +336,20 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) end +double precision function mo_two_e_integral(i,j,k,l) + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis + END_DOC + integer, intent(in) :: i,j,k,l + double precision :: get_two_e_integral + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + !DIR$ FORCEINLINE + mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + return +end + + subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) use map_module implicit none From 70317b2a158d69404ac9047bde280f29fa8ec82f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 14:54:32 +0200 Subject: [PATCH 60/64] Put mo_integrals_cache_shift in EZFIO --- src/mo_two_e_ints/EZFIO.cfg | 6 +++ src/mo_two_e_ints/map_integrals.irp.f | 59 +++++++++------------------ 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index c967969f..da9d8fc9 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -10,6 +10,12 @@ doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[mo_integrals_cache_shift] +type: integer +doc: Adjusts the size of the MO integrals cache. 2: 2KB, 3: 32KB, 4: 512KB, 5: 8MB, 6: 128MB, 7: 2GB, 8: 32GB, 9: 512GB +interface: ezfio, provider, ocaml +default: 7 + [mo_integrals_threshold] type: Threshold doc: If | | < `mo_integrals_threshold` then is zero diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 13fcc19a..168c34b4 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -34,38 +34,28 @@ end BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] &BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer, mo_integrals_cache_shift] &BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - if (qp_max_mem < 1) then - mo_integrals_cache_shift = 5 ! 5 = log(32). - else if (qp_max_mem < 2) then - mo_integrals_cache_shift = 6 ! 6 = log(64). - else - mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 - endif -mo_integrals_cache_shift = 2 ! 5 = log(32). - - mo_integrals_cache_size = 2**mo_integrals_cache_shift + mo_integrals_cache_size = 2_8**mo_integrals_cache_shift mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) -print *, 'mo_integrals_cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' + print *, 'MO integrals cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_size**4) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:(1_8*mo_integrals_cache_size)**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map integer :: i,j,k,l - integer :: ii + integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache @@ -75,8 +65,8 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz !$OMP PARALLEL DO PRIVATE (k,l,ii) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) ii = shiftl(ii,mo_integrals_cache_shift) ii = shiftl(ii,mo_integrals_cache_shift) call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & @@ -99,10 +89,10 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:mo_integrals_cache_siz call two_e_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) mo_integrals_cache(ii) = integral enddo enddo @@ -121,12 +111,12 @@ double precision function get_two_e_integral_cache(i,j,k,l) ! Returns one integral in the MO basis taken from the cache END_DOC integer, intent(in) :: i,j,k,l - integer :: ii + integer*8 :: ii - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii,mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), j-mo_integrals_cache_min) - ii = ior( shiftl(ii,mo_integrals_cache_shift), i-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) get_two_e_integral_cache = mo_integrals_cache(ii) end @@ -215,11 +205,6 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) return endif ! -! if (do_mo_cholesky) then -! call get_from_mo_cholesky_caches(j,k,l,out_val) -! return -! endif - ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) @@ -359,14 +344,14 @@ subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) END_DOC integer, intent(in) :: j,k,l, sze double precision, intent(out) :: out_val(sze) - integer :: ii + integer*8 :: ii - ii = l-mo_integrals_cache_min - ii = ior( shiftl(ii, mo_integrals_cache_shift), k-mo_integrals_cache_min) - ii = ior( shiftl(ii, mo_integrals_cache_shift), j-mo_integrals_cache_min) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) ii = shiftl(ii, mo_integrals_cache_shift) out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & - mo_integrals_cache(ii:ii+mo_integrals_cache_max-mo_integrals_cache_min) + mo_integrals_cache(ii:ii+int(mo_integrals_cache_max-mo_integrals_cache_min,8)) end @@ -434,10 +419,6 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) cholesky_mo_transp(1,1,1), cholesky_mo_num, & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_array, 1) -! -! do j=1,sze -! call get_from_mo_cholesky_caches(k,j,l,out_array(1,j)) -! enddo else From d89682cb7ee8beeb0e01d6129afd18d8cd9c78ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Jun 2024 17:50:27 +0200 Subject: [PATCH 61/64] Improved disk access in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 98 +++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 41cdb80d..05f7115d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -41,7 +41,7 @@ END_PROVIDER integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj integer*8 :: i8, j8, p8, qj8, rank_max, np8 integer :: N, np, nq @@ -65,6 +65,8 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) logical :: delta_on_disk + integer :: dgemm_block_size, nqq + double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:) PROVIDE nproc PROVIDE nucl_coord ao_two_e_integral_schwartz @@ -300,17 +302,58 @@ END_PROVIDER !$OMP BARRIER !$OMP END PARALLEL + PROVIDE nproc if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 0.d0, Delta, np) - else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) SCHEDULE(static,1) - do q=1,nq - do j=1,np - Delta(j,q) = 0.d0 + + if (delta_on_disk) then + ! Blocking improves I/O performance + + dgemm_block_size = nproc*4 + + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,N)) + + do jj=1,nq,dgemm_block_size + + nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + do ii=1,N + do q=jj,jj+nqq-1 + dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('N', 'T', np, nqq, N, 1.d0, & + Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + Delta(:,q) = - dgemm_buffer1(:, q-jj+1) + enddo + !$OMP END PARALLEL DO + enddo + + deallocate(dgemm_buffer1, dgemm_buffer2) + + else + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + endif + + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 enddo !$OMP END PARALLEL DO + endif ! f. @@ -329,9 +372,46 @@ END_PROVIDER rank = N+j if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & + + if (delta_on_disk) then + ! Blocking improves I/O performance + + dgemm_block_size = nproc*4 + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,block_size)) + + do jj=1,nq,dgemm_block_size + nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + do ii=1,block_size + do q=jj,jj+nqq-1 + dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('N', 'T', np, nqq, block_size, 1.d0, & + Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + Delta(:,q) = Delta(:,q) - dgemm_buffer1(:, q-jj+1) + enddo + !$OMP END PARALLEL DO + + enddo + deallocate(dgemm_buffer1, dgemm_buffer2) + + else + + call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + endif + iblock = 0 + endif ! ii. From e876f635d636b1cb878cdb9baf9e7b3906bf955f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Jun 2024 16:26:23 +0200 Subject: [PATCH 62/64] Asyc Fortran I/O --- src/ao_two_e_ints/cholesky.irp.f | 161 ++++++++++++++++++++++--------- src/utils/fortran_mmap.c | 7 +- 2 files changed, 118 insertions(+), 50 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 05f7115d..d731ef04 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,12 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), Delta(:,:) + double precision, pointer :: L(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:), addr3(:) logical, allocatable :: computed(:) @@ -66,7 +66,7 @@ END_PROVIDER integer :: fd(2) logical :: delta_on_disk integer :: dgemm_block_size, nqq - double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:) + double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:), dgemm_buffer3(:,:) PROVIDE nproc PROVIDE nucl_coord ao_two_e_integral_schwartz @@ -230,7 +230,7 @@ END_PROVIDER stop -1 endif - if (s > 0.1d0) then + if (s > 0.3d0) then exit endif @@ -245,13 +245,16 @@ END_PROVIDER + np*memory_of_double(nq) &! Delta(np,nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - if (mem > qp_max_mem) then - call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) - call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) - ! Deleting the file while it is open makes the file invisible on the filesystem, - ! and automatically deleted, even if the program crashes + if (1.1*mem > qp_max_mem) then +! call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) +! call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) + +! ! Deleting the file while it is open makes the file invisible on the filesystem, +! ! and automatically deleted, even if the program crashes iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') close(iunit,status='delete') + open(unit=iunit, access='DIRECT', form='UNFORMATTED', RECL=(np+1)*8, & + ASYNCHRONOUS='YES', file=trim(ezfio_work_dir)//'cholesky_delta') delta_on_disk = .True. else allocate(Delta(np,nq)) @@ -303,15 +306,18 @@ END_PROVIDER !$OMP END PARALLEL PROVIDE nproc - if (N>0) then - if (delta_on_disk) then + if (delta_on_disk) then + + dgemm_block_size = nproc*4 + + allocate (dgemm_buffer1(np,dgemm_block_size)) + allocate (dgemm_buffer3(np,dgemm_block_size)) + allocate (dgemm_buffer2(dgemm_block_size,N)) + + if (N>0) then ! Blocking improves I/O performance - dgemm_block_size = nproc*4 - - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,N)) do jj=1,nq,dgemm_block_size @@ -325,34 +331,55 @@ END_PROVIDER enddo !$OMP END PARALLEL DO - call dgemm('N', 'T', np, nqq, N, 1.d0, & +print *, np, nq, jj, nqq, N + call dgemm('N', 'T', np, nqq, N, -1.d0, & Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + wait(iunit) + dgemm_buffer3(:,:) = dgemm_buffer1(:,:) +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) do q=jj,jj+nqq-1 - Delta(:,q) = - dgemm_buffer1(:, q-jj+1) + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(1:np, q-jj+1) enddo - !$OMP END PARALLEL DO +! !$OMP END PARALLEL DO enddo - - deallocate(dgemm_buffer1, dgemm_buffer2) +print *, 'ok1' else - call dgemm('N', 'T', np, nq, N, -1.d0, & - Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + dgemm_buffer1(1:np,1) = 0.d0 + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=1,nq + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer1(1:np, 1) + enddo +! !$OMP END PARALLEL DO endif + deallocate(dgemm_buffer1, dgemm_buffer2) + if (delta_on_disk) wait(iunit) + deallocate(dgemm_buffer3) + else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP END PARALLEL DO + if (N>0) then + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO + + endif endif @@ -383,25 +410,40 @@ END_PROVIDER do jj=1,nq,dgemm_block_size nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(q,ii) + !$OMP DO do ii=1,block_size do q=jj,jj+nqq-1 dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + !$OMP END PARALLEL - call dgemm('N', 'T', np, nqq, block_size, 1.d0, & - Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) +! !$OMP DO do q=jj,jj+nqq-1 - Delta(:,q) = Delta(:,q) - dgemm_buffer1(:, q-jj+1) + read(iunit, rec=q) dgemm_buffer1(1:np, q-jj+1) enddo - !$OMP END PARALLEL DO +! !$OMP END DO + +print *, np, nq, jj, nqq, block_size + call dgemm('N', 'T', np, nqq, block_size, -1.d0, & + Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 1.d0, dgemm_buffer1, np) + + wait(iunit) + dgemm_buffer3 = dgemm_buffer1 + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) + do q=jj,jj+nqq-1 + write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(:, q-jj+1) + enddo +! !$OMP END PARALLEL DO enddo +print *, 'ok' deallocate(dgemm_buffer1, dgemm_buffer2) + wait(iunit) + deallocate(dgemm_buffer3) else @@ -427,11 +469,20 @@ END_PROVIDER enddo iblock = iblock+1 - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - !$OMP END PARALLEL DO + + if (delta_on_disk) then + + read(iunit,rec=dj) Ltmp_p(1:np,iblock) + + else + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO + + endif if (.not.computed(dj)) then m = dj @@ -463,12 +514,26 @@ END_PROVIDER !$OMP END PARALLEL DO endif - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - Delta(p,dj) = Ltmp_p(p,iblock) - enddo - !$OMP END PARALLEL DO + if (delta_on_disk) then + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + enddo + !$OMP END PARALLEL DO + + write(iunit, ASYNCHRONOUS='YES', rec=dj) Ltmp_p(1:np,iblock) + + else + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO + + endif computed(dj) = .True. endif @@ -512,7 +577,7 @@ END_PROVIDER deallocate(Ltmp_q) deallocate(computed) if (delta_on_disk) then - call munmap( (/ np*1_8, nq*1_8 /), 8, fd(2), c_pointer(2) ) + close(iunit, status='delete') else deallocate(Delta) endif diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 711a9c34..2dbe42b8 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -40,7 +40,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, exit(EXIT_FAILURE); } - result = write(fd, "", 1); + result = write(fd, " ", 1); if (result != 1) { close(fd); printf("%s:\n", filename); @@ -49,7 +49,10 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK, fd, 0); + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); + if (map == MAP_FAILED) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + } } else { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } From f671c669f8cae460911c1e016e9e44297e817d79 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Jun 2024 14:57:48 +0200 Subject: [PATCH 63/64] Use less memory in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 378 +++++++------------------- src/hartree_fock/fock_matrix_hf.irp.f | 21 +- 2 files changed, 119 insertions(+), 280 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d731ef04..a680e7ee 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,14 +31,14 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:) + double precision, pointer :: L(:,:), Delta(:,:) double precision :: s double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) integer, allocatable :: addr1(:), addr2(:) - integer*8, allocatable :: Lset(:), Dset(:), addr3(:) + integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj @@ -64,11 +64,8 @@ END_PROVIDER type(c_ptr) :: c_pointer(2) integer :: fd(2) - logical :: delta_on_disk - integer :: dgemm_block_size, nqq - double precision, allocatable :: dgemm_buffer1(:,:), dgemm_buffer2(:,:), dgemm_buffer3(:,:) - PROVIDE nproc + PROVIDE nproc ao_cholesky_threshold do_direct_integrals qp_max_mem PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) @@ -88,19 +85,8 @@ END_PROVIDER else - PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) - call resident_memory(mem0) - - rank_max = min(ndim8,(qp_max_mem*1024_8*1024_8*1024_8/8_8)/ndim8) - call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) - call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) - ! Deleting the file while it is open makes the file invisible on the filesystem, - ! and automatically deleted, even if the program crashes - iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') - close(iunit,status='delete') - if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -113,8 +99,12 @@ END_PROVIDER tau = ao_cholesky_threshold tau2 = tau*tau - mem = 6.d0 * memory_of_double8(ndim8) + 6.d0 * memory_of_int8(ndim8) - call check_mem(mem, irp_here) + rank = 0 + + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8) ) + + call resident_memory(mem0) call print_memory_usage() @@ -127,46 +117,35 @@ END_PROVIDER print *, '============ =============' - rank = 0 - - allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) - allocate( addr1(ndim8), addr2(ndim8), addr3(ndim8) ) -!print *, 'allocate : (D(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (Lset(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (Dset(ndim8))', memory_of_int8(ndim8) -!print *, 'allocate : (4,addr(ndim8))', memory_of_int8(4_8*ndim8) - ! 1. - k=0 + i8=0 do j=1,ao_num do i=1,ao_num - k = k+1 - addr1(k) = i - addr2(k) = j - addr3(k) = (i-1)*ao_num + j + i8 = i8+1 + addr1(i8) = i + addr2(i8) = j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) do i8=ndim8,1,-1 D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,16) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) do i8=ndim8,1,-1 D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & - addr2(i8), addr2(i8), & - ao_integrals_map) + addr2(i8), addr2(i8), ao_integrals_map) enddo !$OMP END PARALLEL DO endif + D_sorted(:) = -D(:) call dsort_noidx_big(D_sorted,ndim8) - D_sorted(:) = dabs(D_sorted(:)) - + D_sorted(:) = -D_sorted(:) Dmax = D_sorted(1) ! 2. @@ -174,12 +153,24 @@ END_PROVIDER dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then + if ( dscale_tmp*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif enddo np = np8 + if (np <= 0) stop 'np<=0' + if (np > ndim8) stop 'np>ndim8' + + rank_max = min(np,20*elec_num*elec_num) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + ! 3. N = 0 @@ -187,85 +178,59 @@ END_PROVIDER ! 4. i = 0 + mem = memory_of_double(np) & ! Delta(np,nq) + + (np+1)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + +! call check_mem(mem) + ! 5. - do while ( (Dmax > tau).and.(rank*1_8 < min(ndim8,rank_max)) ) + do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 - ! Inrease s until the arrays fit in memory - s = 0.01d0 block_size = max(N,24) + + ! Determine nq so that Delta fits in memory + + s = 0.1d0 + Dmin = max(s*Dmax,tau) + do nq=2,np-1 + if (D_sorted(nq) < Dmin) exit + enddo + do while (.True.) - ! b. - Dmin = max(s*Dmax,tau) + mem = mem0 & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (np+nq)*memory_of_double(block_size) & ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + + memory_of_int(nq) ! computed(nq) - ! c. - nq=0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - endif - enddo - - - mem = mem0 & - + np*memory_of_double(nq) - -!print *, 'mem = ', mem - if (mem > qp_max_mem/2) then - s = s*2.d0 + if (mem > qp_max_mem*0.5d0) then + nq = nq/2 else exit endif - if ((s > 1.d0).or.(nq == 0)) then - call print_memory_usage() - print *, 'Required peak memory: ', mem, 'Gb' - call resident_memory(mem) - print *, 'Already used memory: ', mem, 'Gb' - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif - - if (s > 0.3d0) then - exit - endif - enddo - ! d., e. - mem = mem0 & - + memory_of_int(nq) &! computed(nq) - + np*memory_of_int(nq) &! computed(nq) - + memory_of_double(np) &! Delta_col(np) - + 7*memory_of_double8(ndim8) &! D, Lset, Dset, D_sorted, addr[1-3] - + np*memory_of_double(nq) &! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - - if (1.1*mem > qp_max_mem) then -! call mmap(trim(ezfio_work_dir)//'cholesky_delta', (/ np*1_8, nq*1_8 /), 8, fd(2), .False., .True., c_pointer(2)) -! call c_f_pointer(c_pointer(2), Delta, (/ np, nq /)) - -! ! Deleting the file while it is open makes the file invisible on the filesystem, -! ! and automatically deleted, even if the program crashes - iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_delta', 'R') - close(iunit,status='delete') - open(unit=iunit, access='DIRECT', form='UNFORMATTED', RECL=(np+1)*8, & - ASYNCHRONOUS='YES', file=trim(ezfio_work_dir)//'cholesky_delta') - delta_on_disk = .True. - else - allocate(Delta(np,nq)) - delta_on_disk = .False. + if (nq <= 0) then + print *, nq + stop 'bug in cholesky: nq <= 0' endif -!print *, delta_on_disk - allocate(Delta_col(np)) + Dmin = D_sorted(nq) + nq=0 + do p=1,np + if ( D(Lset(p)) >= Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + endif + enddo + + allocate(Delta(np,nq)) allocate(Ltmp_p(np,block_size), stat=ierr) -!print *, 'allocate : Ltmp_p(np,block_size)', memory_of_double8(np*block_size*1_8), np, block_size if (ierr /= 0) then call print_memory_usage() @@ -274,7 +239,6 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) -!print *, 'allocate : Ltmp_q(nq,block_size)', memory_of_double8(nq*block_size*1_8), nq, block_size if (ierr /= 0) then call print_memory_usage() @@ -287,7 +251,6 @@ END_PROVIDER computed(:) = .False. -!print *, 'N, rank, block_size', N, rank, block_size !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N !$OMP DO @@ -305,81 +268,18 @@ END_PROVIDER !$OMP BARRIER !$OMP END PARALLEL - PROVIDE nproc - - if (delta_on_disk) then - - dgemm_block_size = nproc*4 - - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer3(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,N)) - - if (N>0) then - ! Blocking improves I/O performance - - - do jj=1,nq,dgemm_block_size - - nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,ii) - do ii=1,N - do q=jj,jj+nqq-1 - dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) - enddo - enddo - !$OMP END PARALLEL DO - -print *, np, nq, jj, nqq, N - call dgemm('N', 'T', np, nqq, N, -1.d0, & - Ltmp_p, np, dgemm_buffer2, dgemm_block_size, 0.d0, dgemm_buffer1, np) - - wait(iunit) - dgemm_buffer3(:,:) = dgemm_buffer1(:,:) -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=jj,jj+nqq-1 - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(1:np, q-jj+1) - enddo -! !$OMP END PARALLEL DO - - enddo -print *, 'ok1' - - else - - - dgemm_buffer1(1:np,1) = 0.d0 - -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=1,nq - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer1(1:np, 1) - enddo -! !$OMP END PARALLEL DO - - endif - - deallocate(dgemm_buffer1, dgemm_buffer2) - if (delta_on_disk) wait(iunit) - deallocate(dgemm_buffer3) - - - else - - if (N>0) then + if (N>0) then call dgemm('N', 'T', np, nq, N, -1.d0, & Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) - else + else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO endif @@ -395,64 +295,20 @@ print *, 'ok1' do j=1,nq if ( (Qmax <= Dmin).or.(N+j*1_8 > ndim8) ) exit + ! i. rank = N+j + if (rank == rank_max) then + print *, 'cholesky: rank_max reached' + exit + endif if (iblock == block_size) then - if (delta_on_disk) then - ! Blocking improves I/O performance - - dgemm_block_size = nproc*4 - allocate (dgemm_buffer1(np,dgemm_block_size)) - allocate (dgemm_buffer2(dgemm_block_size,block_size)) - - do jj=1,nq,dgemm_block_size - nqq = min(nq, jj+dgemm_block_size-1) - jj + 1 - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(q,ii) - !$OMP DO - do ii=1,block_size - do q=jj,jj+nqq-1 - dgemm_buffer2(q-jj+1,ii) = Ltmp_q(q,ii) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - -! !$OMP DO - do q=jj,jj+nqq-1 - read(iunit, rec=q) dgemm_buffer1(1:np, q-jj+1) - enddo -! !$OMP END DO - -print *, np, nq, jj, nqq, block_size - call dgemm('N', 'T', np, nqq, block_size, -1.d0, & - Ltmp_p(1,1), np, dgemm_buffer2, dgemm_block_size, 1.d0, dgemm_buffer1, np) - - wait(iunit) - dgemm_buffer3 = dgemm_buffer1 - -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q) - do q=jj,jj+nqq-1 - write(iunit, ASYNCHRONOUS='YES', rec=q) dgemm_buffer3(:, q-jj+1) - enddo -! !$OMP END PARALLEL DO - - enddo -print *, 'ok' - deallocate(dgemm_buffer1, dgemm_buffer2) - wait(iunit) - deallocate(dgemm_buffer3) - - else - - call dgemm('N','T',np,nq,block_size,-1.d0, & + call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - endif - - iblock = 0 + iblock = 0 endif @@ -469,71 +325,47 @@ print *, 'ok' enddo iblock = iblock+1 - - if (delta_on_disk) then - - read(iunit,rec=dj) Ltmp_p(1:np,iblock) - - else - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO if (.not.computed(dj)) then m = dj if (do_direct_integrals) then !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) do k=1,np + Delta_col(k) = 0.d0 if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then Delta_col(k) = & ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& addr1(Dset(m)), addr2(Dset(m))) - else - Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO else + PROVIDE ao_integrals_map !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) do k=1,np + Delta_col(k) = 0.d0 if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)) ) ) then Delta_col(k) = & get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) - else - Delta_col(k) = 0.d0 endif enddo !$OMP END PARALLEL DO endif - if (delta_on_disk) then - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - enddo - !$OMP END PARALLEL DO - - write(iunit, ASYNCHRONOUS='YES', rec=dj) Ltmp_p(1:np,iblock) - - else - - !$OMP PARALLEL DO PRIVATE(p) - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) - Delta(p,dj) = Ltmp_p(p,iblock) - enddo - !$OMP END PARALLEL DO - - endif + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO computed(dj) = .True. endif @@ -572,30 +404,26 @@ print *, 'ok' print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(Delta_col) deallocate(Ltmp_p) deallocate(Ltmp_q) deallocate(computed) - if (delta_on_disk) then - close(iunit, status='delete') - else - deallocate(Delta) - endif + deallocate(Delta) ! i. N = rank ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) + + Dmax = D_sorted(1) dscale = 1.d0 dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) > tau2 ) then + if ( dscale_tmp*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -609,7 +437,6 @@ print *, 'ok' print *, '' allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) -!print *, 'allocate : cholesky_ao(ao_num,ao_num,rank)', memory_of_double8(ao_num*ao_num*rank*1_8) if (ierr /= 0) then call print_memory_usage() @@ -621,7 +448,7 @@ print *, 'ok' !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num - cholesky_ao(1:ao_num,j,k) = L((j-1)*ao_num+1:j*ao_num,k) + cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) enddo enddo !$OMP END PARALLEL DO @@ -646,5 +473,6 @@ print *, 'ok' call wall_time(wall1) print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + END_PROVIDER diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 65b3d63c..6d917322 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -194,17 +194,28 @@ END_PROVIDER endif - double precision :: rss + double precision :: rss, mem0, mem double precision :: memory_of_double integer :: iblock - integer, parameter :: block_size = 32 + integer :: block_size + + call resident_memory(mem0) + + block_size = 1024 + + rss = memory_of_double(2.d0*ao_num*ao_num) + do + mem = mem0 + block_size*rss + if ( (block_size < 2).or.(mem < qp_max_mem) ) exit + block_size = block_size/2 + enddo + + call check_mem(block_size*rss, irp_here) - rss = memory_of_double(ao_num*ao_num) - call check_mem(2.d0*block_size*rss, irp_here) allocate(X2(ao_num,ao_num,block_size,2)) allocate(X3(ao_num,block_size,ao_num,2)) - + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) do iblock=1,cholesky_ao_num,block_size From 4b578d9849df7fa548a0b8627f714df6248a8440 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 20 Jun 2024 13:43:46 +0200 Subject: [PATCH 64/64] mmap is now shared in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 36 ++++++++++++++++++-------------- src/utils/fortran_mmap.c | 3 +++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index a680e7ee..063cc365 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -31,12 +31,11 @@ END_PROVIDER integer*8 :: ndim8 integer :: rank double precision :: tau, tau2 - double precision, pointer :: L(:,:), Delta(:,:) + double precision, pointer :: L(:,:) double precision :: s - double precision :: dscale, dscale_tmp - double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) integer, allocatable :: addr1(:), addr2(:) integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) @@ -102,7 +101,7 @@ END_PROVIDER rank = 0 allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) - allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8) ) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8), computed(ndim8) ) call resident_memory(mem0) @@ -149,11 +148,9 @@ END_PROVIDER Dmax = D_sorted(1) ! 2. - dscale = 1.d0 - dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) >= tau2 ) then + if ( Dmax*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -203,16 +200,23 @@ END_PROVIDER mem = mem0 & + np*memory_of_double(nq) & ! Delta(np,nq) - + (np+nq)*memory_of_double(block_size) & ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - + memory_of_int(nq) ! computed(nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) if (mem > qp_max_mem*0.5d0) then - nq = nq/2 + Dmin = D_sorted(nq/2) + do ii=nq/2,np-1 + if (D_sorted(ii) < Dmin) then + nq = ii + exit + endif + enddo else exit endif enddo +!call print_memory_usage +!print *, 'np, nq, Predicted memory: ', np, nq, mem if (nq <= 0) then print *, nq @@ -247,8 +251,7 @@ END_PROVIDER endif - allocate(computed(nq)) - computed(:) = .False. + computed(1:nq) = .False. !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) @@ -406,7 +409,6 @@ END_PROVIDER deallocate(Ltmp_p) deallocate(Ltmp_q) - deallocate(computed) deallocate(Delta) ! i. @@ -419,11 +421,9 @@ END_PROVIDER Dmax = D_sorted(1) - dscale = 1.d0 - dscale_tmp = dscale*dscale*Dmax np8=0_8 do p8=1,ndim8 - if ( dscale_tmp*D(p8) >= tau2 ) then + if ( Dmax*D(p8) >= tau2 ) then np8 = np8+1_8 Lset(np8) = p8 endif @@ -436,6 +436,10 @@ END_PROVIDER print *, '============ =============' print *, '' + deallocate( D, Lset, Dset, D_sorted ) + deallocate( addr1, addr2, Delta_col, computed ) + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 2dbe42b8..0306f64f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -49,10 +49,13 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, } if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); +/* map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); if (map == MAP_FAILED) { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); } +*/ } else { map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); }