diff --git a/bin/zcat b/bin/zcat deleted file mode 100755 index 7ccecf07..00000000 --- a/bin/zcat +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# On Darwin: try gzcat if available, otherwise use Python - -if [[ $(uname -s) = Darwin ]] ; then - which gzcat &> /dev/null - if [[ $? -eq 0 ]] ; then - exec gzcat $@ - else - - exec python3 << EOF -import sys -import gzip -with gzip.open("$1", "rt") as f: - print(f.read()) -EOF - fi -else - SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - command=$(which -a zcat | grep -v "$SCRIPTPATH/" | head -1) - exec $command $@ -fi - diff --git a/config/gfortran_debug_mkl.cfg b/config/gfortran_debug_mkl.cfg new file mode 100644 index 00000000..1dc3f2b2 --- /dev/null +++ b/config/gfortran_debug_mkl.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -g -ffree-line-length-none -I . -fPIC -std=legacy +LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +#FCFLAGS : -g -msse4.2 -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan +FCFLAGS : -g -mavx -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow -finit-real=nan + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/etc/paths.rc b/etc/paths.rc index dc1741e8..843ec383 100644 --- a/etc/paths.rc +++ b/etc/paths.rc @@ -28,6 +28,15 @@ function qp_prepend_export () { fi } +function qp_append_export () { + eval "value_1="\${$1}"" + if [[ -z $value_1 ]] ; then + echo "${2}:" + else + echo "${value_1}:${2}" + fi +} + export PYTHONPATH=$(qp_prepend_export "PYTHONPATH" "${QP_EZFIO}/Python":"${QP_PYTHON}") export PATH=$(qp_prepend_export "PATH" "${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml) diff --git a/plugins/local/basis_correction/51.basis_c.bats b/plugins/local/basis_correction/51.basis_c.bats index 914b482b..1e20bae3 100644 --- a/plugins/local/basis_correction/51.basis_c.bats +++ b/plugins/local/basis_correction/51.basis_c.bats @@ -37,14 +37,6 @@ function run_sd() { eq $energy1 $1 $thresh } -@test "O2 CAS" { - qp set_file o2_cas.gms.ezfio - qp set_mo_class -c "[1-2]" -a "[3-10]" -d "[11-46]" - run -149.72435425 3.e-4 10000 - qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" - run_md -0.1160222327 1.e-6 -} - @test "LiF RHF" { qp set_file lif.ezfio diff --git a/plugins/local/basis_correction/basis_correction.irp.f b/plugins/local/basis_correction/basis_correction.irp.f index a7ea7244..f17b5d5b 100644 --- a/plugins/local/basis_correction/basis_correction.irp.f +++ b/plugins/local/basis_correction/basis_correction.irp.f @@ -7,10 +7,6 @@ program basis_correction touch read_wf no_core_density = .True. touch no_core_density - if(io_mo_two_e_integrals .ne. "Read")then - provide ao_two_e_integrals_in_map - endif - provide mo_two_e_integrals_in_map call print_basis_correction end diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index 96faba30..b3b38673 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -22,7 +22,7 @@ subroutine print_basis_correction print*, '****************************************' print*, '****************************************' print*, 'mu_of_r_potential = ',mu_of_r_potential - if(mu_of_r_potential.EQ."hf")then + if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then print*, '' print*,'Using a HF-like two-body density to define mu(r)' print*,'This assumes that HF is a qualitative representation of the wave function ' diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f new file mode 100644 index 00000000..076d888c --- /dev/null +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -0,0 +1,18 @@ +program pouet + implicit none + call test +end +subroutine test + implicit none +! provide mos_times_cholesky_r1 +! provide mos_times_cholesky_r2 + integer :: ipoint + double precision :: accu,weight + accu = 0.d0 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) +! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight + accu += dabs(f_hf_cholesky_sparse(ipoint) - f_hf_cholesky(ipoint)) * weight + enddo + print*,'accu = ',accu +end diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 1e127fac..e27fdb7f 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -259,15 +259,21 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, END_DOC integer :: i,j,k,l + print*,'Providing mo_bi_ortho_tc_two_e_transp' + double precision :: t0,t1 + call wall_time(t0) do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num do l = 1, mo_num - mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e_transp(k,l,i,j) + mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e(k,l,i,j) enddo enddo enddo enddo + call wall_time(t1) + + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_transp (min)', (t1-t0)/60.d0 END_PROVIDER ! --- @@ -326,3 +332,23 @@ END_PROVIDER ! --- + BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals_transp , (mo_num,mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals_transp, (mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! tc_2e_3idx_coulomb_integrals_transp (j,k,i) = + ! tc_2e_3idx_exchange_integrals_transp(j,k,i) = + END_DOC + implicit none + integer :: i, j, k + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + tc_2e_3idx_coulomb_integrals_transp(j, k,i) = mo_bi_ortho_tc_two_e_transp(j ,k ,j ,i ) + tc_2e_3idx_exchange_integrals_transp(j,k,i) = mo_bi_ortho_tc_two_e_transp(k ,j ,j ,i ) + enddo + enddo + enddo + +END_PROVIDER diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f new file mode 100644 index 00000000..f149e7c6 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -0,0 +1,108 @@ +subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be okay for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm + double precision :: phase + double precision :: hij,hji + double precision, external :: get_phase_bi + logical :: ok + + integer, parameter :: bant=1 + double precision, allocatable :: hij_cache1(:), hij_cache2(:) + allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + double precision, allocatable :: hji_cache1(:), hji_cache2(:) + allocate (hji_cache1(mo_num),hji_cache2(mo_num)) +! print*,'in get_d0_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle +! call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,p1,h2,h1) + enddo + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache1(p2) * phase + hji = hji_cache1(p2) * phase + end if + if (hij == 0.d0.or.hji == 0.d0) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle +! call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1) + hij_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p2,p1) + hji_cache2(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p1,p2) + enddo + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + if (hij == 0.d0.or.hji == 0.d0) cycle + else +! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) +! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) + hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e_transp(puti, putj, p2, p1)) + if (hij == 0.d0.or.hji == 0.d0) cycle + phase = get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hij = (hij) * phase + hji = (hji) * phase + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end do + + end do + end if + + deallocate(hij_cache1,hij_cache2) +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f new file mode 100644 index 00000000..a3d7b076 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -0,0 +1,358 @@ +subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices should be okay for complex? + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, external :: get_phase_bi + double precision, external :: mo_two_e_integral_complex + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, istate + integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + double precision :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num),phase + double precision, allocatable :: hji_cache(:,:) + double precision :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num) +! PROVIDE mo_integrals_map N_int +! print*,'in get_d1_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + allocate (lbanned(mo_num, 2)) + allocate (hij_cache(mo_num,2)) + allocate (hji_cache(mo_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo + enddo + !! + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + + if(ma == 1) then + mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num) + mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l) + mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l) + enddo + enddo + end if + + end if + + !MOVE MI + pfix = p(1,mi) +! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p2) + enddo + putj = p1 + !! + do puti=1,mo_num !HOT + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if +! + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + + if(mi == 1) then + mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:) + mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:) + mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:) + mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l) + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l) + enddo + enddo + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1, N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo + enddo + !! + do putj=1,hfix-1 + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + do putj=hfix+1,mo_num + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + + mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1) + mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1) + do l=puti,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l) + mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l) + enddo + enddo + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) +! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + do istate = 1,N_states + tmp_rowij (istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji (istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo + enddo + putj = p2 + !! + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1) + mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1) + do l=p2,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l) + enddo + enddo + mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1) + mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1) + do l=p1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l) + enddo + enddo + end if + end if + deallocate(lbanned,hij_cache, hji_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + ! gen is a selector; mask is ionized generator; det is alpha + ! hij is contribution to +! call i_h_j_complex(gen, det, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det, gen, N_int, hij,hji) +! call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + ! take conjugate to get contribution to instead of +! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji + enddo + end do + end do +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f index d01ed433..86922ae9 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -25,9 +25,6 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, integer :: bant bant = 1 -! print*, 'in get_d2_new' -! call debug_det(gen,N_int) -! print*,'coefs',coefs(1,:) tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f new file mode 100644 index 00000000..b2a7ea31 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f @@ -0,0 +1,235 @@ + +subroutine get_d2_new_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be correct for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: phase + double precision :: hij,hji + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles + + ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b) + if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles + if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles + mi = mod(ma, 2) + 1 + + if(sp == 3) then ! if one alpha and one beta xhole + !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator) + if(ma == 2) bant = 2 ! if more beta particles than alpha particles + + if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + h1 = h(1, ma) + h2 = h(2, ma) + + !! + do i = 1, 3 ! loop over all 3 combinations of 2 particles with spin ma + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + + ! |G> = |psi_{gen,i}> + ! |G'> = a_{x1} a_{x2} |G> + ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'> + ! |alpha> = t_{x1,x2}^{puti,putj} |G> + ! hij = + ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}> + !todo: = ( - ) * phase + ! += dconjg(c_i) * + ! = ( - ) * phase + ! += * c_i + +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e_transp( h1, h2, p2, p1) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp( p1, p2, h2, h1) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e( p1, p2, h2, h1) + if (hij == 0.d0.or.hji==0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + + if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end do + else ! if 2 alpha and 2 beta particles + h1 = h(1,1) + h2 = h(1,2) + !! + do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + ! hij = +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 ) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2 ) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2 ) + hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + endif + end do + end do + end if + + else ! if holes are (a,a) or (b,b) + if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b) + h1 = h(1, ma) + h2 = h(2, ma) + !! + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e_transp(p1, p2, h2,h1 ) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p1, p2, h2,h1 ) + if (hij == 0.d0.or.hji == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji + enddo + end do + end do + else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1) + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + !! + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2) + hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2 ) + if (hij == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + if (puti < putj) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + endif + end do + else ! tip == 4 (a,a,b,b) + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + !! +! hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = (mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp(p2,p1,h1, h2)) + hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2)) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji* phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end if + end if + end if +end diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index 833cc0ea..22381991 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -65,8 +65,12 @@ subroutine tc_pt2 call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) end diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 12163e06..72ccf9c4 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -636,10 +636,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do -! print*,'in selection ' do i = 1, N_sel -! call debug_det(det(1,1,i),N_int) -! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif @@ -691,11 +688,23 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d2_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif elseif(nt == 3) then - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d1_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif else - call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif endif elseif(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -887,79 +896,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states delta_E = E0(istate) - Hii + E_shift - double precision :: alpha_h_psi_tmp, psi_h_alpha_tmp, error - if(debug_tc_pt2 == 1)then !! Using the old version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) - if(degree == 0)then - print*,'problem !!!' - print*,'a determinant is already in the wave function !!' - print*,'it corresponds to the selector number ',iii - call debug_det(det,N_int) - stop - endif -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function - enddo - else if(debug_tc_pt2 == 2)then !! debugging the new version -! psi_h_alpha_tmp = 0.d0 -! alpha_h_psi_tmp = 0.d0 -! do iii = 1, N_det_selectors ! old version -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) -! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function -! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function -! enddo - psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version - alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function - alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function - enddo - if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then - error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) - if(error.gt.1.d-2)then - call debug_det(det, N_int) - print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E - print*,psi_h_alpha , alpha_h_psi - print*,psi_h_alpha_tmp , alpha_h_psi_tmp - print*,'selectors ' - do iii = 1, N_det_selectors ! old version - print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - print*,i_h_alpha,alpha_h_i - call debug_det(psi_selectors(1,1,iii),N_int) - enddo -! print*,'psi_det ' -! do iii = 1, N_det! old version -! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) -! call debug_det(psi_det(1,1,iii),N_int) -! enddo - stop - endif - endif - else - psi_h_alpha = mat_l(istate, p1, p2) - alpha_h_psi = mat_r(istate, p1, p2) - endif + psi_h_alpha = mat_l(istate, p1, p2) + alpha_h_psi = mat_r(istate, p1, p2) val = 4.d0 * psi_h_alpha * alpha_h_psi tmp = dsqrt(delta_E * delta_E + val) -! if (delta_E < 0.d0) then -! tmp = -tmp -! endif e_pert(istate) = 0.25 * val / delta_E -! e_pert(istate) = 0.5d0 * (tmp - delta_E) if(dsqrt(tmp).gt.1.d-4.and.dabs(psi_h_alpha).gt.1.d-4)then coef(istate) = e_pert(istate) / psi_h_alpha else @@ -976,15 +917,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 endif -! if(selection_tc == 1 )then -! if(e_pert(istate).lt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! else if(selection_tc == -1)then -! if(e_pert(istate).gt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! endif enddo diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 99a8de7e..e363830d 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -88,6 +88,9 @@ subroutine run_stochastic_cipsi call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop diff --git a/plugins/local/fci_tc_bi/pt2_tc.irp.f b/plugins/local/fci_tc_bi/pt2_tc.irp.f index 390042bf..3c07e367 100644 --- a/plugins/local/fci_tc_bi/pt2_tc.irp.f +++ b/plugins/local/fci_tc_bi/pt2_tc.irp.f @@ -13,6 +13,8 @@ program tc_pt2_prog pruning = -1.d0 touch pruning + read_wf = .True. + touch read_wf ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error diff --git a/plugins/local/non_h_ints_mu/deb_mos.irp.f b/plugins/local/non_h_ints_mu/deb_mos.irp.f new file mode 100644 index 00000000..26344786 --- /dev/null +++ b/plugins/local/non_h_ints_mu/deb_mos.irp.f @@ -0,0 +1,101 @@ + +! --- + +program deb_mos + + implicit none + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + + call print_mos() + +end + +! --- + +subroutine print_mos() + + implicit none + integer :: i, ipoint + double precision :: r(3) + double precision :: mo_val, mo_der(3), mo_lap + + PROVIDE final_grid_points mos_in_r_array mos_grad_in_r_array mos_lapl_in_r_array + +! do ipoint = 1, n_points_final_grid +! r(:) = final_grid_points(:,ipoint) +! print*, r +! enddo +double precision :: accu_vgl(5) +double precision :: accu_vgl_nrm(5) + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(1111, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + write(1111, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + write(2222, '(5(f15.7, 3X))') r + do i = 1, mo_num + mo_val = mos_in_r_array_qmckl (i,ipoint) + mo_der(:) = mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap = mos_lapl_in_r_array_qmckl(i,ipoint) + write(2222, '(5(f15.7, 3X))') mo_val, mo_der, mo_lap + enddo + enddo + + accu_vgl = 0.d0 + accu_vgl_nrm = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + mo_val = mos_in_r_array (i,ipoint) + mo_der(:) = mos_grad_in_r_array(i,ipoint,:) + mo_lap = mos_lapl_in_r_array(i,ipoint,1) + mos_lapl_in_r_array(i,ipoint,2) + mos_lapl_in_r_array(i,ipoint,3) + accu_vgl_nrm(1) += dabs(mo_val) + accu_vgl_nrm(2) += dabs(mo_der(1)) + accu_vgl_nrm(3) += dabs(mo_der(2)) + accu_vgl_nrm(4) += dabs(mo_der(3)) + accu_vgl_nrm(5) += dabs(mo_lap) + + mo_val -= mos_in_r_array_qmckl (i,ipoint) + mo_der(:) -= mos_grad_in_r_array_qmckl(i,ipoint,:) + mo_lap -= mos_lapl_in_r_array_qmckl(i,ipoint) + accu_vgl(1) += dabs(mo_val) + accu_vgl(2) += dabs(mo_der(1)) + accu_vgl(3) += dabs(mo_der(2)) + accu_vgl(4) += dabs(mo_der(3)) + accu_vgl(5) += dabs(mo_lap) + enddo + + enddo + accu_vgl(:) *= 1.d0 / accu_vgl_nrm(:) + print *, accu_vgl + + return +end + +! --- + diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index db06e835..2c41b535 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -340,8 +340,8 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) endif tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) - tmp1 = tmp1 * g12_power(opA) - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp !tmp1 = 0.d0 !if(mpA .gt. 0) then @@ -356,9 +356,12 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) ! tmp2 = tmp2 + dble(opA) * g12**dble(opA-1) * (f1A**dble(mpA) * f2A**dble(npA) + f1A**dble(npA) * f2A**dble(mpA)) !endif - gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) - grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) - gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) +! gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) +! grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) +! gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) enddo ! p enddo ! i_nucl enddo ! jpoint @@ -864,19 +867,20 @@ subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - tmp1 = 1.d0 / (1.d0 + alpha * dist) - fct = alpha * dist * tmp1 - - if(dist .lt. 1d-10) then - grad1_fct(1) = 0.d0 - grad1_fct(2) = 0.d0 - grad1_fct(3) = 0.d0 - else + if(dist .ge. 1d-10) then + tmp1 = 1.d0 / (1.d0 + alpha * dist) + + fct = alpha * dist * tmp1 tmp2 = alpha * tmp1 * tmp1 / dist grad1_fct(1) = tmp2 * (r1(1) - r2(1)) grad1_fct(2) = tmp2 * (r1(2) - r2(2)) grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 endif return diff --git a/plugins/local/non_h_ints_mu/qmckl.irp.f b/plugins/local/non_h_ints_mu/qmckl.irp.f index 4d419e24..de440f14 100644 --- a/plugins/local/non_h_ints_mu/qmckl.irp.f +++ b/plugins/local/non_h_ints_mu/qmckl.irp.f @@ -158,7 +158,7 @@ END_PROVIDER double precision, allocatable :: vgl(:,:,:) allocate( vgl(mo_num,5,n_points_final_grid)) - rc = qmckl_get_mo_basis_mo_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) + rc = qmckl_get_mo_basis_mo_vgl(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) if (rc /= QMCKL_SUCCESS) then print *, irp_here, 'qmckl error in get_mo_vgl' rc = qmckl_check(qmckl_ctx, rc) diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED index 8b137891..e8c8c478 100644 --- a/plugins/local/normal_order_old/NEED +++ b/plugins/local/normal_order_old/NEED @@ -1 +1 @@ - +tc_scf diff --git a/plugins/local/old_delta_tc_qmc/NEED b/plugins/local/old_delta_tc_qmc/NEED new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/old_delta_tc_qmc/README.rst b/plugins/local/old_delta_tc_qmc/README.rst new file mode 100644 index 00000000..1d56f96c --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/README.rst @@ -0,0 +1,4 @@ +================ +old_delta_tc_qmc +================ + diff --git a/plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f b/plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f rename to plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f similarity index 88% rename from plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f rename to plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f index 0aff9980..135f9d17 100644 --- a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/plugins/local/slater_tc/slater_tc.irp.f b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f similarity index 82% rename from plugins/local/slater_tc/slater_tc.irp.f rename to plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f index 27ab47c5..5ff08bd6 100644 --- a/plugins/local/slater_tc/slater_tc.irp.f +++ b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f @@ -1,4 +1,4 @@ -program slater_tc +program old_delta_tc_qmc implicit none BEGIN_DOC ! TODO : Put the documentation of the program here diff --git a/plugins/local/slater_tc/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f index 6f5697a2..9cb4b60a 100644 --- a/plugins/local/slater_tc/h_mat_triple.irp.f +++ b/plugins/local/slater_tc/h_mat_triple.irp.f @@ -1,196 +1,3 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) BEGIN_DOC ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS diff --git a/plugins/local/slater_tc/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f index 59efc943..5651a299 100644 --- a/plugins/local/slater_tc/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -181,3 +181,48 @@ end ! --- +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, hji,hij) + + BEGIN_DOC + ! + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + !! + ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS + !! WARNING !! + ! + ! Non hermitian !! + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + integer :: degree + + hji = 0.d0 + hij = 0.d0 + + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.gt.2) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,hji) + hij = hji + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , hji,hij) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + endif + + if(degree==0) then + hji += nuclear_repulsion + hij += nuclear_repulsion + endif + +end + +! --- + diff --git a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f index 78f9dc66..3c5a5d12 100644 --- a/plugins/local/slater_tc/slater_tc_opt_diag.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f @@ -19,13 +19,13 @@ PROVIDE HF_bitmask PROVIDE mo_l_coef mo_r_coef - call diag_htilde_mu_mat_bi_ortho_slow(N_int, HF_bitmask, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(N_int, HF_bitmask, hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc) then - call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) + call diag_htc_bi_orth_3e_brute(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -524,3 +524,310 @@ end ! --- +subroutine diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) + + BEGIN_DOC + ! + ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hmono,htwoe,htot + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + double precision :: get_mo_two_e_integral_tc_int + integer(bit_kind) :: key_i_core(Nint,2) + + PROVIDE mo_bi_ortho_tc_two_e + + hmono = 0.d0 + htwoe = 0.d0 + htot = 0.d0 + + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + do ispin = 1, 2 + do i = 1, Ne(ispin) + ii = occ(i,ispin) + hmono += mo_bi_ortho_tc_one_e(ii,ii) + enddo + enddo + + ! alpha/beta two-body + ispin = 1 + jspin = 2 + do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) + ii = occ(i,ispin) + do j = 1, Ne(jspin) ! electron 2 + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) + enddo + enddo + + ! alpha/alpha two-body + do i = 1, Ne(ispin) + ii = occ(i,ispin) + do j = i+1, Ne(ispin) + jj = occ(j,ispin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + ! beta/beta two-body + do i = 1, Ne(jspin) + ii = occ(i,jspin) + do j = i+1, Ne(jspin) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + htot = hmono + htwoe + +end + +! --- + +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) + + BEGIN_DOC + ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hthree + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm + integer(bit_kind) :: key_i_core(Nint,2) + double precision :: direct_int, exchange_int, ref + double precision, external :: sym_3_e_int_from_6_idx_tensor + double precision, external :: three_e_diag_parrallel_spin + + PROVIDE mo_l_coef mo_r_coef + + if(core_tc_op) then + do i = 1, Nint + key_i_core(i,1) = xor(key_i(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) + else + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + endif + + hthree = 0.d0 + + if((Ne(1)+Ne(2)) .ge. 3) then + + ! alpha/alpha/beta three-body + do i = 1, Ne(1) + ii = occ(i,1) + do j = i+1, Ne(1) + jj = occ(j,1) + do m = 1, Ne(2) + mm = occ(m,2) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) !uses 3-idx tensor + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) !uses 3-idx tensor + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! beta/beta/alpha three-body + do i = 1, Ne(2) + ii = occ(i,2) + do j = i+1, Ne(2) + jj = occ(j,2) + do m = 1, Ne(1) + mm = occ(m,1) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! alpha/alpha/alpha three-body + do i = 1, Ne(1) + ii = occ(i,1) ! 1 + do j = i+1, Ne(1) + jj = occ(j,1) ! 2 + do m = j+1, Ne(1) + mm = occ(m,1) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + ! beta/beta/beta three-body + do i = 1, Ne(2) + ii = occ(i,2) ! 1 + do j = i+1, Ne(2) + jj = occ(j,2) ! 2 + do m = j+1, Ne(2) + mm = occ(m,2) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + endif + +end + + + +BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS + ! + ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, m + double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin + + three_e_diag_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_diag_parrallel_spin_prov ...' + + integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do j = 1, mo_num + do m = j, mo_num + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do i = 1, mo_num + do j = 1, mo_num + do m = 1, j + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) + enddo + enddo + enddo + + call wall_time(wall1) + print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0, three_e_single_parrallel_spin + + three_e_single_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_single_parrallel_spin_prov ...' + + integral = three_e_single_parrallel_spin(1,1,1,1) + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + + +! --- + +BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0, three_e_double_parrallel_spin + + three_e_double_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_double_parrallel_spin_prov ...' + call wall_time(wall0) + + integral = three_e_double_parrallel_spin(1,1,1,1,1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + diff --git a/plugins/local/slater_tc/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f index 4067473c..181ae11d 100644 --- a/plugins/local/slater_tc/slater_tc_opt_double.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_double.irp.f @@ -505,3 +505,63 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) end +subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono, htwoe_ji, htwoe_ij + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int,phase + + + call get_excitation_degree(key_i, key_j, degree, Nint) + + hmono = 0.d0 + htwoe_ji = 0.d0 + htwoe_ij = 0.d0 + hji = 0.d0 + hij = 0.d0 + + if(degree.ne.2)then + return + endif + integer :: degree_i,degree_j + call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int) + call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int) + call get_double_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) + + if(s1.ne.s2)then + ! opposite spin two-body + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + else + ! same spin two-body + ! direct terms + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + ! exchange terms + htwoe_ji -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + htwoe_ij -= mo_bi_ortho_tc_two_e_transp(p1,p2,h2,h1) + endif + htwoe_ji *= phase + hji = htwoe_ji + htwoe_ij *= phase + hij = htwoe_ij + +end diff --git a/plugins/local/slater_tc/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f index e57cb05c..47bcbe34 100644 --- a/plugins/local/slater_tc/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -618,3 +618,145 @@ subroutine get_single_excitation_from_fock_tc_no_3e(Nint, key_i, key_j, h, p, sp end + +subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + + double precision :: hmono, htwoe + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int, phase + double precision :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13 + integer :: other_spin(2) + integer(bit_kind) :: key_j_core(Nint,2), key_i_core(Nint,2) + + other_spin(1) = 2 + other_spin(2) = 1 + + hmono = 0.d0 + htwoe = 0.d0 + hji = 0.d0 + hij = 0.d0 + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.ne.1)then + return + endif + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + call get_single_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hji,hij) + +end + +! --- + +subroutine get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h, p, spin, phase, hji,hij) + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer, intent(in) :: h, p, spin + double precision, intent(in) :: phase + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono_ji,htwoe_ji + double precision :: hmono_ij,htwoe_ij + + integer(bit_kind) :: differences(Nint,2) + integer(bit_kind) :: hole(Nint,2) + integer(bit_kind) :: partcl(Nint,2) + integer :: occ_hole(Nint*bit_kind_size,2) + integer :: occ_partcl(Nint*bit_kind_size,2) + integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) + integer :: i0,i + double precision :: buffer_c_ji(mo_num), buffer_x_ji(mo_num) + double precision :: buffer_c_ij(mo_num), buffer_x_ij(mo_num) + + do i = 1, mo_num + buffer_c_ji(i) = tc_2e_3idx_coulomb_integrals(i,p,h) + buffer_x_ji(i) = tc_2e_3idx_exchange_integrals(i,p,h) + buffer_c_ij(i) = tc_2e_3idx_coulomb_integrals_transp(i,p,h) + buffer_x_ij(i) = tc_2e_3idx_exchange_integrals_transp(i,p,h) + enddo + + do i = 1, Nint + differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1)) + differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2)) + hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1)) + hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2)) + partcl(i,1) = iand(differences(i,1),key_i(i,1)) + partcl(i,2) = iand(differences(i,2),key_i(i,2)) + enddo + + call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, Nint) + call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, Nint) + hmono_ji = mo_bi_ortho_tc_one_e(p,h) + htwoe_ji = fock_op_2_e_tc_closed_shell(p,h) + hmono_ij = mo_bi_ortho_tc_one_e(h,p) + htwoe_ij = fock_op_2_e_tc_closed_shell(h,p) + + ! holes :: direct terms + do i0 = 1, n_occ_ab_hole(1) + i = occ_hole(i0,1) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_hole(2) + i = occ_hole(i0,2) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + + ! holes :: exchange terms + do i0 = 1, n_occ_ab_hole(spin) + i = occ_hole(i0,spin) + htwoe_ji += buffer_x_ji(i) + htwoe_ij += buffer_x_ij(i) + enddo + + ! particles :: direct terms + do i0 = 1, n_occ_ab_partcl(1) + i = occ_partcl(i0,1) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_partcl(2) + i = occ_partcl(i0,2) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + + ! particles :: exchange terms + do i0 = 1, n_occ_ab_partcl(spin) + i = occ_partcl(i0,spin) + htwoe_ji -= buffer_x_ji(i) + htwoe_ij -= buffer_x_ij(i) + enddo + htwoe_ji = htwoe_ji * phase + hmono_ji = hmono_ji * phase + hji = htwoe_ji + hmono_ji + + htwoe_ij = htwoe_ij * phase + hmono_ij = hmono_ij * phase + hij = htwoe_ij + hmono_ij + +end + diff --git a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f b/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f deleted file mode 100644 index e8277a74..00000000 --- a/plugins/local/slater_tc/symmetrized_3_e_int_prov.irp.f +++ /dev/null @@ -1,140 +0,0 @@ - -BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS - ! - ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, m - double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin - - three_e_diag_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_diag_parrallel_spin_prov ...' - - integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do j = 1, mo_num - do m = j, mo_num - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do i = 1, mo_num - do j = 1, mo_num - do m = 1, j - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) - enddo - enddo - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0, three_e_single_parrallel_spin - - three_e_single_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_single_parrallel_spin_prov ...' - - integral = three_e_single_parrallel_spin(1,1,1,1) - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - - -! --- - -BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0, three_e_double_parrallel_spin - - three_e_double_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_double_parrallel_spin_prov ...' - call wall_time(wall0) - - integral = three_e_double_parrallel_spin(1,1,1,1,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - diff --git a/plugins/local/slater_tc_no_opt/.gitignore b/plugins/local/slater_tc_no_opt/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/slater_tc_no_opt/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/slater_tc_no_opt/NEED b/plugins/local/slater_tc_no_opt/NEED new file mode 100644 index 00000000..a8669866 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/NEED @@ -0,0 +1,8 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat +tc_scf diff --git a/plugins/local/slater_tc_no_opt/README.rst b/plugins/local/slater_tc_no_opt/README.rst new file mode 100644 index 00000000..90679e4c --- /dev/null +++ b/plugins/local/slater_tc_no_opt/README.rst @@ -0,0 +1,4 @@ +================ +slater_tc_no_opt +================ + diff --git a/plugins/local/slater_tc/h_biortho.irp.f b/plugins/local/slater_tc_no_opt/h_biortho.irp.f similarity index 100% rename from plugins/local/slater_tc/h_biortho.irp.f rename to plugins/local/slater_tc_no_opt/h_biortho.irp.f diff --git a/plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f similarity index 100% rename from plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f rename to plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f diff --git a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f similarity index 99% rename from plugins/local/slater_tc/slater_tc_3e_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f index cb33d343..f7919653 100644 --- a/plugins/local/slater_tc/slater_tc_3e_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f new file mode 100644 index 00000000..0fcc587f --- /dev/null +++ b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f @@ -0,0 +1,7 @@ +program slater_tc_no_opt + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/plugins/local/slater_tc/slater_tc_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f similarity index 80% rename from plugins/local/slater_tc/slater_tc_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f index caf7d665..b06fd12f 100644 --- a/plugins/local/slater_tc/slater_tc_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f @@ -61,7 +61,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) else if (degree == 1) then call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2) then @@ -76,7 +76,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, else if((degree == 1) .and. (elec_num .gt. 2) .and. three_e_4_idx_term) then call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) else if((degree == 0) .and. (elec_num .gt. 2) .and. three_e_3_idx_term) then - call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) + call diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) endif endif @@ -95,75 +95,6 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) - - BEGIN_DOC - ! - ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS - ! - END_DOC - - use bitmasks - - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - double precision, intent(out) :: hmono,htwoe,htot - integer :: occ(Nint*bit_kind_size,2) - integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk - double precision :: get_mo_two_e_integral_tc_int - integer(bit_kind) :: key_i_core(Nint,2) - - PROVIDE mo_bi_ortho_tc_two_e - - hmono = 0.d0 - htwoe = 0.d0 - htot = 0.d0 - - call bitstring_to_list_ab(key_i, occ, Ne, Nint) - - do ispin = 1, 2 - do i = 1, Ne(ispin) - ii = occ(i,ispin) - hmono += mo_bi_ortho_tc_one_e(ii,ii) - enddo - enddo - - ! alpha/beta two-body - ispin = 1 - jspin = 2 - do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) - ii = occ(i,ispin) - do j = 1, Ne(jspin) ! electron 2 - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) - enddo - enddo - - ! alpha/alpha two-body - do i = 1, Ne(ispin) - ii = occ(i,ispin) - do j = i+1, Ne(ispin) - jj = occ(j,ispin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - ! beta/beta two-body - do i = 1, Ne(jspin) - ii = occ(i,jspin) - do j = i+1, Ne(jspin) - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - htot = hmono + htwoe - -end - -! --- - subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f similarity index 96% rename from plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f rename to plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f index 369efd15..559c0200 100644 --- a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f @@ -88,7 +88,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -124,7 +124,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -171,7 +171,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -208,7 +208,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -250,7 +250,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/plugins/local/spher_harm/.gitignore b/plugins/local/spher_harm/.gitignore new file mode 100644 index 00000000..1561915b --- /dev/null +++ b/plugins/local/spher_harm/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/spher_harm/NEED b/plugins/local/spher_harm/NEED new file mode 100644 index 00000000..92df7f12 --- /dev/null +++ b/plugins/local/spher_harm/NEED @@ -0,0 +1 @@ +dft_utils_in_r diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst new file mode 100644 index 00000000..9c9b12a6 --- /dev/null +++ b/plugins/local/spher_harm/README.rst @@ -0,0 +1,7 @@ +========== +spher_harm +========== + +Routines for spherical Harmonics evaluation in real space. +The main routine is "spher_harm_func_r3(r,l,m,re_ylm, im_ylm)". +The test routine is "test_spher_harm" where everything is explained in details. diff --git a/plugins/local/spher_harm/assoc_gaus_pol.irp.f b/plugins/local/spher_harm/assoc_gaus_pol.irp.f new file mode 100644 index 00000000..fa790307 --- /dev/null +++ b/plugins/local/spher_harm/assoc_gaus_pol.irp.f @@ -0,0 +1,50 @@ +double precision function plgndr(l,m,x) + integer, intent(in) :: l,m + double precision, intent(in) :: x + BEGIN_DOC + ! associated Legenre polynom P_l,m(x). Used for the Y_lm(theta,phi) + ! Taken from https://iate.oac.uncor.edu/~mario/materia/nr/numrec/f6-8.pdf + END_DOC + integer :: i,ll + double precision :: fact,pll,pmm,pmmp1,somx2 + if(m.lt.0.or.m.gt.l.or.dabs(x).gt.1.d0)then + print*,'bad arguments in plgndr' + pause + endif + pmm=1.d0 + if(m.gt.0) then + somx2=dsqrt((1.d0-x)*(1.d0+x)) + fact=1.d0 + do i=1,m + pmm=-pmm*fact*somx2 + fact=fact+2.d0 + enddo + endif ! m > 0 + if(l.eq.m) then + plgndr=pmm + else + pmmp1=x*(2*m+1)*pmm ! Compute P_m+1^m + if(l.eq.m+1) then + plgndr=pmmp1 + else ! Compute P_l^m, l> m+1 + do ll=m+2,l + pll=(x*dble(2*ll-1)*pmmp1-dble(ll+m-1)*pmm)/(ll-m) + pmm=pmmp1 + pmmp1=pll + enddo + plgndr=pll + endif ! l.eq.m+1 + endif ! l.eq.m + return +end + +double precision function ortho_assoc_gaus_pol(l1,m1,l2) + implicit none + integer, intent(in) :: l1,m1,l2 + double precision :: fact + if(l1.ne.l2)then + ortho_assoc_gaus_pol= 0.d0 + else + ortho_assoc_gaus_pol = 2.d0*fact(l1+m1) / (dble(2*l1+1)*fact(l1-m1)) + endif +end diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 00000000..fe8fc422 --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,231 @@ +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the generic spherical harmonics routine "spher_harm_func_r3" from R^3 --> C + ! + ! We test = delta_m1,m2 delta_l1,l2 + ! + ! The test is done through the integration on a sphere with the Lebedev grid. + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + double precision :: theta,phi,r_abs + lmax = 5 ! Maximum angular momentum until which we are going to test orthogonality conditions + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 ! accumulator for the REAL part of + accu_im = 0.d0 ! accumulator for the IMAGINARY part of + accu = 0.d0 ! accumulator for the weights ==> should be \int dOmega == 4 pi + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! \approx \sum_i W_i Y_l1,m1^*(r_i) Y_l2,m2(r_i) WITH r_i being on the spher of radius 1 + do i = 1, n_points_integration_angular + r(1:3) = angular_quadrature_points(i,1:3) ! ith Lebedev point (x,y,z) on the sphere of radius 1 + weight = weights_angular_points(i) ! associated Lebdev weight not necessarily positive + +!!!!!!!!!!! Test of the Cartesian --> Spherical coordinates + ! theta MUST belong to [0,pi] and phi to [0,2pi] + ! gets the cartesian to spherical change of coordinates + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta, it should be in [0,pi]',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi, it should be in [0,2 pi]',phi/pi + print*,r + endif + +!!!!!!!!!!! Routines returning the Spherical harmonics on the grid point + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + +!!!!!!!!!!! Integration of Y_l1,m1^*(r) Y_l2,m2(r) + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + enddo + ! Test that the sum of the weights is 4 pi + if(dabs(accu - dfour_pi).gt.1.d-6)then + print*,'Problem !! The sum of the Lebedev weight is not 4 pi ..' + print*,accu + stop + endif + ! Test for the delta l1,l2 and delta m1,m2 + ! + ! Test for the off-diagonal part of the Kronecker delta + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + ! Test for the diagonal part of the Kronecker delta + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo +end + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! test the routine "cartesian_to_spherical" such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! Test for the = delta_m1,m2 delta_l1,l2 using the following two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions "spher_harm_func_m_pos" with "spher_harm_func_expl" + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 2 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! Test for the associated Legendre Polynoms. The test is done through the orthogonality condition. + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f new file mode 100644 index 00000000..7a2eea06 --- /dev/null +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -0,0 +1,7 @@ +program spher_harm + implicit none +! call test_spher_harm +! call test_cart + call test_brutal_spheric +end + diff --git a/plugins/local/spher_harm/spher_harm_func.irp.f b/plugins/local/spher_harm/spher_harm_func.irp.f new file mode 100644 index 00000000..825bd8ac --- /dev/null +++ b/plugins/local/spher_harm/spher_harm_func.irp.f @@ -0,0 +1,151 @@ +subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm) + implicit none + integer, intent(in) :: l,m + double precision, intent(in) :: r(3) + double precision, intent(out) :: re_ylm, im_ylm + + double precision :: theta, phi,r_abs + call cartesian_to_spherical(r,theta,phi,r_abs) + call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) +end + + +subroutine spher_harm_func_m_pos(l,m,theta,phi,re_ylm, im_ylm) + include 'constants.include.F' + implicit none + BEGIN_DOC +! Y_lm(theta,phi) with m >0 +! + END_DOC + double precision, intent(in) :: theta, phi + integer, intent(in) :: l,m + double precision, intent(out):: re_ylm,im_ylm + double precision :: prefact,fact,cos_theta,plgndr,p_lm + double precision :: tmp + prefact = dble(2*l+1)*fact(l-m)/(dfour_pi * fact(l+m)) + prefact = dsqrt(prefact) + cos_theta = dcos(theta) + p_lm = plgndr(l,m,cos_theta) + tmp = prefact * p_lm + re_ylm = dcos(dble(m)*phi) * tmp + im_ylm = dsin(dble(m)*phi) * tmp +end + +subroutine spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l l in spher_harm_func !! stopping ...' + stop + endif + if(m.ge.0)then + call spher_harm_func_m_pos(l,m,theta,phi,re_ylm_pos, im_ylm_pos) + re_ylm = re_ylm_pos + im_ylm = im_ylm_pos + else + minus_m = -m !> 0 + call spher_harm_func_m_pos(l,minus_m,theta,phi,re_ylm_pos, im_ylm_pos) + tmp = (-1)**minus_m + re_ylm = tmp * re_ylm_pos + im_ylm = -tmp * im_ylm_pos ! complex conjugate + endif +end + +subroutine cartesian_to_spherical(r,theta,phi,r_abs) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: theta, phi,r_abs + double precision :: r_2,x_2_y_2,tmp + include 'constants.include.F' + x_2_y_2 = r(1)*r(1) + r(2)*r(2) + r_2 = x_2_y_2 + r(3)*r(3) + r_abs = dsqrt(r_2) + + if(r_abs.gt.1.d-20)then + theta = dacos(r(3)/r_abs) + else + theta = 0.d0 + endif + + if(.true.)then + if(dabs(r(1)).gt.0.d0)then + tmp = datan(r(2)/r(1)) +! phi = datan2(r(2),r(1)) + endif + ! From Wikipedia on Spherical Harmonics + if(r(1).gt.0.d0)then + phi = tmp + else if(r(1).lt.0.d0.and.r(2).ge.0.d0)then + phi = tmp + pi + else if(r(1).lt.0.d0.and.r(2).lt.0.d0)then + phi = tmp - pi + else if(r(1)==0.d0.and.r(2).gt.0.d0)then + phi = 0.5d0*pi + else if(r(1)==0.d0.and.r(2).lt.0.d0)then + phi =-0.5d0*pi + else if(r(1)==0.d0.and.r(2)==0.d0)then + phi = 0.d0 + endif + if(r(2).lt.0.d0.and.r(1).le.0.d0)then + tmp = pi - dabs(phi) + phi = pi + tmp + else if(r(2).lt.0.d0.and.r(1).gt.0.d0)then + phi = dtwo_pi + phi + endif + endif + + if(.false.)then + x_2_y_2 = dsqrt(x_2_y_2) + if(dabs(x_2_y_2).gt.1.d-20.and.dabs(r(2)).gt.1.d-20)then + phi = dabs(r(2))/r(2) * dacos(r(1)/x_2_y_2) + else + phi = 0.d0 + endif + endif +end + + +subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l' + accu_tot = 0.D0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integral_cholesky = 0.D0 + do m = 1, cholesky_ao_num + integral_cholesky += cholesky_ao_transp(m,i,k) * cholesky_ao_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo + print*,'accu_tot',accu_tot + + print*,'MO integrals, physicist notations : ' + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + integral = get_two_e_integral(i, j, k, l, mo_integrals_map) + accu = 0.D0 + integral_cholesky = 0.D0 + do m = 1, cholesky_mo_num + integral_cholesky += cholesky_mo_transp(m,i,k) * cholesky_mo_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo +end diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index ff932b0c..a985149e 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -6,7 +6,7 @@ default: None [io_ao_cholesky] type: Disk_access -doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +doc: Read/Write |AO| Cholesky integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None @@ -25,16 +25,16 @@ default: 1.e-12 [do_direct_integrals] type: logical -doc: Compute integrals on the fly (very slow, only for debugging) +doc: Compute integrals on the fly (Useful only for Cholesky decomposition) interface: ezfio,provider,ocaml -default: False +default: True ezfio_name: direct [do_ao_cholesky] type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: False +default: True [io_ao_two_e_integrals_erf] type: Disk_access diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 33304026..063cc365 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, integer :: i,j,k do j=1,ao_num do i=1,ao_num - do k=1,ao_num + do k=1,cholesky_ao_num cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) enddo enddo @@ -16,27 +16,32 @@ END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + use mmap_module implicit none BEGIN_DOC ! Cholesky vectors in AO basis: (ik|a): ! = (ik|jl) = sum_a (ik|a).(a|jl) ! ! Last dimension of cholesky_ao is cholesky_ao_num + ! + ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html + ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 END_DOC - integer :: rank, ndim - double precision :: tau - double precision, pointer :: L(:,:), L_old(:,:) - + integer*8 :: ndim8 + integer :: rank + double precision :: tau, tau2 + double precision, pointer :: L(:,:) double precision :: s - double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) + integer, allocatable :: addr1(:), addr2(:) + integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj + integer*8 :: i8, j8, p8, qj8, rank_max, np8 integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f @@ -44,19 +49,32 @@ END_PROVIDER logical, external :: ao_two_e_integral_zero double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr + integer :: block_size, iblock - double precision :: mem + double precision :: mem, mem0 double precision, external :: memory_of_double, memory_of_int + double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen - integer :: iunit + integer :: iunit, ierr - ndim = ao_num*ao_num + ndim8 = ao_num*ao_num*1_8 + double precision :: wall0,wall1 + + type(c_ptr) :: c_pointer(2) + integer :: fd(2) + + PROVIDE nproc ao_cholesky_threshold do_direct_integrals qp_max_mem + PROVIDE nucl_coord ao_two_e_integral_schwartz + call set_multiple_levels_omp(.False.) + + call wall_time(wall0) + + ! Will be reallocated at the end deallocate(cholesky_ao) if (read_ao_cholesky) then - print *, 'Reading Cholesky vectors from disk...' + print *, 'Reading Cholesky AO vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') read(iunit) rank allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) @@ -66,7 +84,6 @@ END_PROVIDER else - PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) if (do_direct_integrals) then @@ -79,66 +96,78 @@ END_PROVIDER endif tau = ao_cholesky_threshold + tau2 = tau*tau - mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) - call check_mem(mem, irp_here) + rank = 0 + + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8), computed(ndim8) ) + + call resident_memory(mem0) call print_memory_usage() - allocate(L(ndim,1)) - print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' print *, '============ =============' - print *, ' Rank Threshold' + print *, ' Rank Threshold' print *, '============ =============' - rank = 0 - - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(3,ndim) ) - ! 1. - k=0 + i8=0 do j=1,ao_num do i=1,ao_num - k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j + i8 = i8+1 + addr1(i8) = i + addr2(i8) = j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) + do i8=ndim8,1,-1 + D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & + addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) + do i8=ndim8,1,-1 + D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & + addr2(i8), addr2(i8), ao_integrals_map) enddo !$OMP END PARALLEL DO endif - Dmax = maxval(D) + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) + Dmax = D_sorted(1) ! 2. - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p + np8=0_8 + do p8=1,ndim8 + if ( Dmax*D(p8) >= tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + np = np8 + if (np <= 0) stop 'np<=0' + if (np > ndim8) stop 'np>ndim8' + + rank_max = min(np,20*elec_num*elec_num) + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + ! 3. N = 0 @@ -146,77 +175,67 @@ END_PROVIDER ! 4. i = 0 + mem = memory_of_double(np) & ! Delta(np,nq) + + (np+1)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + +! call check_mem(mem) + ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) + do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 - s = 0.01d0 - ! Inrease s until the arrays fit in memory + block_size = max(N,24) + + ! Determine nq so that Delta fits in memory + + s = 0.1d0 + Dmin = max(s*Dmax,tau) + do nq=2,np-1 + if (D_sorted(nq) < Dmin) exit + enddo + do while (.True.) - ! b. - Dmin = max(s*Dmax,tau) + mem = mem0 & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - ! c. - nq=0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - endif - enddo - - call total_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - - if (mem > qp_max_mem) then - s = s*2.d0 + if (mem > qp_max_mem*0.5d0) then + Dmin = D_sorted(nq/2) + do ii=nq/2,np-1 + if (D_sorted(ii) < Dmin) then + nq = ii + exit + endif + enddo else exit endif - if ((s > 1.d0).or.(nq == 0)) then - call print_memory_usage() - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 + enddo +!call print_memory_usage +!print *, 'np, nq, Predicted memory: ', np, nq, mem + + if (nq <= 0) then + print *, nq + stop 'bug in cholesky: nq <= 0' + endif + + Dmin = D_sorted(nq) + nq=0 + do p=1,np + if ( D(Lset(p)) >= Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) endif - enddo - ! d., e. - block_size = max(N,24) - - L_old => L - allocate(L(ndim,rank+nq), stat=ierr) - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' - stop -1 - endif - - !$OMP PARALLEL DO PRIVATE(k,j) - do k=1,rank - do j=1,ndim - L(j,k) = L_old(j,k) - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(L_old) - - allocate(Delta(np,nq), stat=ierr) - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 - endif + allocate(Delta(np,nq)) allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' @@ -224,6 +243,7 @@ END_PROVIDER endif allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' @@ -231,36 +251,39 @@ END_PROVIDER endif - allocate(computed(nq)) + computed(1:nq) = .False. - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - !$OMP DO - do q=1,nq - do j=1,np - Delta(j,q) = 0.d0 - enddo - computed(q) = .False. - enddo - !$OMP ENDDO NOWAIT - - !$OMP DO + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N + !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo + !$OMP END DO NOWAIT + + !$OMP DO do q=1,nq Ltmp_q(q,k) = L(Dset(q),k) enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT - !$OMP BARRIER !$OMP END PARALLEL if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO + endif ! f. @@ -274,51 +297,81 @@ END_PROVIDER iblock = 0 do j=1,nq - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + if ( (Qmax <= Dmin).or.(N+j*1_8 > ndim8) ) exit + ! i. rank = N+j + if (rank == rank_max) then + print *, 'cholesky: rank_max reached' + exit + endif if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - iblock = 0 + + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + iblock = 0 + endif ! ii. do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then + qj8 = Dset(dj) + if (D(qj8) == Qmax) then exit endif enddo - L(1:ndim, rank) = 0.d0 - - if (.not.computed(dj)) then - m = dj - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) - do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - endif - enddo - !$OMP END PARALLEL DO - computed(dj) = .True. - endif + do i8=1,ndim8 + L(i8, rank) = 0.d0 + enddo iblock = iblock+1 + !$OMP PARALLEL DO PRIVATE(p) do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo + !$OMP END PARALLEL DO + + if (.not.computed(dj)) then + m = dj + if (do_direct_integrals) then + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np + Delta_col(k) = 0.d0 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta_col(k) = & + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) + endif + enddo + !$OMP END PARALLEL DO + else + PROVIDE ao_integrals_map + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np + Delta_col(k) = 0.d0 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta_col(k) = & + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + endif + enddo + !$OMP END PARALLEL DO + endif + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) + enddo + !$OMP END PARALLEL DO + + computed(dj) = .True. + endif ! iv. if (iblock > 1) then @@ -329,7 +382,7 @@ END_PROVIDER ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f @@ -343,7 +396,6 @@ END_PROVIDER Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -355,49 +407,62 @@ END_PROVIDER print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(computed) - deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) + deallocate(Delta) ! i. N = rank ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p + Dmax = D_sorted(1) + + np8=0_8 + do p8=1,ndim8 + if ( Dmax*D(p8) >= tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + np = np8 enddo + + print *, '============ =============' + print *, '' + + deallocate( D, Lset, Dset, D_sorted ) + deallocate( addr1, addr2, Delta_col, computed ) + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': Allocation failed' stop -1 endif - !$OMP PARALLEL DO PRIVATE(k) + + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + do j=1,ao_num + cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) + enddo enddo !$OMP END PARALLEL DO - deallocate(L) + + call munmap( (/ ndim8, rank_max /), 8, fd(1), c_pointer(1) ) + cholesky_ao_num = rank - print *, '============ =============' - print *, '' - if (write_ao_cholesky) then - print *, 'Writing Cholesky vectors to disk...' + print *, 'Writing Cholesky AO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') write(iunit) rank write(iunit) cholesky_ao @@ -409,6 +474,9 @@ END_PROVIDER print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' + call wall_time(wall1) + print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + END_PROVIDER diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index b55b5f0d..d12f3d45 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,8 +460,8 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(guided) - do i=1,ao_num + !$OMP SCHEDULE(dynamic) + do i=ao_num,1,-1 do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) ao_two_e_integral_schwartz(k,i) = ao_two_e_integral_schwartz(i,k) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b48ca7da..9d4ae7f9 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,6 +18,8 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa + call set_multiple_levels_omp(.False.) + if (do_ao_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao @@ -192,7 +194,7 @@ subroutine run_ccsd_space_orb deallocate(H_vv,H_oo,H_vo,r1,r2,tau) ! CCSD(T) - double precision :: e_t + double precision :: e_t, e_t_err e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then @@ -210,22 +212,24 @@ subroutine run_ccsd_space_orb !print*,'' ! New + e_t = uncorr_energy + energy ! For print in (T) call + e_t_err = 0.d0 + print*,'Computing (T) correction...' call wall_time(ta) ! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) - e_t = uncorr_energy + energy ! For print in next call call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & - ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) call wall_time(tb) print*,'Time: ',tb-ta, ' s' print*,'' - write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + write(*,'(A15,F18.12,A7,F18.12)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' E(T) = ', e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' Correlation = ', energy + e_t, ' Ha +/- ', e_t_err print*,'' endif diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1093c59d..851b6a9f 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,5 @@ ! Main -subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy,t_error) implicit none @@ -7,7 +7,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(inout) :: energy + double precision, intent(inout) :: energy, t_error double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) @@ -220,8 +220,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 - !$OMP PARALLEL & - !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + t_error = huge(1.d0) + + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & !$OMP DEFAULT(SHARED) NUM_THREADS(nthreads_pt2) do kiter=1,Nabc @@ -328,15 +330,23 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (norm > 0.d0) then energy_stoch = ET / norm variance = ET2 / norm - energy_stoch*energy_stoch + if (norm > 1.d0) then + t_error = dsqrt(variance/(norm-1.d0)) + else + t_error = dsqrt(variance) + endif endif energy = energy_det + energy_stoch - print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, t_error, 100.*real(Ncomputed)/real(Nabc) + endif !$OMP END MASTER + if (t_error < cc_par_t_stop) exit if (imin > Nabc) exit enddo + !$OMP TASKWAIT !$OMP END PARALLEL print '(A)', ' ======================= ============== ========== ' diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0281a1d4..517220a8 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -178,7 +178,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical, allocatable :: banned(:,:,:), bannedOrb(:,:) double precision, allocatable :: coef_fullminilist_rev(:,:) - double precision, allocatable :: mat(:,:,:) + double precision, allocatable :: mat(:,:,:), hij_cache(:,:,:) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -205,7 +205,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc ! Removed to avoid introducing determinants already presents in the wf !double precision, parameter :: norm_thr = 1.d-16 - allocate (indices(N_det), & + allocate (indices(N_det), hij_cache(mo_num,mo_num,2), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) ! Pre-compute excitation degrees wrt alpha determinants @@ -511,11 +511,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc maskInd = maskInd + 1 if(mod(maskInd, csubset) == (subset-1)) then + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache(1,1,1),mo_integrals_map) + if (sp /= 3) then ! AA or BB + call get_mo_two_e_integrals_ij(h1,h2,mo_num,hij_cache(1,1,2),mo_integrals_map) + endif call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, hij_cache) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if @@ -531,7 +535,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) - deallocate(banned, bannedOrb,mat) + deallocate(banned, bannedOrb, mat, hij_cache) end subroutine BEGIN_TEMPLATE @@ -556,7 +560,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ double precision, external :: diag_H_mat_elem_fock double precision :: E_shift double precision :: s_weight(N_states,N_states) - PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs + PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs thresh_sym excitation_ref hf_bitmask elec_alpha_num do jstate=1,N_states do istate=1,N_states s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) @@ -742,7 +746,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ do istate=1,N_states delta_E = E0(istate) - Hii + E_shift alpha_h_psi = mat(istate, p1, p2) - if (alpha_h_psi == 0.d0) cycle + if (dabs(alpha_h_psi) < mo_integrals_threshold) cycle val = alpha_h_psi + alpha_h_psi tmp = dsqrt(delta_E * delta_E + val * val) @@ -914,7 +918,7 @@ single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; END_TEMPLATE -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, hij_cache) use bitmasks implicit none BEGIN_DOC @@ -926,6 +930,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere integer, intent(in) :: sp, i_gen, N_sel integer, intent(in) :: interesting(0:N_sel) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) @@ -995,18 +1000,36 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) else - call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d2(banned, p, sp) + if(sp == 3) then + do j=1,p(0,2) + do ii=1,p(0,1) + banned(p(ii,1), p(j,2),1) = .true. + end do + end do + else + do ii=1,p(0, sp) + do j=1,ii-1 + banned(p(j,sp), p(ii,sp),1) = .true. + banned(p(ii,sp), p(j,sp),1) = .true. + end do + end do + end if else if(nt == 3) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d1(bannedOrb, p) + do ii = 1, p(0, 1) + bannedOrb(p(ii, 1), 1) = .true. + end do + do ii = 1, p(0, 2) + bannedOrb(p(ii, 2), 2) = .true. + end do end if end do @@ -1037,6 +1060,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 + PROVIDE mo_integrals_threshold tip = p(0,1) * p(0,2) ma = sp @@ -1062,7 +1086,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) @@ -1092,7 +1116,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(turn2(i), 1) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij /= 0.d0) then + if (dabs(hij) > mo_integrals_threshold) then hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states @@ -1120,7 +1144,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1142,7 +1166,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) if (puti < putj) then @@ -1179,7 +1203,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end -subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1190,6 +1214,8 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1201,12 +1227,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) integer :: bant - double precision, allocatable :: hij_cache(:,:) + double precision, allocatable :: hij_cache1(:,:) double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) PROVIDE mo_integrals_map N_int allocate (lbanned(mo_num, 2)) - allocate (hij_cache(mo_num,2)) + allocate (hij_cache1(mo_num,2)) lbanned = bannedOrb do i=1, p(0,1) @@ -1230,13 +1256,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache(hfix,putj,1) - hij_cache(putj,hfix,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1248,7 +1272,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache(putj,hfix,1) - hij_cache(hfix,putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1274,15 +1298,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) pfix = p(1,mi) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p1 do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1296,7 +1320,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 ! do puti=1,mo_num !HOT if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states @@ -1327,13 +1351,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) puti = p(i, ma) p1 = p(turn3(1,i), ma) p2 = p(turn3(2,i), ma) - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1342,7 +1366,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1,mo_num if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1364,14 +1388,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,ma) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p2 do puti=1,mo_num if(lbanned(puti,ma)) cycle putj = p2 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1383,7 +1407,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states @@ -1408,7 +1432,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end if end if - deallocate(lbanned,hij_cache) + deallocate(lbanned,hij_cache1) !! MONO if(sp == 3) then @@ -1439,7 +1463,7 @@ end -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1450,6 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) integer :: i, j, k, s, h1, h2, p1, p2, puti, putj double precision :: hij, phase @@ -1457,16 +1482,14 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) logical :: ok integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + PROVIDE mo_integrals_threshold if(sp == 3) then ! AB h1 = p(1,1) h2 = p(1,2) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1475,9 +1498,9 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache(p2,p1,1) * phase end if - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT @@ -1490,18 +1513,16 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,sp) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) - if (hij == 0.d0) cycle + hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if !DIR$ LOOP COUNT AVG(4) @@ -1512,50 +1533,9 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end if - deallocate(hij_cache1,hij_cache2) end -subroutine past_d1(bannedOrb, p) - use bitmasks - implicit none - - logical, intent(inout) :: bannedOrb(mo_num, 2) - integer, intent(in) :: p(0:4, 2) - integer :: i,s - - do s = 1, 2 - do i = 1, p(0, s) - bannedOrb(p(i, s), s) = .true. - end do - end do -end - - -subroutine past_d2(banned, p, sp) - use bitmasks - implicit none - - logical, intent(inout) :: banned(mo_num, mo_num) - integer, intent(in) :: p(0:4, 2), sp - integer :: i,j - - if(sp == 3) then - do j=1,p(0,2) - do i=1,p(0,1) - banned(p(i,1), p(j,2)) = .true. - end do - end do - else - do i=1,p(0, sp) - do j=1,i-1 - banned(p(j,sp), p(i,sp)) = .true. - banned(p(i,sp), p(j,sp)) = .true. - end do - end do - end if -end - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 0dc939cb..f57b7f92 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization with ONE DIAGONAL DRESSING OPERATOR diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f index 1a8269f4..c8848998 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 24f4fa10..1ff6632c 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies,sze,N_st,N_st_diag,converged,hcalc) - use mmap_module BEGIN_DOC ! Generic Davidson diagonalization with TWO DRESSING VECTORS ! diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index cedaaf0a..ca59a688 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_diag,dressing_state,dressing_vec,idress,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization. diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index deb7e3a9..ad60b2a8 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index d89aaadb..ca0a835e 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index 9940bf1e..a277d9ef 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -1,6 +1,6 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,h_mat) - use mmap_module +! use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization with specific diagonal elements of the H matrix @@ -160,9 +160,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! type(c_ptr) :: ptr_w, ptr_s ! integer :: fd_s, fd_w ! call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 8, fd_w, .False., ptr_w) +! 8, fd_w, .False., .True., ptr_w) ! call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 4, fd_s, .False., ptr_s) +! 4, fd_s, .False., .True., ptr_s) ! call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) ! call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) ! else diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index b7179c18..15bf256d 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -228,7 +228,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index fa8aff80..656dd1d9 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -229,7 +229,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index fd967ecc..fb04b29b 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -270,9 +270,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 4, fd_s, .False., ptr_s) + 4, fd_s, .False., .True., ptr_s) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) else diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 96ca84ab..86df3a19 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -251,7 +251,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/determinants/slater_rules_general.irp.f b/src/determinants/slater_rules_general.irp.f new file mode 100644 index 00000000..e987c846 --- /dev/null +++ b/src/determinants/slater_rules_general.irp.f @@ -0,0 +1,192 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end diff --git a/src/dft_utils_func/on_top_from_ueg.irp.f b/src/dft_utils_func/on_top_from_ueg.irp.f index 4e28ad89..711ffc39 100644 --- a/src/dft_utils_func/on_top_from_ueg.irp.f +++ b/src/dft_utils_func/on_top_from_ueg.irp.f @@ -32,7 +32,6 @@ double precision function g0_UEG_mu_inf(rho_a,rho_b) C = 0.08193d0 D = -0.01277d0 E = 0.001859d0 - x = -d2*rs if (dabs(rho) > 1.d-20) then rs = (3d0 / (4d0*pi*rho))**(1d0/3d0) ! JT: serious bug fixed 20/03/19 x = -d2*rs diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index 192cb25a..ad931402 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -48,7 +48,7 @@ integer :: i,j do i = 1, n_points_final_grid do j = 1, mo_num - mos_in_r_array_transp(i,j) = mos_in_r_array(j,i) + mos_in_r_array_transp(i,j) = mos_in_r_array_omp(j,i) enddo enddo END_PROVIDER diff --git a/src/ezfio_files/01.convert.bats b/src/ezfio_files/convert_bats_old similarity index 100% rename from src/ezfio_files/01.convert.bats rename to src/ezfio_files/convert_bats_old diff --git a/src/ezfio_files/get_unit_and_open.irp.f b/src/ezfio_files/get_unit_and_open.irp.f index 6440579f..d6a7efac 100644 --- a/src/ezfio_files/get_unit_and_open.irp.f +++ b/src/ezfio_files/get_unit_and_open.irp.f @@ -47,11 +47,13 @@ integer function getUnitAndOpen(f,mode) endif open(unit=getUnitAndOpen,file=f,status='OLD',action='READ',form='UNFORMATTED') else if (mode.eq.'W') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='UNFORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='UNFORMATTED') + else if (mode.eq.'A') then + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='UNFORMATTED') else if (mode.eq.'w') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='FORMATTED') else if (mode.eq.'a') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',position='APPEND',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='FORMATTED') else if (mode.eq.'x') then open(unit=getUnitAndOpen,file=new_f,form='FORMATTED') endif diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index b496a089..214dfa86 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -115,9 +115,6 @@ rm -rf $EZFIO run hco.ezfio -113.1841002944744 } -@test "HBO" { # 0.805600 1.4543s - run hbo.ezfio -100.018582259096 -} @test "H2S" { # 1.655600 4.21402s run h2s.ezfio -398.6944130421982 @@ -127,9 +124,6 @@ rm -rf $EZFIO run h3coh.ezfio -114.9865030596373 } -@test "H2O" { # 1.811100 1.84387s - run h2o.ezfio -0.760270218692179E+02 -} @test "H2O2" { # 2.217000 8.50267s run h2o2.ezfio -150.7806608469964 @@ -187,13 +181,6 @@ rm -rf $EZFIO run oh.ezfio -75.42025413469165 } -@test "[Cu(NH3)4]2+" { # 59.610100 4.18766m - [[ -n $TRAVIS ]] && skip - qp set_file cu_nh3_4_2plus.ezfio - qp set scf_utils thresh_scf 1.e-10 - run cu_nh3_4_2plus.ezfio -1862.97590358903 -} - @test "SO2" { # 71.894900 3.22567m [[ -n $TRAVIS ]] && skip run so2.ezfio -41.55800401346361 diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 65b3d63c..6d917322 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -194,17 +194,28 @@ END_PROVIDER endif - double precision :: rss + double precision :: rss, mem0, mem double precision :: memory_of_double integer :: iblock - integer, parameter :: block_size = 32 + integer :: block_size + + call resident_memory(mem0) + + block_size = 1024 + + rss = memory_of_double(2.d0*ao_num*ao_num) + do + mem = mem0 + block_size*rss + if ( (block_size < 2).or.(mem < qp_max_mem) ) exit + block_size = block_size/2 + enddo + + call check_mem(block_size*rss, irp_here) - rss = memory_of_double(ao_num*ao_num) - call check_mem(2.d0*block_size*rss, irp_here) allocate(X2(ao_num,ao_num,block_size,2)) allocate(X3(ao_num,block_size,ao_num,2)) - + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) do iblock=1,cholesky_ao_num,block_size diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 088a2416..da9d8fc9 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -1,9 +1,21 @@ +[io_mo_cholesky] +type: Disk_access +doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [io_mo_two_e_integrals] type: Disk_access doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[mo_integrals_cache_shift] +type: integer +doc: Adjusts the size of the MO integrals cache. 2: 2KB, 3: 32KB, 4: 512KB, 5: 8MB, 6: 128MB, 7: 2GB, 8: 32GB, 9: 512GB +interface: ezfio, provider, ocaml +default: 7 + [mo_integrals_threshold] type: Threshold doc: If | | < `mo_integrals_threshold` then is zero @@ -11,12 +23,6 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_mo -[no_vvvv_integrals] -type: logical -doc: If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -interface: ezfio,provider,ocaml -default: false - [io_mo_two_e_integrals_erf] type: Disk_access doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 349f13b9..7e2c8b37 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,9 +1,26 @@ +BEGIN_PROVIDER [ logical, do_mo_cholesky ] + implicit none + BEGIN_DOC + ! If True, use Cholesky vectors for MO integrals + END_DOC + do_mo_cholesky = do_ao_cholesky +! do_mo_cholesky = .False. +END_PROVIDER + BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis END_DOC - cholesky_mo_num = cholesky_ao_num + integer, external :: getUnitAndOpen + integer :: iunit + if (read_mo_cholesky) then + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) cholesky_mo_num + close(iunit) + else + cholesky_mo_num = cholesky_ao_num + endif END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] @@ -30,22 +47,57 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC - ! Cholesky vectors in MO basis + ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) END_DOC double precision, allocatable :: X(:,:,:) - integer :: ierr - print *, 'AO->MO Transformation of Cholesky vectors' + double precision :: wall0, wall1 + integer, external :: getUnitAndOpen + integer :: iunit, ierr, rank - allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': Allocation failed' - endif - call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) - call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) - deallocate(X) + if (read_mo_cholesky) then + print *, 'Reading Cholesky MO vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) rank + if (cholesky_mo_num /= rank) then + stop 'inconsistent rank' + endif + read(iunit) cholesky_mo_transp + close(iunit) + else + print *, '' + print *, 'AO->MO Transformation of Cholesky vectors' + print *, '-----------------------------------------' + print *, '' + + call wall_time(wall0) + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) + call wall_time(wall1) + print*,'Time to provide MO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + + + if (write_mo_cholesky) then + print *, 'Writing Cholesky MO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') + write(iunit) cholesky_mo_num + write(iunit) cholesky_mo_transp + close(iunit) + call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') + endif + endif END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f deleted file mode 100644 index 2be09689..00000000 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ /dev/null @@ -1,189 +0,0 @@ -!BEGIN_PROVIDER [ logical, no_vvvv_integrals ] -! implicit none -! BEGIN_DOC -! If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -! END_DOC -! -! no_vvvv_integrals = .False. -!END_PROVIDER - -BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] - implicit none - BEGIN_DOC - ! MO coefficients without virtual MOs - END_DOC - integer :: j,jj - - do j=1,n_core_inact_act_orb - jj = list_core_inact_act(j) - mo_coef_novirt(:,j) = mo_coef(:,jj) - enddo - -END_PROVIDER - -subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) - implicit none - BEGIN_DOC - ! Transform A from the |AO| basis to the |MO| basis excluding virtuals - ! - ! $C^\dagger.A_{ao}.C$ - END_DOC - integer, intent(in) :: LDA_ao,LDA_mo - double precision, intent(in) :: A_ao(LDA_ao,ao_num) - double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) - double precision, allocatable :: T(:,:) - - allocate ( T(ao_num,n_core_inact_act_orb) ) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T - - call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & - 1.d0, A_ao,LDA_ao, & - mo_coef_novirt, size(mo_coef_novirt,1), & - 0.d0, T, size(T,1)) - - call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& - 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & - T, ao_num, & - 0.d0, A_mo, size(A_mo,1)) - - deallocate(T) -end - - -subroutine four_idx_novvvv - print*,'********' - print*,'********' - print*,'********' - print*,'WARNING :: Using four_idx_novvvv, and we are not sure that this routine is not bugged ...' - print*,'********' - print*,'********' - print*,'********' - use map_module - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l,n_integrals - double precision, allocatable :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:) - double precision, external :: get_ao_two_e_integral - integer(key_kind), allocatable :: idx(:) - real(integral_kind), allocatable :: values(:) - - integer :: p,q,r,s - double precision :: c - allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , & - T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & - !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & - !$OMP list_core_inact_act,T2,ao_integrals_map) & - !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & - !$OMP f,f2,d,c) - allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), & - idx(mo_num*mo_num), values(mo_num*mo_num) ) - - ! - !$OMP DO - do s=1,ao_num - do r=1,ao_num - do q=1,ao_num - do p=1,r - f (p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) - f (r,q,p) = f(p,q,r) - enddo - enddo - enddo - do r=1,ao_num - do q=1,ao_num - do p=1,ao_num - f2(p,q,r) = f(p,r,q) - enddo - enddo - enddo - ! f (p,q,r) = - ! f2(p,q,r) = - - do r=1,ao_num - call ao_to_mo_novirt(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1)) - call ao_to_mo_novirt(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1)) - enddo - ! T (i,j,p,q) = - ! T2(i,j,p,q) = - - enddo - !$OMP END DO - - !$OMP DO - do j=1,n_core_inact_act_orb - do i=1,n_core_inact_act_orb - do s=1,ao_num - do r=1,ao_num - f (r,s,1) = T (i,j,r,s) - f2(r,s,1) = T2(i,j,r,s) - enddo - enddo - call ao_to_mo(f ,size(f ,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - - call ao_to_mo(f2,size(f2,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),k,list_core_inact_act(j),l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - enddo - enddo - !$OMP END DO - deallocate(f,f2,d,idx,values) - - !$OMP END PARALLEL - - deallocate(T,T2) - - - call map_sort(mo_integrals_map) - call map_unique(mo_integrals_map) - call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) - -end - -subroutine four_idx_novvvv2 - use bitmasks - implicit none - integer :: i - integer(bit_kind) :: mask_ijkl(N_int,4) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = full_ijkl_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = full_ijkl_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - -end diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index eb05da84..c0dab506 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -10,7 +10,7 @@ double precision :: get_two_e_integral double precision :: integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index ada256a2..168c34b4 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -32,60 +32,96 @@ subroutine insert_into_mo_integrals_map(n_integrals, & call map_update(mo_integrals_map, buffer_i, buffer_values, n_integrals, thr) end - BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_min_8 ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_max_8 ] + BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_num,8),mo_integrals_cache_min_8+127_8) - mo_integrals_cache_min = max(1,elec_alpha_num - 63) - mo_integrals_cache_max = min(mo_num,mo_integrals_cache_min+127) + + mo_integrals_cache_size = 2_8**mo_integrals_cache_shift + + mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) + mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) + print *, 'MO integrals cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*128_8) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:(1_8*mo_integrals_cache_size)**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map - integer*8 :: i,j,k,l - integer*4 :: i4,j4,k4,l4 + integer :: i,j,k,l integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) - !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) - !DIR$ FORCEINLINE - call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) - mo_integrals_cache(ii) = integral + if (do_mo_cholesky) then + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE (k,l,ii) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = shiftl(ii,mo_integrals_cache_shift) + ii = shiftl(ii,mo_integrals_cache_shift) + call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & + mo_integrals_cache_max-mo_integrals_cache_min+1, & + cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & + cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & + mo_integrals_cache(ii), mo_integrals_cache_size) + enddo + enddo + !$OMP END PARALLEL DO + + else + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + do j=mo_integrals_cache_min,mo_integrals_cache_max + do i=mo_integrals_cache_min,mo_integrals_cache_max + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(mo_integrals_map,idx,integral) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) + mo_integrals_cache(ii) = integral + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif END_PROVIDER +double precision function get_two_e_integral_cache(i,j,k,l) + use map_module + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis taken from the cache + END_DOC + integer, intent(in) :: i,j,k,l + integer*8 :: ii + + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) + get_two_e_integral_cache = mo_integrals_cache(ii) + +end + + double precision function get_two_e_integral(i,j,k,l,map) use map_module implicit none @@ -95,10 +131,11 @@ double precision function get_two_e_integral(i,j,k,l,map) integer, intent(in) :: i,j,k,l integer(key_kind) :: idx integer :: ii - integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + if (use_banned_excitation) then if (banned_excitation(i,k)) then get_two_e_integral = 0.d0 @@ -109,26 +146,181 @@ double precision function get_two_e_integral(i,j,k,l,map) return endif endif + + ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then - !DIR$ FORCEINLINE - call two_e_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - get_two_e_integral = dble(tmp) + + if (iand(ii, -mo_integrals_cache_size) == 0) then + + double precision, external :: get_two_e_integral_cache + get_two_e_integral = get_two_e_integral_cache(i,j,k,l) + else - ii_8 = int(l,8)-mo_integrals_cache_min_8 - ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) - get_two_e_integral = mo_integrals_cache(ii_8) + + ! Integral is not in the cache + + if (do_mo_cholesky) then + + double precision, external :: ddot + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) +! double precision, external :: get_from_mo_cholesky_cache +! get_two_e_integral = get_from_mo_cholesky_cache(i,j,k,l,.False.) + + else + + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + get_two_e_integral = dble(tmp) + endif + endif end +subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) + use map_module + implicit none + BEGIN_DOC + ! Returns multiple integrals in the MO basis, all + ! i for j,k,l fixed. + END_DOC + integer, intent(in) :: j,k,l, sze + double precision, intent(out) :: out_val(sze) + type(map_type), intent(inout) :: map + integer :: i + + integer :: ii + real(integral_kind) :: tmp + integer(key_kind) :: i1, idx + integer(key_kind) :: p,q,r,s,i2 + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + + if (banned_excitation(j,l)) then + out_val(1:sze) = 0.d0 + return + endif +! + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) + + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Some integrals are in the cache + + if (mo_integrals_cache_min > 1) then + + if (do_mo_cholesky) then + + !TODO: here + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + call get_mo_two_e_integrals_cache(j,k,l,sze,out_val) + + if (mo_integrals_cache_max < mo_num) then + + if (do_mo_cholesky) then + + !TODO: here + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + else + + if (do_mo_cholesky) then + + !TODO: here + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,sze + if (banned_excitation(i,k)) cycle + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + +end + double precision function mo_two_e_integral(i,j,k,l) implicit none BEGIN_DOC @@ -142,68 +334,25 @@ double precision function mo_two_e_integral(i,j,k,l) return end -subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) + +subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) use map_module implicit none BEGIN_DOC ! Returns multiple integrals in the MO basis, all - ! i for j,k,l fixed. + ! i for j,k,l fixed, all integrals from the cache END_DOC integer, intent(in) :: j,k,l, sze double precision, intent(out) :: out_val(sze) - type(map_type), intent(inout) :: map - integer :: i - double precision, external :: get_two_e_integral + integer*8 :: ii - integer :: ii, ii0 - integer*8 :: ii_8, ii0_8 - real(integral_kind) :: tmp - integer(key_kind) :: i1, idx - integer(key_kind) :: p,q,r,s,i2 - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+int(mo_integrals_cache_max-mo_integrals_cache_min,8)) -!DEBUG -! do i=1,sze -! out_val(i) = get_two_e_integral(i,j,k,l,map) -! enddo -! return -!DEBUG - - out_val(1:sze) = 0.d0 - if (banned_excitation(j,l)) then - return - endif - - ii0 = l-mo_integrals_cache_min - ii0 = ior(ii0, k-mo_integrals_cache_min) - ii0 = ior(ii0, j-mo_integrals_cache_min) - - ii0_8 = int(l,8)-mo_integrals_cache_min_8 - ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8) - ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8) - - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) - - do i=1,sze - if (banned_excitation(i,k)) cycle - ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -128) == 0) then - ii_8 = ior( shiftl(ii0_8,7), int(i,8)-mo_integrals_cache_min_8) - out_val(i) = mo_integrals_cache(ii_8) - else - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) - endif - enddo end subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) @@ -220,9 +369,32 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - do j=1,sze - call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) - enddo + if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max MO integrals transformation' - print *, '---------------------------------' - print *, '' - call wall_time(wall_1) call cpu_time(cpu_1) - if(no_vvvv_integrals)then - call four_idx_novvvv_old + if (do_mo_cholesky) then + PROVIDE cholesky_mo_transp else if (do_ao_cholesky) then call add_integrals_to_map_cholesky + else if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm - else - call add_integrals_to_map(full_ijkl_bitmask_4) - endif + call add_integrals_to_map(full_ijkl_bitmask_4) endif endif @@ -92,8 +99,15 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'DGEMM-based AO->MO Transformation' + print *, '---------------------------------' + print *, '' + if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' + print *, irp_here, ': Integer overflow in ao_num**3. Set do_ao_cholesky=.True.' endif allocate (a1(ao_num,ao_num,ao_num,ao_num)) @@ -213,6 +227,12 @@ subroutine add_integrals_to_map(mask_ijkl) PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'Sparse AO->MO Transformation' + print *, '----------------------------' + print *, '' + !Get list of MOs for i,j,k and l !------------------------------- @@ -453,6 +473,9 @@ subroutine add_integrals_to_map(mask_ijkl) end + + + subroutine add_integrals_to_map_cholesky use bitmasks implicit none @@ -469,6 +492,7 @@ subroutine add_integrals_to_map_cholesky integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) + PROVIDE cholesky_mo_transp call set_multiple_levels_omp(.False.) !$OMP PARALLEL DEFAULT(SHARED) & @@ -515,837 +539,7 @@ subroutine add_integrals_to_map_cholesky end -subroutine add_integrals_to_map_three_indices(mask_ijk) - use bitmasks - implicit none - BEGIN_DOC - ! Adds integrals to the MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijk(N_int,3) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k - integer :: m - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int ) - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,1)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,2)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,3)) - enddo - if(j==0)then - return - endif - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = l0 - j = list_ijkl(j0,2) - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then !min(k,j1-i1) - exit - endif - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - if(i==k .and. j==l .and. i.ne.j)then - buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0 - endif - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = k0 - j = list_ijkl(k0,2) - i0 = l0 - i = list_ijkl(i0,2) - if (k==l) then - cycle - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - -end - - -subroutine add_integrals_to_map_no_exit_34(mask_ijkl) - use bitmasks - implicit none - - BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k, n_l - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) - call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - - !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - !IRP_IF COARRAY - ! if (mod(l1-this_image(),num_images()) /= 0 ) then - ! cycle - ! endif - !IRP_ENDIF - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_l - l = list_ijkl(l0,4) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - j1 = shiftr((l*l-l),1) - do j0 = 1, n_j - j = list_ijkl(j0,2) - if (j > l) then - exit - endif - j1 += 1 - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if(i> k)then - exit - endif - - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - !IRP_IF COARRAY - ! print*, 'Communicating the map' - ! call communicate_mo_integrals() - !IRP_ENDIF - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - - -end - - - - BEGIN_PROVIDER [ double precision, mo_two_e_integral_jj_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integral_jj_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_anti_from_ao(i,j) = J_ij - K_ij - END_DOC - - integer :: i,j,p,q,r,s - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integral_jj_from_ao = 0.d0 - mo_two_e_integrals_jj_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(mo_num,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integral_jj_from_ao,mo_two_e_integrals_jj_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num), & - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i=1,mo_num - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i=1,mo_num - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i=1,mo_num - do j=1,mo_num - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integral_jj_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_jj_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_jj_anti_from_ao = mo_two_e_integral_jj_from_ao - mo_two_e_integrals_jj_exchange_from_ao - - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integrals_vv_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_anti_from_ao(i,j) = J_ij - K_ij - ! but only for the virtual orbitals - END_DOC - - integer :: i,j,p,q,r,s - integer :: i0,j0 - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integrals_vv_from_ao = 0.d0 - mo_two_e_integrals_vv_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(n_virt_orb,mo_num,list_virt,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integrals_vv_from_ao,mo_two_e_integrals_vv_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num),& - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i0=1,n_virt_orb - i= list_virt(i0) - do j0=1,n_virt_orb - j = list_virt(j0) - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integrals_vv_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_vv_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_vv_anti_from_ao = mo_two_e_integrals_vv_from_ao - mo_two_e_integrals_vv_exchange_from_ao - ! print*, '**********' - ! do i0 =1, n_virt_orb - ! i = list_virt(i0) - ! print*, mo_two_e_integrals_vv_from_ao(i,i) - ! enddo - ! print*, '**********' - - -END_PROVIDER BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj, (mo_num,mo_num) ] @@ -1362,7 +556,7 @@ END_PROVIDER double precision :: get_two_e_integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:) allocate (buffer(cholesky_mo_num,mo_num)) do k=1,cholesky_mo_num diff --git a/src/mo_two_e_ints/no_vvvv.irp.f b/src/mo_two_e_ints/no_vvvv.irp.f deleted file mode 100644 index 48a7f5e2..00000000 --- a/src/mo_two_e_ints/no_vvvv.irp.f +++ /dev/null @@ -1,88 +0,0 @@ - -subroutine four_idx_novvvv_old - use map_module - use bitmasks - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! -! if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) -! endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! -! if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) -end diff --git a/src/mu_of_r/EZFIO.cfg b/src/mu_of_r/EZFIO.cfg index a66b00ef..7a39b012 100644 --- a/src/mu_of_r/EZFIO.cfg +++ b/src/mu_of_r/EZFIO.cfg @@ -6,9 +6,9 @@ size: (becke_numerical_grid.n_points_final_grid,determinants.n_states) [mu_of_r_potential] type: character*(32) -doc: type of potential for the mu(r) interaction: can be [ hf| cas_full | cas_truncated | pure_act] +doc: type of potential for the mu(r) interaction: can be [ hf| hf_sparse | cas_full | cas_truncated | pure_act] interface: ezfio, provider, ocaml -default: hf +default: hf_sparse [io_mu_of_r] type: Disk_access diff --git a/src/mu_of_r/basis_def.irp.f b/src/mu_of_r/basis_def.irp.f index fff9f581..e433f4d8 100644 --- a/src/mu_of_r/basis_def.irp.f +++ b/src/mu_of_r/basis_def.irp.f @@ -114,3 +114,48 @@ BEGIN_PROVIDER [double precision, basis_mos_in_r_array, (n_basis_orb,n_points_fi enddo enddo END_PROVIDER + +! BEGIN_PROVIDER [integer, n_docc_val_orb_for_cas] +!&BEGIN_PROVIDER [integer, n_max_docc_val_orb_for_cas] +! implicit none +! BEGIN_DOC +! ! Number of DOUBLY OCCUPIED VALENCE ORBITALS for the CAS wave function +! ! +! ! This determines the size of the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! integer :: i +! n_docc_val_orb_for_cas = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! n_docc_val_orb_for_cas +=1 +! endif +! enddo +! n_max_docc_val_orb_for_cas = maxval(n_docc_val_orb_for_cas) +! +!END_PROVIDER +! +!BEGIN_PROVIDER [integer, list_doc_valence_orb_for_cas, (n_max_docc_val_orb_for_cas)] +! implicit none +! BEGIN_DOC +! ! List of OCCUPIED valence orbitals for each spin to build the f_{HF}(r_1,r_2) function +! ! +! ! This corresponds to ALL OCCUPIED orbitals in the HF wave function, except those defined as "core" +! ! +! ! This determines the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! j = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! j +=1 +! list_doc_valence_orb_for_cas(j) = i +! endif +! enddo +! +!END_PROVIDER + diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f new file mode 100644 index 00000000..5dd69eb6 --- /dev/null +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -0,0 +1,320 @@ +BEGIN_PROVIDER [integer, list_couple_hf_orb_r1, (2,n_couple_orb_r1)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_hf_orb_r1(1,itmp) = i + list_couple_hf_orb_r1(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, list_couple_hf_orb_r2, (2,n_couple_orb_r2)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_hf_orb_r2(1,itmp) = i + list_couple_hf_orb_r2(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, n_couple_orb_r1] + implicit none + BEGIN_DOC + ! number of couples of alpha occupied times any basis orbital + END_DOC + n_couple_orb_r1 = n_occ_val_orb_for_hf(1) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [integer, n_couple_orb_r2] + implicit none + BEGIN_DOC + ! number of couples of beta occupied times any basis orbital + END_DOC + n_couple_orb_r2 = n_occ_val_orb_for_hf(2) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r1 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r1(:,:),mo_chol_r1(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r1,mo_b_r1 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r1(n_couple_orb_r1,n_points_final_grid)) + allocate(mo_chol_r1(cholesky_mo_num,n_couple_orb_r1)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r1(itmp,ipoint) = mo_i_r1 * mo_b_r1 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r1(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) + + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r2 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r2(:,:),mo_chol_r2(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r2,mo_b_r2 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r2(n_couple_orb_r2,n_points_final_grid)) + allocate(mo_chol_r2(cholesky_mo_num,n_couple_orb_r2)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r2(itmp,ipoint) = mo_i_r2 * mo_b_r2 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r2(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint,m,k + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1 + if(elec_alpha_num == elec_beta_num)then + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + provide mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.d0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r1(m,ipoint) * mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r1 + else + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + provide mos_times_cholesky_r2 mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r2,mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.D0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r2(m,ipoint)*mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r2 mos_times_cholesky_r1 + endif +END_PROVIDER + +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:),delta_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 + else + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 + endif +END_PROVIDER + +BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] + implicit none + integer :: ipoint,i,ii + double precision :: dm_a, dm_b,wall0,wall1 + print*,'providing on_top_hf_grid ...' + provide mos_in_r_array_omp + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,dm_a,dm_b,ii,i) & + !$OMP ShARED (n_points_final_grid,n_occ_val_orb_for_hf,mos_in_r_array_omp,list_valence_orb_for_hf,on_top_hf_grid) + do ipoint = 1, n_points_final_grid + dm_a = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + dm_a += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + dm_b = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + dm_b += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide on_top_hf_grid = ',wall1-wall0 +END_PROVIDER + diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 6b49b9df..f2bb7145 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -13,7 +13,6 @@ integer :: ipoint,istate double precision :: wall0,wall1 print*,'providing mu_of_r ...' -! PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals call wall_time(wall0) if (read_mu_of_r) then @@ -26,6 +25,10 @@ do ipoint = 1, n_points_final_grid if(mu_of_r_potential.EQ."hf")then mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + else if(mu_of_r_potential.EQ."hf_old")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + else if(mu_of_r_potential.EQ."hf_sparse")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) else @@ -61,18 +64,17 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals print*,'providing mu_of_r_hf ...' call wall_time(wall0) + PROVIDE f_hf_cholesky on_top_hf_grid sqpi = dsqrt(dacos(-1.d0)) - provide f_psi_hf_ab !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & - !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_hf_cholesky,on_top_hf_grid,sqpi) do ipoint = 1, n_points_final_grid - f_hf = f_psi_hf_ab(ipoint) - on_top = on_top_hf_mu_r(ipoint) + f_hf = f_hf_cholesky(ipoint) + on_top = on_top_hf_grid(ipoint) if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then w_hf = 1.d+10 else @@ -85,6 +87,80 @@ print*,'Time to provide mu_of_r_hf = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_hf_sparse, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + print*,'providing mu_of_r_hf_sparse ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + PROVIDE f_hf_cholesky_sparse on_top_hf_grid + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_sparse,f_hf_cholesky_sparse,on_top_hf_grid,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_hf_cholesky_sparse(ipoint) + on_top = on_top_hf_grid(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_sparse(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_sparse = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + print*,'providing mu_of_r_hf_old ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + provide f_psi_hf_ab + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_old,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_psi_hf_ab(ipoint) + on_top = on_top_hf_mu_r(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_old(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_old = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_psi_cas, (n_points_final_grid,N_states) ] implicit none BEGIN_DOC diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 8c6b79d7..5a6b3c03 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -41,6 +41,11 @@ subroutine run(f) integer , allocatable :: Vi(:,:) double precision :: s +! TODO: +! - If Cholesky AO in trexio file, read cholesky ao vectors +! - If Cholesky MO in trexio file, read cholesky mo vectors +! - If Cholesky MO not in trexio file, force do_cholesky_mo to False + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) if (rc /= TREXIO_SUCCESS) then diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index e8d85a2f..0306f64f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -7,7 +7,7 @@ #include -void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) +void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, int single_node) { int fd; int result; @@ -21,7 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); + map = mmap(NULL, bytes, PROT_READ, MAP_PRIVATE, fd, 0); } else { @@ -39,8 +39,8 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error calling lseek() to stretch the file"); exit(EXIT_FAILURE); } - - result = write(fd, "", 1); + + result = write(fd, " ", 1); if (result != 1) { close(fd); printf("%s:\n", filename); @@ -48,7 +48,17 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); +/* + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); + if (map == MAP_FAILED) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + } +*/ + } else { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + } } if (map == MAP_FAILED) { diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index 97d0e8bf..e3a62b07 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -21,13 +21,13 @@ subroutine map_save_to_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) - call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ n_elements /)) if (.not.associated(map%consolidated_key)) then @@ -85,15 +85,15 @@ subroutine map_load_from_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 - call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) l = 0_8 diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index e69bf71e..e2e8dd76 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] character*(128) :: env integer, external :: get_total_available_memory - qp_max_mem = get_total_available_memory() + qp_max_mem = max(get_total_available_memory() - 1,3) call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then @@ -79,6 +79,26 @@ IRP_ENDIF call unlock_io() end function +double precision function memory_of_double8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 8.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_double8 = dble(n) * f +end function + +double precision function memory_of_int8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 4.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_int8 = dble(n) * f +end function + double precision function memory_of_double(n) implicit none BEGIN_DOC diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 41e60224..e342b422 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -7,12 +7,13 @@ module mmap_module ! File descriptors ! ---------------- - type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only) bind(c,name='mmap_fortran') + type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only, single_node) bind(c,name='mmap_fortran') use iso_c_binding character(c_char), intent(in) :: filename(*) integer(c_size_t), intent(in), value :: length integer(c_int), intent(out) :: fd integer(c_int), intent(in), value :: read_only + integer(c_int), intent(in), value :: single_node end function subroutine c_munmap_fortran(length, fd, map) bind(c,name='munmap_fortran') @@ -33,31 +34,33 @@ module mmap_module contains - subroutine mmap(filename, shape, bytes, fd, read_only, map) + subroutine mmap(filename, shape, bytes, fd, read_only, single_node, map) use iso_c_binding implicit none character*(*), intent(in) :: filename ! Name of the mapped file integer*8, intent(in) :: shape(:) ! Shape of the array to map integer, intent(in) :: bytes ! Number of bytes per element logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node integer, intent(out) :: fd ! File descriptor type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length - integer(c_int) :: fd_ + integer(c_int) :: fd_, read_only_, single_node_ integer :: i + read_only_ = 0 + single_node_ = 0 + if (read_only) read_only_ = 1 + if (single_node) single_node_ = 1 + length = int(bytes,8) do i=1,size(shape) length = length * shape(i) enddo - if (read_only) then - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) - else - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 0) - endif + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only_, single_node_) fd = fd_ end subroutine diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index fb6d9034..7d50d66a 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -58,6 +58,12 @@ doc: If true, the CCSD(T) will be computed. interface: ezfio,ocaml,provider default: False +[cc_par_t_stop] +type: double precision +doc: Stops the calculation when the statistical error bar is below the given value. +interface: ezfio,ocaml,provider +default: 1.e-5 + [cc_dev] type: logical doc: Only for dev purposes.