From 7ecc086cac4f9104e6df05dd46d1df4010bb208d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jun 2024 14:59:26 +0200 Subject: [PATCH] Introduce hij_cache in PT2 --- src/cipsi/selection.irp.f | 72 ++++++++++++++------------- src/mo_two_e_ints/map_integrals.irp.f | 16 +++++- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0281a1d4..88a2cbdc 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -178,7 +178,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical, allocatable :: banned(:,:,:), bannedOrb(:,:) double precision, allocatable :: coef_fullminilist_rev(:,:) - double precision, allocatable :: mat(:,:,:) + double precision, allocatable :: mat(:,:,:), hij_cache(:,:,:) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -205,7 +205,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc ! Removed to avoid introducing determinants already presents in the wf !double precision, parameter :: norm_thr = 1.d-16 - allocate (indices(N_det), & + allocate (indices(N_det), hij_cache(mo_num,mo_num,2), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) ! Pre-compute excitation degrees wrt alpha determinants @@ -511,11 +511,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc maskInd = maskInd + 1 if(mod(maskInd, csubset) == (subset-1)) then + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache(1,1,1),mo_integrals_map) + if (sp /= 3) then ! AA or BB + call get_mo_two_e_integrals_ij(h1,h2,mo_num,hij_cache(1,1,2),mo_integrals_map) + endif call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, hij_cache) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if @@ -531,7 +535,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) - deallocate(banned, bannedOrb,mat) + deallocate(banned, bannedOrb, mat, hij_cache) end subroutine BEGIN_TEMPLATE @@ -914,7 +918,7 @@ single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; END_TEMPLATE -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, hij_cache) use bitmasks implicit none BEGIN_DOC @@ -926,6 +930,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere integer, intent(in) :: sp, i_gen, N_sel integer, intent(in) :: interesting(0:N_sel) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) @@ -995,9 +1000,9 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) !, hij_cache) else - call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -1190,6 +1195,8 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1201,12 +1208,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) integer :: bant - double precision, allocatable :: hij_cache(:,:) + double precision, allocatable :: hij_cache1(:,:) double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) PROVIDE mo_integrals_map N_int allocate (lbanned(mo_num, 2)) - allocate (hij_cache(mo_num,2)) + allocate (hij_cache1(mo_num,2)) lbanned = bannedOrb do i=1, p(0,1) @@ -1230,13 +1237,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1248,7 +1255,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1274,15 +1281,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) pfix = p(1,mi) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p1 do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1296,7 +1303,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 ! do puti=1,mo_num !HOT if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states @@ -1327,13 +1334,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) puti = p(i, ma) p1 = p(turn3(1,i), ma) p2 = p(turn3(2,i), ma) - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1342,7 +1349,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1,mo_num if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1364,14 +1371,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,ma) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p2 do puti=1,mo_num if(lbanned(puti,ma)) cycle putj = p2 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1383,7 +1390,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states @@ -1408,7 +1415,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end if end if - deallocate(lbanned,hij_cache) + deallocate(lbanned,hij_cache1) !! MONO if(sp == 3) then @@ -1439,7 +1446,7 @@ end -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1450,6 +1457,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) integer :: i, j, k, s, h1, h2, p1, p2, puti, putj double precision :: hij, phase @@ -1457,8 +1465,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) logical :: ok integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) if(sp == 3) then ! AB @@ -1466,7 +1472,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = p(1,2) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1475,7 +1480,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache(p2,p1,1) * phase end if if (hij == 0.d0) cycle !DIR$ LOOP COUNT AVG(4) @@ -1490,8 +1495,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,sp) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? @@ -1500,7 +1503,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) + hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1512,7 +1515,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end if - deallocate(hij_cache1,hij_cache2) end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 90257bbd..516851b9 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -48,7 +48,7 @@ end mo_integrals_cache_shift = 7 ! 7 = log(128). Max 7 endif -!mo_integrals_cache_shift = 2 ! 5 = log(32). +mo_integrals_cache_shift = 2 ! 5 = log(32). mo_integrals_cache_size = 2**mo_integrals_cache_shift @@ -176,6 +176,8 @@ double precision function get_two_e_integral(i,j,k,l,map) double precision, external :: ddot get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) +! double precision, external :: get_from_mo_cholesky_cache +! get_two_e_integral = get_from_mo_cholesky_cache(i,j,k,l,.False.) else @@ -227,6 +229,11 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val(1:sze) = 0.d0 return endif +! +! if (do_mo_cholesky) then +! call get_from_mo_cholesky_caches(j,k,l,out_val) +! return +! endif ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) @@ -239,6 +246,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -276,6 +284,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -311,6 +320,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then + !TODO: here call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & cholesky_mo_transp(1,1,k), cholesky_mo_num, & cholesky_mo_transp(1,j,l), 1, 0.d0, & @@ -425,6 +435,10 @@ subroutine get_mo_two_e_integrals_i1j1(k,l,sze,out_array,map) cholesky_mo_transp(1,1,1), cholesky_mo_num, & cholesky_mo_transp(1,k,l), 1, 0.d0, & out_array, 1) +! +! do j=1,sze +! call get_from_mo_cholesky_caches(k,j,l,out_array(1,j)) +! enddo else