From 0c6d513bbf126e2ae92643a1e3cbfc94dbceab95 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 24 Jan 2025 19:07:16 +0100 Subject: [PATCH 1/3] Implemented single precision for cholesky mo --- src/ao_two_e_ints/cholesky.irp.f | 3 +- src/fci/pt2.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 59 +++++++- src/mo_two_e_ints/map_integrals.irp.f | 188 +++++++++++++++++++------- src/mp2/mp2.irp.f | 2 +- 5 files changed, 198 insertions(+), 56 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 69b18900..fdc729d5 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -466,10 +466,11 @@ END_PROVIDER endif + ! Reverse order of Cholesky vectors to increase precision in dot products !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num - cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) + cholesky_ao(1:ao_num,j,rank-k+1) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1) enddo enddo !$OMP END PARALLEL DO diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f index 53bf1699..186f1ff6 100644 --- a/src/fci/pt2.irp.f +++ b/src/fci/pt2.irp.f @@ -15,11 +15,11 @@ program pt2 ! sampling. ! END_DOC + PROVIDE all_mo_integrals if (.not. is_zmq_slave) then read_wf = .True. threshold_generators = 1.d0 SOFT_TOUCH read_wf threshold_generators - PROVIDE all_mo_integrals PROVIDE psi_energy call run else diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 1fed949d..835110de 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -7,7 +7,8 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! do_mo_cholesky = .False. END_PROVIDER -BEGIN_PROVIDER [ integer, cholesky_mo_num ] + BEGIN_PROVIDER [ integer, cholesky_mo_num ] +&BEGIN_PROVIDER [ integer, cholesky_mo_num_split, (1:5)] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis @@ -21,6 +22,12 @@ BEGIN_PROVIDER [ integer, cholesky_mo_num ] else cholesky_mo_num = cholesky_ao_num endif + cholesky_mo_num_split(1) = 0 + cholesky_mo_num_split(2) = cholesky_mo_num/4 + cholesky_mo_num_split(3) = 2*cholesky_mo_num_split(2) + cholesky_mo_num_split(4) = 3*cholesky_mo_num_split(2) + cholesky_mo_num_split(5) = cholesky_mo_num + cholesky_mo_num_split += 1 END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] @@ -49,7 +56,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, BEGIN_DOC ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: ! - ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) ! ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) END_DOC @@ -132,3 +139,51 @@ BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_ END_PROVIDER + + + +BEGIN_PROVIDER [ real, cholesky_mo_sp, (mo_num, mo_num, cholesky_mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis in stored in single precision + END_DOC + + integer :: k, i, j + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo_sp(i,j,k) = cholesky_mo_transp_sp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +BEGIN_PROVIDER [ real, cholesky_mo_transp_sp, (cholesky_mo_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis in s. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) + END_DOC + + integer :: i,j,k + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp_sp(k,i,j) = cholesky_mo_transp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + + diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 76f169b4..6040842e 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -9,6 +9,14 @@ BEGIN_PROVIDER [ logical, all_mo_integrals ] PROVIDE mo_two_e_integrals_in_map mo_integrals_cache mo_two_e_integrals_jj_exchange mo_two_e_integrals_jj_anti mo_two_e_integrals_jj big_array_exchange_integrals big_array_coulomb_integrals mo_one_e_integrals END_PROVIDER +BEGIN_PROVIDER [ logical, mo_cholesky_double ] + implicit none + BEGIN_DOC +! If true, use double precision to compute integrals from cholesky vectors + END_DOC + mo_cholesky_double = .True. +END_PROVIDER + !! MO Map !! ====== @@ -147,7 +155,7 @@ double precision function get_two_e_integral(i,j,k,l,map) type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky mo_cholesky_double cholesky_mo_transp_sp cholesky_mo_transp if (use_banned_excitation) then if (banned_excitation(i,k)) then @@ -178,12 +186,19 @@ double precision function get_two_e_integral(i,j,k,l,map) if (do_mo_cholesky) then double precision, external :: ddot - get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) - -! get_two_e_integral = 0.d0 -! do kk=1,cholesky_mo_num -! get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo + real, external :: sdot + integer :: isplit + if (mo_cholesky_double) then + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) + else + get_two_e_integral = 0.d0 + do isplit=1,4 + get_two_e_integral = get_two_e_integral + & + sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1) + enddo + endif else @@ -214,7 +229,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) real(integral_kind) :: tmp integer(key_kind) :: i1, idx integer(key_kind) :: p,q,r,s,i2 - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + real, allocatable :: out_val_sp(:) + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache cholesky_mo_transp cholesky_mo_transp_sp if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 @@ -225,6 +241,10 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) + if (do_mo_cholesky.and. .not.mo_cholesky_double) then + allocate(out_val_sp(sze)) + endif + if (iand(ii, -mo_integrals_cache_size) == 0) then ! Some integrals are in the cache @@ -232,11 +252,24 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + else + integer :: isplit + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + mo_integrals_cache_min-1, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp, 1) + out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) + enddo + endif else @@ -270,11 +303,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & - cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val(mo_integrals_cache_max+1), 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + else + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + mo_num-mo_integrals_cache_max, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp(mo_integrals_cache_max+1), 1) + out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze) + enddo + endif else @@ -306,11 +351,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, sze, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + else + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + sze, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp, 1) + out_val(1:sze) += out_val_sp(1:sze) + enddo + endif else @@ -513,7 +570,7 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) type(map_type), intent(inout) :: map integer :: i double precision, external :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map + PROVIDE mo_two_e_integrals_in_map mo_cholesky_double cholesky_mo_transp cholesky_mo_transp_sp if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max=mo_integrals_cache_min).and.(l<=mo_integrals_cache_max) ) then double precision, external :: ddot + real, external :: sdot integer :: kk - do i=1,mo_integrals_cache_min-1 - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + if (mo_cholesky_double) then - do i=mo_integrals_cache_min,mo_integrals_cache_max - out_val(i) = get_two_e_integral_cache(i,i,k,l) - enddo + do i=1,mo_integrals_cache_min-1 + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo - do i=mo_integrals_cache_max, sze - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + do i=mo_integrals_cache_min,mo_integrals_cache_max + out_val(i) = get_two_e_integral_cache(i,i,k,l) + enddo + + do i=mo_integrals_cache_max, sze + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo + + else + + integer :: isplit + do i=1,mo_integrals_cache_min-1 + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + + do i=mo_integrals_cache_min,mo_integrals_cache_max + out_val(i) = get_two_e_integral_cache(i,i,k,l) + enddo + + do i=mo_integrals_cache_max, sze + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + + endif else - do i=1,sze - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + if (mo_cholesky_double) then + do i=1,sze + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo + else + do i=1,sze + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + endif endif diff --git a/src/mp2/mp2.irp.f b/src/mp2/mp2.irp.f index b8e0cc4a..ecf2da1b 100644 --- a/src/mp2/mp2.irp.f +++ b/src/mp2/mp2.irp.f @@ -7,7 +7,7 @@ subroutine run double precision, allocatable :: pt2(:), norm_pert(:) double precision :: H_pert_diag, E_old integer :: N_st, iter - PROVIDE Fock_matrix_diag_mo H_apply_buffer_allocated + PROVIDE all_mo_integrals Fock_matrix_diag_mo H_apply_buffer_allocated N_st = N_states allocate (pt2(N_st), norm_pert(N_st)) E_old = HF_energy From 11b65259969aa612464e2b24fbd7b3b21f980627 Mon Sep 17 00:00:00 2001 From: Yann Damour <77277447+Ydrnan@users.noreply.github.com> Date: Fri, 24 Jan 2025 19:08:38 +0100 Subject: [PATCH 2/3] Fix deallocation pt2_serialized --- src/cipsi_utils/run_pt2_slave.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cipsi_utils/run_pt2_slave.irp.f b/src/cipsi_utils/run_pt2_slave.irp.f index cb1dd1f5..90c0e086 100644 --- a/src/cipsi_utils/run_pt2_slave.irp.f +++ b/src/cipsi_utils/run_pt2_slave.irp.f @@ -350,7 +350,6 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task enddo rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) - deallocate(pt2_serialized) if (rc == -1) then print *, irp_here, ': error sending result' stop 3 @@ -358,6 +357,7 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task else if(rc /= size(pt2_serialized)*8) then stop 'push' endif + deallocate(pt2_serialized) rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE) From 5a591f52405d5dd32f4597ee9cfd5bed8477fa5b Mon Sep 17 00:00:00 2001 From: Yann Damour <77277447+Ydrnan@users.noreply.github.com> Date: Fri, 24 Jan 2025 20:13:32 +0100 Subject: [PATCH 3/3] Fix segfault in scf --- src/scf_utils/roothaan_hall_scf.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 274f08d6..9e2ca4bc 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -228,9 +228,10 @@ END_DOC do while (i mo_num) exit enddo if (m>1) then call dgemm('N','T',ao_num,ao_num,m,1.d0,mo_coef(1,i),size(mo_coef,1),mo_coef(1,i),size(mo_coef,1),0.d0,S,size(S,1))