From 0c6d513bbf126e2ae92643a1e3cbfc94dbceab95 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 24 Jan 2025 19:07:16 +0100 Subject: [PATCH 01/59] Implemented single precision for cholesky mo --- src/ao_two_e_ints/cholesky.irp.f | 3 +- src/fci/pt2.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 59 +++++++- src/mo_two_e_ints/map_integrals.irp.f | 188 +++++++++++++++++++------- src/mp2/mp2.irp.f | 2 +- 5 files changed, 198 insertions(+), 56 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 69b18900..fdc729d5 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -466,10 +466,11 @@ END_PROVIDER endif + ! Reverse order of Cholesky vectors to increase precision in dot products !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num - cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) + cholesky_ao(1:ao_num,j,rank-k+1) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1) enddo enddo !$OMP END PARALLEL DO diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f index 53bf1699..186f1ff6 100644 --- a/src/fci/pt2.irp.f +++ b/src/fci/pt2.irp.f @@ -15,11 +15,11 @@ program pt2 ! sampling. ! END_DOC + PROVIDE all_mo_integrals if (.not. is_zmq_slave) then read_wf = .True. threshold_generators = 1.d0 SOFT_TOUCH read_wf threshold_generators - PROVIDE all_mo_integrals PROVIDE psi_energy call run else diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 1fed949d..835110de 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -7,7 +7,8 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ] ! do_mo_cholesky = .False. END_PROVIDER -BEGIN_PROVIDER [ integer, cholesky_mo_num ] + BEGIN_PROVIDER [ integer, cholesky_mo_num ] +&BEGIN_PROVIDER [ integer, cholesky_mo_num_split, (1:5)] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis @@ -21,6 +22,12 @@ BEGIN_PROVIDER [ integer, cholesky_mo_num ] else cholesky_mo_num = cholesky_ao_num endif + cholesky_mo_num_split(1) = 0 + cholesky_mo_num_split(2) = cholesky_mo_num/4 + cholesky_mo_num_split(3) = 2*cholesky_mo_num_split(2) + cholesky_mo_num_split(4) = 3*cholesky_mo_num_split(2) + cholesky_mo_num_split(5) = cholesky_mo_num + cholesky_mo_num_split += 1 END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] @@ -49,7 +56,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, BEGIN_DOC ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: ! - ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) ! ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) END_DOC @@ -132,3 +139,51 @@ BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_ END_PROVIDER + + + +BEGIN_PROVIDER [ real, cholesky_mo_sp, (mo_num, mo_num, cholesky_mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis in stored in single precision + END_DOC + + integer :: k, i, j + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo_sp(i,j,k) = cholesky_mo_transp_sp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +BEGIN_PROVIDER [ real, cholesky_mo_transp_sp, (cholesky_mo_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis in s. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) + END_DOC + + integer :: i,j,k + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp_sp(k,i,j) = cholesky_mo_transp(k,i,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + + diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 76f169b4..6040842e 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -9,6 +9,14 @@ BEGIN_PROVIDER [ logical, all_mo_integrals ] PROVIDE mo_two_e_integrals_in_map mo_integrals_cache mo_two_e_integrals_jj_exchange mo_two_e_integrals_jj_anti mo_two_e_integrals_jj big_array_exchange_integrals big_array_coulomb_integrals mo_one_e_integrals END_PROVIDER +BEGIN_PROVIDER [ logical, mo_cholesky_double ] + implicit none + BEGIN_DOC +! If true, use double precision to compute integrals from cholesky vectors + END_DOC + mo_cholesky_double = .True. +END_PROVIDER + !! MO Map !! ====== @@ -147,7 +155,7 @@ double precision function get_two_e_integral(i,j,k,l,map) type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky mo_cholesky_double cholesky_mo_transp_sp cholesky_mo_transp if (use_banned_excitation) then if (banned_excitation(i,k)) then @@ -178,12 +186,19 @@ double precision function get_two_e_integral(i,j,k,l,map) if (do_mo_cholesky) then double precision, external :: ddot - get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) - -! get_two_e_integral = 0.d0 -! do kk=1,cholesky_mo_num -! get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo + real, external :: sdot + integer :: isplit + if (mo_cholesky_double) then + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) + else + get_two_e_integral = 0.d0 + do isplit=1,4 + get_two_e_integral = get_two_e_integral + & + sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1) + enddo + endif else @@ -214,7 +229,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) real(integral_kind) :: tmp integer(key_kind) :: i1, idx integer(key_kind) :: p,q,r,s,i2 - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + real, allocatable :: out_val_sp(:) + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache cholesky_mo_transp cholesky_mo_transp_sp if (banned_excitation(j,l)) then out_val(1:sze) = 0.d0 @@ -225,6 +241,10 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) + if (do_mo_cholesky.and. .not.mo_cholesky_double) then + allocate(out_val_sp(sze)) + endif + if (iand(ii, -mo_integrals_cache_size) == 0) then ! Some integrals are in the cache @@ -232,11 +252,24 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + else + integer :: isplit + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + mo_integrals_cache_min-1, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp, 1) + out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) + enddo + endif else @@ -270,11 +303,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & - cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val(mo_integrals_cache_max+1), 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + else + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + mo_num-mo_integrals_cache_max, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp(mo_integrals_cache_max+1), 1) + out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze) + enddo + endif else @@ -306,11 +351,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) if (do_mo_cholesky) then - !TODO: here - call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & - cholesky_mo_transp(1,1,k), cholesky_mo_num, & - cholesky_mo_transp(1,j,l), 1, 0.d0, & - out_val, 1) + !TODO: bottleneck here + if (mo_cholesky_double) then + call dgemv('T', cholesky_mo_num, sze, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + else + out_val = 0.d0 + do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + sze, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp, 1) + out_val(1:sze) += out_val_sp(1:sze) + enddo + endif else @@ -513,7 +570,7 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) type(map_type), intent(inout) :: map integer :: i double precision, external :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map + PROVIDE mo_two_e_integrals_in_map mo_cholesky_double cholesky_mo_transp cholesky_mo_transp_sp if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max=mo_integrals_cache_min).and.(l<=mo_integrals_cache_max) ) then double precision, external :: ddot + real, external :: sdot integer :: kk - do i=1,mo_integrals_cache_min-1 - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + if (mo_cholesky_double) then - do i=mo_integrals_cache_min,mo_integrals_cache_max - out_val(i) = get_two_e_integral_cache(i,i,k,l) - enddo + do i=1,mo_integrals_cache_min-1 + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo - do i=mo_integrals_cache_max, sze - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + do i=mo_integrals_cache_min,mo_integrals_cache_max + out_val(i) = get_two_e_integral_cache(i,i,k,l) + enddo + + do i=mo_integrals_cache_max, sze + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo + + else + + integer :: isplit + do i=1,mo_integrals_cache_min-1 + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + + do i=mo_integrals_cache_min,mo_integrals_cache_max + out_val(i) = get_two_e_integral_cache(i,i,k,l) + enddo + + do i=mo_integrals_cache_max, sze + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + + endif else - do i=1,sze - out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & - cholesky_mo_transp(1,i,l), 1) -! out_val(i) = 0.d0 -! do kk=1,cholesky_mo_num -! out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l) -! enddo - enddo + if (mo_cholesky_double) then + do i=1,sze + out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, & + cholesky_mo_transp(1,i,l), 1) + enddo + else + do i=1,sze + out_val(i) = 0.d0 + do isplit=1,4 + out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) + enddo + enddo + endif endif diff --git a/src/mp2/mp2.irp.f b/src/mp2/mp2.irp.f index b8e0cc4a..ecf2da1b 100644 --- a/src/mp2/mp2.irp.f +++ b/src/mp2/mp2.irp.f @@ -7,7 +7,7 @@ subroutine run double precision, allocatable :: pt2(:), norm_pert(:) double precision :: H_pert_diag, E_old integer :: N_st, iter - PROVIDE Fock_matrix_diag_mo H_apply_buffer_allocated + PROVIDE all_mo_integrals Fock_matrix_diag_mo H_apply_buffer_allocated N_st = N_states allocate (pt2(N_st), norm_pert(N_st)) E_old = HF_energy From 11b65259969aa612464e2b24fbd7b3b21f980627 Mon Sep 17 00:00:00 2001 From: Yann Damour <77277447+Ydrnan@users.noreply.github.com> Date: Fri, 24 Jan 2025 19:08:38 +0100 Subject: [PATCH 02/59] Fix deallocation pt2_serialized --- src/cipsi_utils/run_pt2_slave.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cipsi_utils/run_pt2_slave.irp.f b/src/cipsi_utils/run_pt2_slave.irp.f index cb1dd1f5..90c0e086 100644 --- a/src/cipsi_utils/run_pt2_slave.irp.f +++ b/src/cipsi_utils/run_pt2_slave.irp.f @@ -350,7 +350,6 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task enddo rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) - deallocate(pt2_serialized) if (rc == -1) then print *, irp_here, ': error sending result' stop 3 @@ -358,6 +357,7 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task else if(rc /= size(pt2_serialized)*8) then stop 'push' endif + deallocate(pt2_serialized) rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE) From 5a591f52405d5dd32f4597ee9cfd5bed8477fa5b Mon Sep 17 00:00:00 2001 From: Yann Damour <77277447+Ydrnan@users.noreply.github.com> Date: Fri, 24 Jan 2025 20:13:32 +0100 Subject: [PATCH 03/59] Fix segfault in scf --- src/scf_utils/roothaan_hall_scf.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 274f08d6..9e2ca4bc 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -228,9 +228,10 @@ END_DOC do while (i mo_num) exit enddo if (m>1) then call dgemm('N','T',ao_num,ao_num,m,1.d0,mo_coef(1,i),size(mo_coef,1),mo_coef(1,i),size(mo_coef,1),0.d0,S,size(S,1)) From d102c0f54f222b4ccbcc242951686f42da9e42c2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Jan 2025 13:29:42 +0100 Subject: [PATCH 04/59] Fix Cholesky H2O 6-31G --- src/ao_two_e_ints/cholesky.irp.f | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 69b18900..3178a5f1 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -194,14 +194,11 @@ END_PROVIDER + (np+1)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) ! call check_mem(mem) - ! 5. do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 - - block_size = max(N,24) ! Determine nq so that Delta fits in memory @@ -308,6 +305,8 @@ END_PROVIDER Qmax = max(Qmax, D(Dset(q))) enddo + if (Qmax <= Dmin) exit + ! g. iblock = 0 From d01e24c30287f71886da6f8b04370fe7075fbfce Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Jan 2025 16:22:31 +0100 Subject: [PATCH 05/59] Fixed Pointer argument 'map' is not associated --- src/utils/map_module.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90 index ceaec874..9ebcbc6d 100644 --- a/src/utils/map_module.f90 +++ b/src/utils/map_module.f90 @@ -157,7 +157,7 @@ subroutine cache_map_reallocate(map,sze) stop 2 endif if (associated(map%value)) then - do i=1_8,min(size(map%key),map%n_elements) + do i=1_8,min(size(map%value),map%n_elements) value_new(i) = map%value(i) enddo deallocate(map%value) From 7d46b5ce3451d33d65ca457963dd3d436ca9668d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Jan 2025 16:36:35 +0100 Subject: [PATCH 06/59] Removed ZeroMQ from AO integrals calculation --- src/ao_two_e_ints/NEED | 1 - .../integrals_erf_in_map_slave.irp.f | 194 -------------- .../integrals_in_map_slave.irp.f | 244 ------------------ src/ao_two_e_ints/providers_ao_erf.irp.f | 53 ++-- src/ao_two_e_ints/two_e_integrals.irp.f | 104 +++----- 5 files changed, 63 insertions(+), 533 deletions(-) delete mode 100644 src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f delete mode 100644 src/ao_two_e_ints/integrals_in_map_slave.irp.f diff --git a/src/ao_two_e_ints/NEED b/src/ao_two_e_ints/NEED index 542962ec..34b4a641 100644 --- a/src/ao_two_e_ints/NEED +++ b/src/ao_two_e_ints/NEED @@ -2,5 +2,4 @@ hamiltonian ao_one_e_ints pseudo bitmask -zmq ao_basis diff --git a/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f deleted file mode 100644 index ded49233..00000000 --- a/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f +++ /dev/null @@ -1,194 +0,0 @@ -subroutine ao_two_e_integrals_erf_in_map_slave_tcp(i) - implicit none - integer, intent(in) :: i - BEGIN_DOC -! Computes a buffer of integrals. i is the ID of the current thread. - END_DOC - call ao_two_e_integrals_erf_in_map_slave(0,i) -end - - -subroutine ao_two_e_integrals_erf_in_map_slave_inproc(i) - implicit none - integer, intent(in) :: i - BEGIN_DOC -! Computes a buffer of integrals. i is the ID of the current thread. - END_DOC - call ao_two_e_integrals_erf_in_map_slave(1,i) -end - - - - - -subroutine ao_two_e_integrals_erf_in_map_slave(thread,iproc) - use map_module - use f77_zmq - implicit none - BEGIN_DOC -! Computes a buffer of integrals - END_DOC - - integer, intent(in) :: thread, iproc - - integer :: j,l,n_integrals - integer :: rc - real(integral_kind), allocatable :: buffer_value(:) - integer(key_kind), allocatable :: buffer_i(:) - - integer :: worker_id, task_id - character*(512) :: task - - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_push_socket - integer(ZMQ_PTR) :: zmq_socket_push - - character*(64) :: state - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - - integer, external :: connect_to_taskserver - if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - return - endif - - zmq_socket_push = new_zmq_push_socket(thread) - - allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) ) - - - do - integer, external :: get_task_from_taskserver - if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then - exit - endif - if (task_id == 0) exit - read(task,*) j, l - integer, external :: task_done_to_taskserver - call compute_ao_integrals_erf_jl(j,l,n_integrals,buffer_i,buffer_value) - if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then - stop 'Unable to send task_done' - endif - call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id) - enddo - - integer, external :: disconnect_from_taskserver - if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then - continue - endif - deallocate( buffer_i, buffer_value ) - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_push_socket(zmq_socket_push,thread) - -end - - -subroutine ao_two_e_integrals_erf_in_map_collector(zmq_socket_pull) - use map_module - use f77_zmq - implicit none - BEGIN_DOC -! Collects results from the AO integral calculation - END_DOC - - integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - integer :: j,l,n_integrals - integer :: rc - - real(integral_kind), allocatable :: buffer_value(:) - integer(key_kind), allocatable :: buffer_i(:) - - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - - integer*8 :: control, accu, sze - integer :: task_id, more - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - - sze = ao_num*ao_num - allocate ( buffer_i(sze), buffer_value(sze) ) - - accu = 0_8 - more = 1 - do while (more == 1) - - rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0) - if (rc == -1) then - n_integrals = 0 - return - endif - if (rc /= 4) then - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)' - stop 'error' - endif - - if (n_integrals >= 0) then - - if (n_integrals > sze) then - deallocate (buffer_value, buffer_i) - sze = n_integrals - allocate (buffer_value(sze), buffer_i(sze)) - endif - - rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0) - if (rc /= key_kind*n_integrals) then - print *, rc, key_kind, n_integrals - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)' - stop 'error' - endif - - rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0) - if (rc /= integral_kind*n_integrals) then - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)' - stop 'error' - endif - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - -IRP_IF ZMQ_PUSH -IRP_ELSE - rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) - if (rc /= 4) then - print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' - stop 'error' - endif -IRP_ENDIF - - - call insert_into_ao_integrals_erf_map(n_integrals,buffer_i,buffer_value) - accu += n_integrals - if (task_id /= 0) then - integer, external :: zmq_delete_task - if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then - stop 'Unable to delete task' - endif - endif - endif - - enddo - - deallocate( buffer_i, buffer_value ) - - integer (map_size_kind) :: get_ao_erf_map_size - control = get_ao_erf_map_size(ao_integrals_erf_map) - - if (control /= accu) then - print *, '' - print *, irp_here - print *, 'Control : ', control - print *, 'Accu : ', accu - print *, 'Some integrals were lost during the parallel computation.' - print *, 'Try to reduce the number of threads.' - stop - endif - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - -end - diff --git a/src/ao_two_e_ints/integrals_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_in_map_slave.irp.f deleted file mode 100644 index 122fa2ac..00000000 --- a/src/ao_two_e_ints/integrals_in_map_slave.irp.f +++ /dev/null @@ -1,244 +0,0 @@ -subroutine ao_two_e_integrals_in_map_slave_tcp(i) - implicit none - integer, intent(in) :: i - BEGIN_DOC -! Computes a buffer of integrals. i is the ID of the current thread. - END_DOC - call ao_two_e_integrals_in_map_slave(0,i) -end - - -subroutine ao_two_e_integrals_in_map_slave_inproc(i) - implicit none - integer, intent(in) :: i - BEGIN_DOC -! Computes a buffer of integrals. i is the ID of the current thread. - END_DOC - call ao_two_e_integrals_in_map_slave(1,i) -end - - -subroutine push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id) - use f77_zmq - use map_module - implicit none - BEGIN_DOC -! Push integrals in the push socket - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_socket_push - integer, intent(in) :: n_integrals - integer(key_kind), intent(in) :: buffer_i(*) - real(integral_kind), intent(in) :: buffer_value(*) - integer, intent(in) :: task_id - integer :: rc - - rc = f77_zmq_send( zmq_socket_push, n_integrals, 4, ZMQ_SNDMORE) - if (rc /= 4) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, n_integrals, 4, ZMQ_SNDMORE)' - stop 'error' - endif - - rc = f77_zmq_send( zmq_socket_push, buffer_i, key_kind*n_integrals, ZMQ_SNDMORE) - if (rc /= key_kind*n_integrals) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, buffer_i, key_kind*n_integrals, ZMQ_SNDMORE)' - stop 'error' - endif - - rc = f77_zmq_send( zmq_socket_push, buffer_value, integral_kind*n_integrals, ZMQ_SNDMORE) - if (rc /= integral_kind*n_integrals) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, buffer_value, integral_kind*n_integrals, 0)' - stop 'error' - endif - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if (rc /= 4) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, task_id, 4, 0)' - stop 'error' - endif - -IRP_IF ZMQ_PUSH -IRP_ELSE - integer :: idummy - rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) - if (rc /= 4) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' - stop 'error' - endif -IRP_ENDIF -end - - - - - -subroutine ao_two_e_integrals_in_map_slave(thread,iproc) - use map_module - use f77_zmq - implicit none - BEGIN_DOC -! Computes a buffer of integrals - END_DOC - - integer, intent(in) :: thread, iproc - - integer :: j,l,n_integrals - integer :: rc - real(integral_kind), allocatable :: buffer_value(:) - integer(key_kind), allocatable :: buffer_i(:) - - integer :: worker_id, task_id - character*(512) :: task - - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_push_socket - integer(ZMQ_PTR) :: zmq_socket_push - - character*(64) :: state - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - - integer, external :: connect_to_taskserver - if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - return - endif - - zmq_socket_push = new_zmq_push_socket(thread) - - allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) ) - - - do - integer, external :: get_task_from_taskserver - if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then - exit - endif - if (task_id == 0) exit - call sscanf_dd(task, j, l) - integer, external :: task_done_to_taskserver - call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) - if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then - stop 'Unable to send task_done' - endif - call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id) - enddo - - integer, external :: disconnect_from_taskserver - if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then - continue - endif - deallocate( buffer_i, buffer_value ) - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_push_socket(zmq_socket_push,thread) - -end - - -subroutine ao_two_e_integrals_in_map_collector(zmq_socket_pull) - use map_module - use f77_zmq - implicit none - BEGIN_DOC -! Collects results from the AO integral calculation - END_DOC - - integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - integer :: j,l,n_integrals - integer :: rc - - real(integral_kind), allocatable :: buffer_value(:) - integer(key_kind), allocatable :: buffer_i(:) - - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - - integer*8 :: control, accu, sze - integer :: task_id, more - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - - sze = ao_num*ao_num - allocate ( buffer_i(sze), buffer_value(sze) ) - - accu = 0_8 - more = 1 - do while (more == 1) - - rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0) - if (rc == -1) then - n_integrals = 0 - return - endif - if (rc /= 4) then - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)' - stop 'error' - endif - - if (n_integrals >= 0) then - - if (n_integrals > sze) then - deallocate (buffer_value, buffer_i) - sze = n_integrals - allocate (buffer_value(sze), buffer_i(sze)) - endif - - rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0) - if (rc /= key_kind*n_integrals) then - print *, rc, key_kind, n_integrals - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)' - stop 'error' - endif - - rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0) - if (rc /= integral_kind*n_integrals) then - print *, irp_here, ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)' - stop 'error' - endif - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - -IRP_IF ZMQ_PUSH -IRP_ELSE - rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) - if (rc /= 4) then - print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' - stop 'error' - endif -IRP_ENDIF - - - call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value) - accu += n_integrals - if (task_id /= 0) then - integer, external :: zmq_delete_task - if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then - stop 'Unable to delete task' - endif - endif - endif - - enddo - - deallocate( buffer_i, buffer_value ) - - integer (map_size_kind) :: get_ao_map_size - control = get_ao_map_size(ao_integrals_map) - - if (control /= accu) then - print *, '' - print *, irp_here - print *, 'Control : ', control - print *, 'Accu : ', accu - print *, 'Some integrals were lost during the parallel computation.' - print *, 'Try to reduce the number of threads.' - stop - endif - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - -end - diff --git a/src/ao_two_e_ints/providers_ao_erf.irp.f b/src/ao_two_e_ints/providers_ao_erf.irp.f index ff8c31a2..8468fb0a 100644 --- a/src/ao_two_e_ints/providers_ao_erf.irp.f +++ b/src/ao_two_e_ints/providers_ao_erf.irp.f @@ -1,7 +1,5 @@ - BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ] implicit none - use f77_zmq use map_module BEGIN_DOC ! Map of Atomic integrals @@ -15,17 +13,16 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ] ! For integrals file integer(key_kind),allocatable :: buffer_i(:) - integer,parameter :: size_buffer = 1024*64 + integer :: size_buffer real(integral_kind),allocatable :: buffer_value(:) integer :: n_integrals, rc integer :: kk, m, j1, i1, lmax character*(64) :: fmt - integral = ao_two_e_integral_erf(1,1,1,1) - double precision :: map_mb - PROVIDE read_ao_two_e_integrals_erf io_ao_two_e_integrals_erf + PROVIDE read_ao_two_e_integrals_erf io_ao_two_e_integrals_erf ao_integrals_erf_map + if (read_ao_two_e_integrals_erf) then print*,'Reading the AO ERF integrals' call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map) @@ -39,37 +36,27 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ] call wall_time(wall_1) call cpu_time(cpu_1) - integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_integrals_erf') - - character(len=:), allocatable :: task - allocate(character(len=ao_num*12) :: task) - write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))' - do l=1,ao_num - write(task,fmt) (i,l, i=1,l) - integer, external :: add_task_to_taskserver - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then - stop 'Unable to add task to server' - endif - enddo - deallocate(task) - - integer, external :: zmq_set_running - if (zmq_set_running(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Failed in zmq_set_running' + if (.True.) then + ! Avoid openMP + integral = ao_two_e_integral_erf(1,1,1,1) endif - PROVIDE nproc - !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call ao_two_e_integrals_erf_in_map_collector(zmq_socket_pull) - else - call ao_two_e_integrals_erf_in_map_slave_inproc(i) - endif + size_buffer = ao_num*ao_num + !$OMP PARALLEL DEFAULT(shared) private(j,l) & + !$OMP PRIVATE(buffer_i, buffer_value, n_integrals) + allocate(buffer_i(size_buffer), buffer_value(size_buffer)) + n_integrals = 0 + !$OMP DO COLLAPSE(1) SCHEDULE(dynamic) + do l=1,ao_num + do j=1,l + call compute_ao_integrals_erf_jl(j,l,n_integrals,buffer_i,buffer_value) + call insert_into_ao_integrals_erf_map(n_integrals,buffer_i,buffer_value) + enddo + enddo + !$OMP END DO + deallocate(buffer_i, buffer_value) !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_integrals_erf') print*, 'Sorting the map' diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index e4bd9d1d..fb376ce1 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -397,7 +397,6 @@ end BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] implicit none - use f77_zmq use map_module BEGIN_DOC ! Map of Atomic integrals @@ -411,7 +410,7 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] ! For integrals file integer(key_kind),allocatable :: buffer_i(:) - integer,parameter :: size_buffer = 1024*64 + integer :: size_buffer real(integral_kind),allocatable :: buffer_value(:) integer :: n_integrals, rc @@ -419,78 +418,61 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] character*(64) :: fmt double precision :: map_mb - PROVIDE read_ao_two_e_integrals io_ao_two_e_integrals + PROVIDE read_ao_two_e_integrals io_ao_two_e_integrals ao_integrals_map + if (read_ao_two_e_integrals) then print*,'Reading the AO integrals' call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) print*, 'AO integrals provided' ao_two_e_integrals_in_map = .True. - else + return + endif - print*, 'Providing the AO integrals' - call wall_time(wall_0) - call wall_time(wall_1) - call cpu_time(cpu_1) + print*, 'Providing the AO integrals' + call wall_time(wall_0) + call wall_time(wall_1) + call cpu_time(cpu_1) - if (.True.) then - ! Avoid openMP - integral = ao_two_e_integral(1,1,1,1) - endif + if (.True.) then + ! Avoid openMP + integral = ao_two_e_integral(1,1,1,1) + endif - integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_integrals') - - character(len=:), allocatable :: task - allocate(character(len=ao_num*12) :: task) - write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))' - do l=1,ao_num - write(task,fmt) (i,l, i=1,l) - integer, external :: add_task_to_taskserver - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then - stop 'Unable to add task to server' - endif + size_buffer = ao_num*ao_num + !$OMP PARALLEL DEFAULT(shared) private(j,l) & + !$OMP PRIVATE(buffer_i, buffer_value, n_integrals) + allocate(buffer_i(size_buffer), buffer_value(size_buffer)) + n_integrals = 0 + !$OMP DO COLLAPSE(1) SCHEDULE(dynamic) + do l=1,ao_num + do j=1,l + call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) + call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value) enddo - deallocate(task) + enddo + !$OMP END DO + deallocate(buffer_i, buffer_value) + !$OMP END PARALLEL - integer, external :: zmq_set_running - if (zmq_set_running(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Failed in zmq_set_running' - endif + print*, 'Sorting the map' + call map_sort(ao_integrals_map) + call cpu_time(cpu_2) + call wall_time(wall_2) + integer(map_size_kind) :: get_ao_map_size, ao_map_size + ao_map_size = get_ao_map_size() - PROVIDE nproc - !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call ao_two_e_integrals_in_map_collector(zmq_socket_pull) - else - call ao_two_e_integrals_in_map_slave_inproc(i) - endif - !$OMP END PARALLEL + print*, 'AO integrals provided:' + print*, ' Size of AO map : ', map_mb(ao_integrals_map) ,'MB' + print*, ' Number of AO integrals :', ao_map_size + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_integrals') - - - print*, 'Sorting the map' - call map_sort(ao_integrals_map) - call cpu_time(cpu_2) - call wall_time(wall_2) - integer(map_size_kind) :: get_ao_map_size, ao_map_size - ao_map_size = get_ao_map_size() - - print*, 'AO integrals provided:' - print*, ' Size of AO map : ', map_mb(ao_integrals_map) ,'MB' - print*, ' Number of AO integrals :', ao_map_size - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - - ao_two_e_integrals_in_map = .True. - - if (write_ao_two_e_integrals.and.mpi_master) then - call ezfio_set_work_empty(.False.) - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') - endif + ao_two_e_integrals_in_map = .True. + if (write_ao_two_e_integrals.and.mpi_master) then + call ezfio_set_work_empty(.False.) + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') endif END_PROVIDER From 3478e741a4c332b7373ad00aa3c443d9f4dbd2d2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 30 Jan 2025 13:13:34 +0100 Subject: [PATCH 07/59] Updated EZFIO --- external/ezfio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 From e1523c492941cd8a9d290743956b6b2a20f589bf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 Jan 2025 11:40:49 +0100 Subject: [PATCH 08/59] Removed irrelevent dependency on cholesky --- src/mo_two_e_ints/map_integrals.irp.f | 4 ++-- src/tools/print_energy.irp.f | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 6040842e..5b233899 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -155,7 +155,7 @@ double precision function get_two_e_integral(i,j,k,l,map) type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky mo_cholesky_double cholesky_mo_transp_sp cholesky_mo_transp + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky if (use_banned_excitation) then if (banned_excitation(i,k)) then @@ -570,7 +570,7 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) type(map_type), intent(inout) :: map integer :: i double precision, external :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map mo_cholesky_double cholesky_mo_transp cholesky_mo_transp_sp + PROVIDE mo_two_e_integrals_in_map mo_cholesky_double if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max Date: Fri, 31 Jan 2025 17:14:49 +0100 Subject: [PATCH 09/59] Stabilize PT2 when expected accuracy is low --- external/ezfio | 2 +- src/cipsi_utils/pt2_stoch_routines.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index d02132ea..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f index 144d052d..162ab02c 100644 --- a/src/cipsi_utils/pt2_stoch_routines.irp.f +++ b/src/cipsi_utils/pt2_stoch_routines.irp.f @@ -530,7 +530,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ avg = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c) avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c) avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c) - if ((avg /= 0.d0) .or. (n == N_det_generators) ) then + if (((c>=10).and.(avg /= 0.d0)) .or. (n == N_det_generators) ) then do_exit = .true. endif if (qp_stop()) then From 4b9939e738fbecf831847c9047a8bd11b99abd9b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Feb 2025 13:41:46 +0100 Subject: [PATCH 10/59] Fix qp_exc_energy.py:195: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated --- scripts/qp_exc_energy.py | 12 ++++++++++-- src/davidson/u0_hs2_u0.irp.f | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index 44136311..e08866e3 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -157,11 +157,15 @@ A = np.array( [ [ data[-1][1], 1. ], B = np.array( [ [ data[-1][0] ], [ data[-2][0] ] ] ) E0 = np.linalg.solve(A,B)[1] +E0 = E0[0] + A = np.array( [ [ data[-1][4], 1. ], [ data[-2][4], 1. ] ] ) B = np.array( [ [ data[-1][3] ], [ data[-2][3] ] ] ) E1 = np.linalg.solve(A,B)[1] +E1 = E1[0] + average_2 = (E1-E0)*to_eV A = np.array( [ [ data[-1][1], 1. ], @@ -170,14 +174,18 @@ A = np.array( [ [ data[-1][1], 1. ], B = np.array( [ [ data[-1][0] ], [ data[-2][0] ], [ data[-3][0] ] ] ) -E0 = np.linalg.lstsq(A,B,rcond=None)[0][1] +E0 = np.linalg.lstsq(A,B,rcond=None)[0] +E0 = E0[0][0] + A = np.array( [ [ data[-1][4], 1. ], [ data[-2][4], 1. ], [ data[-3][4], 1. ] ] ) B = np.array( [ [ data[-1][3] ], [ data[-2][3] ], [ data[-3][3] ] ] ) -E1 = np.linalg.lstsq(A,B,rcond=None)[0][1] +E1 = np.linalg.lstsq(A,B,rcond=None)[0] +E1 = E1[0][0] + average_3 = (E1-E0)*to_eV exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f index 3afe4ec6..f2ce7aa9 100644 --- a/src/davidson/u0_hs2_u0.irp.f +++ b/src/davidson/u0_hs2_u0.irp.f @@ -291,7 +291,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, ASSERT (istart > 0) ASSERT (istep > 0) - !$OMP DO SCHEDULE(guided,64) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) @@ -469,7 +469,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, enddo !$OMP END DO - !$OMP DO SCHEDULE(guided,64) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep From dc75f495001a13dd38209a220d7230fb5ad3e450 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Feb 2025 14:05:28 +0100 Subject: [PATCH 11/59] Added selected MRCI module --- src/cipsi_utils/pt2_stoch_routines.irp.f | 1 + src/mo_basis/utils.irp.f | 2 +- src/mrci/EZFIO.cfg | 24 +++++++++ src/mrci/NEED | 4 ++ src/mrci/README.rst | 17 +++++++ src/mrci/class.irp.f | 8 +++ src/mrci/mrci.irp.f | 64 ++++++++++++++++++++++++ src/mrci/save_energy.irp.f | 10 ++++ 8 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 src/mrci/EZFIO.cfg create mode 100644 src/mrci/NEED create mode 100644 src/mrci/README.rst create mode 100644 src/mrci/class.irp.f create mode 100644 src/mrci/mrci.irp.f create mode 100644 src/mrci/save_energy.irp.f diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f index 162ab02c..68a3c9fc 100644 --- a/src/cipsi_utils/pt2_stoch_routines.irp.f +++ b/src/cipsi_utils/pt2_stoch_routines.irp.f @@ -212,6 +212,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) ipos += 1 endif enddo + call write_int(6,pt2_stoch_istate,'State') call write_int(6,sum(pt2_F),'Number of tasks') call write_int(6,ipos,'Number of fragmented tasks') diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 987c394a..3f83b518 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -228,7 +228,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix_eig(matrix,lda,m,n,eig,label) call dgemm('N','N',ao_num,m,m,1.d0,mo_coef_new,size(mo_coef_new,1),U,size(U,1),0.d0,mo_coef,size(mo_coef,1)) do i=1,m - if (eig(i) > 1.d-20) then + if (D(i) > 1.d-20) then eig(i) = D(i) else eig(i) = 0.d0 diff --git a/src/mrci/EZFIO.cfg b/src/mrci/EZFIO.cfg new file mode 100644 index 00000000..c8efd4b9 --- /dev/null +++ b/src/mrci/EZFIO.cfg @@ -0,0 +1,24 @@ +[energy] +type: double precision +doc: Calculated Selected CASSD energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated CASSD energy + PT2 +interface: ezfio +size: (determinants.n_states) + + +[do_ddci] +type: logical +doc: If true, remove purely inactive double excitations +interface: ezfio,provider,ocaml +default: False + +[do_only_1h1p] +type: logical +doc: If true, do only one hole/one particle excitations +interface: ezfio,provider,ocaml +default: False diff --git a/src/mrci/NEED b/src/mrci/NEED new file mode 100644 index 00000000..ad99293f --- /dev/null +++ b/src/mrci/NEED @@ -0,0 +1,4 @@ +cipsi +generators_cas +selectors_full +davidson_undressed diff --git a/src/mrci/README.rst b/src/mrci/README.rst new file mode 100644 index 00000000..ac3c0139 --- /dev/null +++ b/src/mrci/README.rst @@ -0,0 +1,17 @@ +==== +mrci +==== + + +|CIPSI| algorithm in the multi-reference CI space (CAS + Singles and Doubles). + + +This module is the same as the :ref:`fci` module, except for the choice of the +generator and selector determinants. + +The inactive, active and virtual |MOs| will need to be set with the +:ref:`qp_set_mo_class` program. + +.. seealso:: + + The documentation of the :ref:`fci` module. diff --git a/src/mrci/class.irp.f b/src/mrci/class.irp.f new file mode 100644 index 00000000..271eb930 --- /dev/null +++ b/src/mrci/class.irp.f @@ -0,0 +1,8 @@ +BEGIN_PROVIDER [ logical, do_only_cas ] + implicit none + BEGIN_DOC + ! In the CAS+SD case, always false + END_DOC + do_only_cas = .False. +END_PROVIDER + diff --git a/src/mrci/mrci.irp.f b/src/mrci/mrci.irp.f new file mode 100644 index 00000000..ef4a2454 --- /dev/null +++ b/src/mrci/mrci.irp.f @@ -0,0 +1,64 @@ +program mrci + implicit none + BEGIN_DOC +! Selected CAS+Singles and Doubles with stochastic selection +! and PT2. +! +! This program performs a |CIPSI|-like selected |CI| using a +! stochastic scheme for both the selection of the important Slater +! determinants and the computation of the |PT2| correction. This +! |CIPSI|-like algorithm will be performed for the lowest states of +! the variational space (see :option:`determinants n_states`). The +! program will stop when reaching at least one the two following +! conditions: +! +! * number of Slater determinants > :option:`determinants n_det_max` +! * |PT2| < :option:`perturbation pt2_max` +! +! The following other options can be of interest: +! +! :option:`determinants read_wf` +! When set to |false|, the program starts with a ROHF-like Slater +! determinant as a guess wave function. When set to |true|, the +! program starts with the wave function(s) stored in the |EZFIO| +! directory as guess wave function(s). +! +! :option:`determinants s2_eig` +! When set to |true|, the selection will systematically add all the +! necessary Slater determinants in order to have a pure spin wave +! function with an |S^2| value corresponding to +! :option:`determinants expected_s2`. +! +! For excited states calculations, it is recommended to start with +! :ref:`.cis.` or :ref:`.cisd.` guess wave functions, eventually in +! a restricted set of |MOs|, and to set :option:`determinants s2_eig` +! to |true|. +! + END_DOC + + PROVIDE all_mo_integrals + if (.not.is_zmq_slave) then + PROVIDE psi_det psi_coef + + write(json_unit,json_array_open_fmt) 'fci' + + double precision, allocatable :: Ev(:),PT2(:) + allocate(Ev(N_states), PT2(N_states)) + if (do_pt2) then + call run_stochastic_cipsi(Ev,PT2) + else + call run_cipsi + endif + + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + + else + PROVIDE pt2_min_parallel_tasks + + call run_slave_cipsi + + endif +end diff --git a/src/mrci/save_energy.irp.f b/src/mrci/save_energy.irp.f new file mode 100644 index 00000000..28840b6c --- /dev/null +++ b/src/mrci/save_energy.irp.f @@ -0,0 +1,10 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_mrci_energy(E(1:N_states)) + call ezfio_set_mrci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end + From 6611722d32aad4ac8852de6eba7adef3834bfb49 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 4 Feb 2025 11:31:11 +0100 Subject: [PATCH 12/59] put more explicit print in print_su_pbe_ot.irp.f --- plugins/local/basis_correction/print_su_pbe_ot.irp.f | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/local/basis_correction/print_su_pbe_ot.irp.f b/plugins/local/basis_correction/print_su_pbe_ot.irp.f index 49f90ade..0f196e60 100644 --- a/plugins/local/basis_correction/print_su_pbe_ot.irp.f +++ b/plugins/local/basis_correction/print_su_pbe_ot.irp.f @@ -18,6 +18,7 @@ end subroutine print_su_pbe_ot implicit none integer :: istate + print*,'Two flavours of PBE functionals :' do istate = 1, N_states write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-UEG , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate) enddo From 0647a9db5f7173189cf38451a7032436b21f0d57 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Feb 2025 14:49:39 +0100 Subject: [PATCH 13/59] Starting optimization of open-shell ccsd --- src/ccsd/ccsd_space_orb.irp.f | 5 +- src/ccsd/ccsd_spin_orb.irp.f | 2 - src/ccsd/ccsd_spin_orb_sub.irp.f | 444 ++++++++++++++--------------- src/utils_cc/mo_integrals_cc.irp.f | 16 ++ 4 files changed, 229 insertions(+), 238 deletions(-) diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f index 53028ec0..91f703a0 100644 --- a/src/ccsd/ccsd_space_orb.irp.f +++ b/src/ccsd/ccsd_space_orb.irp.f @@ -1,9 +1,10 @@ -! Code - program ccsd implicit none + BEGIN_DOC + ! Closed-shell CCSD + END_DOC read_wf = .True. touch read_wf diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f index 6f2de11c..04344fbb 100644 --- a/src/ccsd/ccsd_spin_orb.irp.f +++ b/src/ccsd/ccsd_spin_orb.irp.f @@ -1,5 +1,3 @@ -! Prog - program ccsd implicit none diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index 09d6a0fe..fe202ebf 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -11,9 +11,9 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) double precision, allocatable :: r1(:,:), r2(:,:,:,:) double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) - double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) - - double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_o(:), f_v(:) double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) @@ -24,8 +24,7 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: all_err(:,:), all_t(:,:) logical :: not_converged - integer, allocatable :: list_occ(:,:), list_vir(:,:) - integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi integer(bit_kind) :: det(N_int,2) @@ -33,7 +32,7 @@ subroutine run_ccsd_spin_orb det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) - + ! Extract number of occ/vir alpha/beta spin orbitals !call extract_n_spin(det,n_spin) nOa = cc_nOa !n_spin(1) @@ -41,107 +40,83 @@ subroutine run_ccsd_spin_orb nVa = cc_nVa !n_spin(3) nVb = cc_nVb !n_spin(4) - ! Total number of occ/vir spin orb - nO = cc_nOab !nOa + nOb - nV = cc_nVab !nVa + nVb - ! Debug - !print*,nO,nV - ! Number of occ/vir spin orb per spin nO_S = cc_nO_S !(/nOa,nOb/) nV_S = cc_nV_S !(/nVa,nVb/) ! Debug !print*,nO_S,nV_S - ! Maximal number of occ/vir + ! Maximal number of occ/vir nO_m = cc_nO_m !max(nOa, nOb) nV_m = cc_nV_m !max(nVa, nVb) ! Debug !print*,nO_m,nV_m - - allocate(list_occ(nO_m,2), list_vir(nV_m,2)) - list_occ = cc_list_occ_spin - list_vir = cc_list_vir_spin - ! Debug - !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) - !print*,list_occ(:,1) - !print*,list_occ(:,2) - !print*,list_vir(:,1) - !print*,list_vir(:,2) ! Allocation - allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) - allocate(r1(nO,nV), r2(nO,nO,nV,nV)) - allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) - allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) - allocate(v_oooo(nO,nO,nO,nO)) - !allocate(v_vooo(nV,nO,nO,nO)) - allocate(v_ovoo(nO,nV,nO,nO)) - allocate(v_oovo(nO,nO,nV,nO)) - allocate(v_ooov(nO,nO,nO,nV)) - allocate(v_vvoo(nV,nV,nO,nO)) - !allocate(v_vovo(nV,nO,nV,nO)) - !allocate(v_voov(nV,nO,nO,nV)) - allocate(v_ovvo(nO,nV,nV,nO)) - allocate(v_ovov(nO,nV,nO,nV)) - allocate(v_oovv(nO,nO,nV,nV)) - !allocate(v_vvvo(nV,nV,nV,nO)) - !allocate(v_vvov(nV,nV,nO,nV)) - !allocate(v_vovv(nV,nO,nV,nV)) - !allocate(v_ovvv(nO,nV,nV,nV)) - !allocate(v_vvvv(nV,nV,nV,nV)) - allocate(f_o(nO), f_v(nV)) - allocate(f_oo(nO, nO)) - allocate(f_ov(nO, nV)) - allocate(f_vv(nV, nV)) - + allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) + allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab)) + !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab)) + allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab)) + allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab)) + allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab)) + allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab)) + !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab)) + !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab)) + allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab)) + allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab)) + allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) + !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab)) + !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab)) + !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab)) + !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(f_o(cc_nOab), f_v(cc_nVab)) + ! Allocation for the diis if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth)) all_err = 0d0 all_t = 0d0 endif - ! Fock elements - call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) - call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) - call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) - ! Diag elements - do i = 1, nO - f_o(i) = f_oo(i,i) + do i = 1, cc_nOab + f_o(i) = cc_spin_f_oo(i,i) enddo - do i = 1, nV - f_v(i) = f_vv(i,i) + do i = 1, cc_nVab + f_v(i) = cc_spin_f_vv(i,i) enddo ! Bi electronic integrals from list call wall_time(ti) ! OOOO - call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo) ! OOO V - !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) - call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) - call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) - call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov) ! OO VV - call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) - !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) - !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) - call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) - call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) - call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv) ! O VVV - !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) - !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) - !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) - !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv) ! VVVV - !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv) call wall_time(tf) if (cc_dev) then print*,'Load bi elec int:',tf-ti,'s' @@ -149,11 +124,11 @@ subroutine run_ccsd_spin_orb ! Init of T t1 = 0d0 - call guess_t1(nO,nV,f_o,f_v,f_ov,t1) - call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) - call compute_tau_spin(nO,nV,t1,t2,tau) - call compute_tau_t_spin(nO,nV,t1,t2,tau_t) - + call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2) + call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + ! Loop init nb_iter = 0 not_converged = .True. @@ -164,9 +139,9 @@ subroutine run_ccsd_spin_orb call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) print*,'guess energy', uncorr_energy+energy, energy - + write(*,'(A77)') ' -----------------------------------------------------------------------------' write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' @@ -179,18 +154,18 @@ subroutine run_ccsd_spin_orb ! Intermediates call wall_time(tbi) call wall_time(ti) - call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) - call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) - call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv) call wall_time(tf) if (cc_dev) then print*,'Compute cFs:',tf-ti,'s' endif - + call wall_time(ti) - call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) - call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) - !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) call wall_time(tf) if (cc_dev) then print*,'Compute cFs:',tf-ti,'s' @@ -198,13 +173,13 @@ subroutine run_ccsd_spin_orb ! Residuals call wall_time(ti) - call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) call wall_time(tf) if (cc_dev) then print*,'Compute r1:',tf-ti,'s' endif call wall_time(ti) - call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) call wall_time(tf) if (cc_dev) then print*,'Compute r2:',tf-ti,'s' @@ -218,29 +193,29 @@ subroutine run_ccsd_spin_orb call wall_time(ti) ! Update if (cc_update_method == 'diis') then - !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(nO,nV,f_o,f_v,r1,t1) - call update_t2(nO,nV,f_o,f_v,r2,t2) + call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1) + call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2) else print*,'Unkonw cc_method_method: '//cc_update_method endif - call compute_tau_spin(nO,nV,t1,t2,tau) - call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) call wall_time(tf) if (cc_dev) then print*,'Update:',tf-ti,'s' endif ! Print - call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) call wall_time(tfi) - + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then @@ -270,8 +245,8 @@ subroutine run_ccsd_spin_orb print*,'' if (write_amplitudes) then - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + call write_t1(cc_nOab,cc_nVab,t1) + call write_t2(cc_nOab,cc_nVab,t2) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -286,20 +261,20 @@ subroutine run_ccsd_spin_orb deallocate(v_oooo) deallocate(v_ovoo,v_oovo) deallocate(v_ovvo,v_ovov,v_oovv) - + double precision :: t_corr t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & - ! nV,nV,nV,nO, v_vvvo) + ! cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) - call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr) !print*,'Working on it...' !call abort call wall_time(tb) @@ -313,12 +288,12 @@ subroutine run_ccsd_spin_orb endif call save_energy(uncorr_energy + energy, t_corr) - - deallocate(f_oo,f_ov,f_vv,f_o,f_v) + + deallocate(f_o,f_v) deallocate(v_ooov,v_vvoo,t1,t2) !deallocate(v_ovvv,v_vvvo,v_vovv) !deallocate(v_vvvv) - + end ! Energy @@ -354,7 +329,7 @@ subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) do j=1,nO do a=1,nV do b=1,nV - energy = energy & + energy = energy & + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) end do @@ -375,7 +350,7 @@ subroutine compute_tau_spin(nO,nV,t1,t2,tau) double precision,intent(in) :: t2(nO,nO,nV,nV) double precision,intent(out) :: tau(nO,nO,nV,nV) - + integer :: i,j,k,l integer :: a,b,c,d @@ -463,7 +438,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !$OMP v_ovov,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(1) do a=1,nV do i=1,nO @@ -494,7 +469,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov 1d0, t1 , size(t1,1), & cF_vv, size(cF_vv,1), & 1d0, r1 , size(r1,1)) - + !do a=1,nV ! do i=1,nO ! do m=1,nO @@ -531,7 +506,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) !do f = 1, nV @@ -546,28 +521,28 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','T', nO, nV, nO*nV, & -0.5d0, t2(1,1,1,f), size(t2,1), & X_vovf, size(X_vovf,1), & 1d0 , r1 , size(r1,1)) enddo - + !call dgemm('N','T', nO, nV, nO*nV*nV, & ! -0.5d0, t2 , size(t2,1), & ! X_vovv, size(X_vovv,1), & ! 1d0 , r1 , size(r1,1)) - + deallocate(X_vovf) !deallocate(X_vovv) allocate(X_oovv(nO,nO,nV,nV)) - + !$OMP PARALLEL & !$OMP SHARED(r1,t1,t2,X_oovv, & !$OMP f_o,f_v,v_oovo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !do a=1,nV ! do i=1,nO ! do e=1,nV @@ -579,7 +554,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov ! end do ! end do !end do - + !$OMP DO collapse(3) do a = 1, nV do e = 1, nV @@ -592,12 +567,12 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO, nV, nO*nO*nV, & -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & 1d0 , r1 , size(r1,1)) - + !$OMP PARALLEL & !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & @@ -610,7 +585,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(X_oovv) end @@ -684,7 +659,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -697,7 +672,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + !deallocate(X_oovv) !do b=1,nV @@ -726,25 +701,25 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & A_vv , size(A_vv,1), & 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$OMP PARALLEL & !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vv)!,X_oovv) !do b=1,nV @@ -766,7 +741,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -789,13 +764,13 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) end do end do end do @@ -821,17 +796,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! end do !end do allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) - + call dgemm('N','T', nO, nO, nV, & 1d0, t1 , size(t1,1), & cF_ov, size(cF_ov,1), & 0d0, A_oo , size(A_oo,1)) - + !$OMP PARALLEL & !$OMP SHARED(t2,B_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -844,17 +819,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nO*nV*nV, nO, & 0.5d0, A_oo, size(A_oo,1), & B_oovv, size(B_oovv,1), & 0d0 , X_oovv, size(X_oovv,1)) - + !$OMP PARALLEL & !$OMP SHARED(r2,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -888,7 +863,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & tau , size(tau,1) * size(tau,2), & 1d0 , r2 , size(r2,1) * size(r2,2)) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -908,6 +883,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! 0.5d0, tau , size(tau,1) * size(tau,2), & ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf call wall_time(ti) call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) @@ -915,7 +891,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ if (cc_dev) then print*,'cW_vvvv:',tf-ti,'s' endif - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -923,7 +899,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! do e=1,nV ! do m=1,nO - ! r2(i,j,a,b) = r2(i,j,a,b) & + ! r2(i,j,a,b) = r2(i,j,a,b) & ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & @@ -944,7 +920,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do a = 1, nV do i = 1, nO @@ -961,24 +937,24 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ do b = 1, nV do e = 1, nV do m = 1, nO - B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) enddo enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) - + !$OMP PARALLEL & !$OMP SHARED(r2,X_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -992,15 +968,15 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(A_ovov,B_ovvo,X_ovvo) allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) - + !$OMP PARALLEL & !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do m = 1, nO do j = 1, nO @@ -1013,22 +989,22 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nV*nO*nO, nV, & 1d0, t1 , size(t1,1), & A_vvoo, size(A_vvoo,1), & 0d0, B_ovoo, size(B_ovoo,1)) - + call dgemm('N','N', nO*nV*nO, nV, nO, & 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & t1 , size(t1,1), & 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) - + !$OMP PARALLEL & !$OMP SHARED(r2,C_ovov,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1042,9 +1018,9 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vvoo, B_ovoo, C_ovov) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -1065,12 +1041,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & nV,nV,nO, v_vbvo) - + !$OMP PARALLEL & !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & !$OMP PRIVATE(i,j,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do e = 1, nV do j = 1, nO @@ -1093,12 +1069,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & ! t1 , size(t1,1), & ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) - + !$OMP PARALLEL & !$OMP SHARED(b,r2,X_vboo,nO,nV) & !$OMP PRIVATE(i,j,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) !do b = 1, nV do a = 1, nV @@ -1113,7 +1089,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP END DO !$OMP END PARALLEL enddo - + !deallocate(A_vvov)!,X_vvoo) deallocate(A_vbov, X_vboo, v_vbvo) allocate(X_vvoo(nV,nV,nO,nO)) @@ -1132,7 +1108,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! end do !end do !allocate(X_vvoo(nV,nV,nO,nO)) - + call dgemm('T','N', nV, nV*nO*nO, nO, & 1d0, t1 , size(t1,1), & v_ovoo, size(v_ovoo,1), & @@ -1142,7 +1118,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1154,7 +1130,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do end do !$OMP END DO - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1167,7 +1143,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + deallocate(X_vvoo) end @@ -1182,16 +1158,16 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) - + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) integer :: i,j,m,a,b allocate(cF_oo(nO,nO)) - + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) - + !do a=1,nV ! do i=1,nO ! do m=1,nO @@ -1218,13 +1194,13 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) ! end do ! end do !end do - + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) !$OMP PARALLEL & !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1247,20 +1223,20 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(cF_oo,X_oovv,Y_oovv) end @@ -1274,7 +1250,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) @@ -1282,14 +1258,14 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) integer :: i,j,a,b,e,m allocate(cF_ov(nO,nV)) - + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) !$OMP PARALLEL & !$OMP SHARED(r1,t2,cF_ov,nO,nV) & !$OMP PRIVATE(i,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(1) do a=1,nV do i=1,nO @@ -1334,22 +1310,22 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) !$OMP SHARED(nO,nV,r2,X_oovv) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vv) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -1367,17 +1343,17 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) ! end do !end do allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) - + call dgemm('N','T', nO, nO, nV, & 1d0, t1 , size(t1,1), & cF_ov, size(cF_ov,1), & 0d0, A_oo , size(A_oo,1)) - + !$OMP PARALLEL & !$OMP SHARED(t2,B_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -1390,7 +1366,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nO*nV*nV, nO, & 0.5d0, A_oo, size(A_oo,1), & B_oovv, size(B_oovv,1), & @@ -1400,7 +1376,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) !$OMP SHARED(r2,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1413,9 +1389,9 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(cF_ov,A_oo,B_oovv,X_oovv) - + end ! Use cF_vv @@ -1426,18 +1402,18 @@ subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) double precision, allocatable :: cF_vv(:,:) integer :: i,j,a,b,e,m allocate(cF_vv(nV,nV)) - + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) deallocate(cF_vv) - + end ! Use cW_vvvd @@ -1450,7 +1426,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) double precision, intent(in) :: v_oovv(nO,nO,nV,nV) !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) - + double precision, intent(inout) :: r2(nO,nO,nV,nV) double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) @@ -1460,7 +1436,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) !PROVIDE cc_nVab - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -1476,14 +1452,14 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) ! end do ! end do !end do - + do f = 1, nV call wall_time(ti) !$OMP PARALLEL & !$OMP SHARED(tau,tau_f,f,nO,nV) & !$OMP PRIVATE(i,j,e) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do e = 1, nV do j = 1, nO @@ -1515,7 +1491,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) if (cc_dev .and. f == 1) then print*,'vovf', tf-ti endif - + call wall_time(ti) call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) call wall_time(tf) @@ -1535,7 +1511,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) enddo deallocate(cW_vvvf,v_vvvf,v_vovf) - + end ! cF_oo @@ -1562,7 +1538,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & !$OMP PRIVATE(i,m,n,e) & !$OMP DEFAULT(NONE) - + !do i=1,nO ! do m=1,nO ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) @@ -1580,7 +1556,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) cF_oo(i,i) = 0d0 end do !$OMP END DO - + do e=1,nV do n=1,nO !$OMP DO collapse(1) @@ -1620,8 +1596,8 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 0.5d0, v_oovv, size(v_oovv,1), & tau_t , size(tau_t,1), & - 1d0 , cF_oo , size(cF_oo,1)) - + 1d0 , cF_oo , size(cF_oo,1)) + end ! cF_ov @@ -1643,7 +1619,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & !$OMP PRIVATE(i,a,m,n,e,f) & !$OMP DEFAULT(NONE) - + !cF_ov = Fov !$OMP DO collapse(1) @@ -1659,7 +1635,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) end do !$OMP END DO !$OMP END PARALLEL - + end ! cF_vv @@ -1677,7 +1653,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) double precision,intent(out) :: cF_vv(nV,nV) - + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) integer :: i,j,m,n integer :: a,b,e,f @@ -1699,7 +1675,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) enddo !$OMP END DO !$OMP END PARALLEL - + !do e=1,nV ! do a=1,nV ! do m=1,nO @@ -1711,7 +1687,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) -0.5d0, t1 , size(t1,1), & Fov , size(Fov,1), & 1d0 , cF_vv, size(cF_vv,1)) - + !do e=1,nV ! do a=1,nV ! do m=1,nO @@ -1791,7 +1767,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) integer :: a,b,e,f double precision, allocatable :: X_oooo(:,:,:,:) - ! oooo block + ! oooo block !cW_oooo = v_oooo @@ -1809,7 +1785,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) ! end do !end do allocate(X_oooo(nO,nO,nO,nO)) - + call dgemm('N','T', nO*nO*nO, nO, nV, & 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & t1 , size(t1,1), & @@ -1830,14 +1806,14 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(X_oooo) - + !do m=1,nO ! do n=1,nO ! do i=1,nO ! do j=1,nO - ! + ! ! do e=1,nV ! do f=1,nV ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) @@ -1853,7 +1829,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & tau , size(tau,1) * size(tau,2), & 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) - + end ! cW_ovvo @@ -1913,7 +1889,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & nO,nV,nV, v_ovev) - + call dgemm('N','T', nO*nV, nO, nV, & 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & t1 , size(t1,1), & @@ -1950,14 +1926,14 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) ! end do ! end do !end do - + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) - + !$OMP PARALLEL & !$OMP SHARED(A_oovo,v_oovo,nO,nV) & !$OMP PRIVATE(j,e,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j=1,nO do e=1,nV @@ -1970,17 +1946,17 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) end do !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nV, nO*nV*nO, nO, & 1d0, t1 , size(t1,1), & A_oovo, size(A_oovo,1), & 0d0, B_vovo, size(B_vovo,1)) - + !$OMP PARALLEL & !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & !$OMP PRIVATE(j,e,m,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j=1,nO do e=1,nV @@ -2015,7 +1991,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & !$OMP PRIVATE(f,n,m,e,j,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do j = 1, nO @@ -2039,19 +2015,19 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & B_voov, size(B_voov,1) * size(B_voov,2), & 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) - + deallocate(A_voov,B_voov) !$OMP PARALLEL & !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & !$OMP PRIVATE(j,e,m,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j = 1, nO do e = 1, nV @@ -2064,7 +2040,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(C_ovov) end @@ -2072,7 +2048,7 @@ end ! cW_vvvv subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) - + implicit none integer,intent(in) :: nO,nV @@ -2154,14 +2130,14 @@ subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_ovvv,B_vvvv) !do a=1,nV ! do b=1,nV ! do e=1,nV ! do f=1,nV - ! + ! ! do m=1,nO ! do n=1,nO ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) @@ -2182,7 +2158,7 @@ end ! cW_vvvf subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) - + implicit none integer,intent(in) :: nO,nV,f @@ -2207,7 +2183,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & !$OMP PRIVATE(a,b,c,d,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do c = 1, nV do b = 1, nV @@ -2248,7 +2224,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) 1d0, t1 , size(t1,1), & A_ovvf, size(A_ovvf,1), & 0d0, B_vvvf, size(B_vvvf,1)) - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & !$OMP PRIVATE(a,b,c,d,e,m,n) & @@ -2264,14 +2240,14 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) end do end do !$OMP END DO NOWAIT - + !deallocate(A_ovvf,B_vvvf) !do a=1,nV ! do b=1,nV ! do e=1,nV ! do f=1,nV - ! + ! ! do m=1,nO ! do n=1,nO ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) @@ -2292,13 +2268,13 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) enddo enddo !$OMP END DO - !$OMP END PARALLEL - + !$OMP END PARALLEL + call dgemm('T','N', nV*nV, nV, nO*nO, & 0.25d0, tau , size(tau,1) * size(tau,2), & v_oovf , size(v_oovf,1) * size(v_oovf,2), & 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) - + deallocate(v_oovf) deallocate(A_ovvf,B_vvvf) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 6f21c316..eebc84ca 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -1006,6 +1006,22 @@ BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)] END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_oo, (cc_nOab, cc_nOab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nO_m, cc_nO_S, cc_nO_S, cc_list_occ_spin, cc_list_occ_spin, cc_nOab, cc_nOab, cc_spin_f_oo) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_ov, (cc_nOab, cc_nVab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nV_m, cc_nO_S, cc_nV_S, cc_list_occ_spin, cc_list_vir_spin, cc_nOab, cc_nVab, cc_spin_f_ov) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_vv, (cc_nVab, cc_nVab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nV_m, cc_nV_m, cc_nV_S, cc_nV_S, cc_list_vir_spin, cc_list_vir_spin, cc_nVab, cc_nVab, cc_spin_f_vv) +END_PROVIDER + ! Shift subroutine shift_idx_spin(s,n_S,shift) From e3e874879f47013b48963d729f8f312bf6ddf33b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Feb 2025 18:11:47 +0100 Subject: [PATCH 14/59] Introduced Cholesky in gen_spin_space --- src/utils_cc/mo_integrals_cc.irp.f | 1384 ++++++++++++++++++++-------- 1 file changed, 1022 insertions(+), 362 deletions(-) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index eebc84ca..813c186a 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -47,7 +47,7 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:,:,:) double precision, allocatable :: v1(:,:,:), v2(:,:,:) allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) @@ -132,7 +132,7 @@ end BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) call set_multiple_levels_omp(.False.) @@ -190,7 +190,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -233,7 +233,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -277,7 +277,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -311,7 +311,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -345,7 +345,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -379,7 +379,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -422,7 +422,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -465,7 +465,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -533,7 +533,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -567,7 +567,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -1169,7 +1169,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, implicit none BEGIN_DOC - ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Compute the 2e-integrals corresponding to four lists of spin orbitals. ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... END_DOC @@ -1178,129 +1178,306 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer, intent(in) :: dim1, dim2, dim3, dim4 double precision, intent(out) :: v(dim1,dim2,dim3,dim4) - double precision :: mo_two_e_integral + double precision, external :: mo_two_e_integral integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l integer :: i_shift,j_shift,k_shift,l_shift integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & - !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & - !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& - !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& - !$OMP DEFAULT(NONE) + if (do_mo_cholesky) then - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO + !$OMP END PARALLEL - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = v(idx_i,idx_j,idx_k,idx_l) - buffer(j,k,i,l) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO + !$OMP END PARALLEL - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l) + enddo enddo enddo enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v(idx_i,idx_j,idx_k,idx_l) = 0d0 + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(j,k,i,l) + enddo enddo enddo enddo - enddo - !$OMP END DO - endif + !$OMP END DO + !$OMP END PARALLEL + deallocate(v1, v2, buffer) + + else + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL + + else + !$OMP PARALLEL & + !$OMP SHARED(n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + idx_l = tmp_l + l_shift + idx_k = tmp_k + k_shift + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif end + ! V_3idx subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) @@ -1323,7 +1500,8 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_l <= n4_S(1)) then sl = 1 @@ -1334,99 +1512,255 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - !$OMP PARALLEL & - !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & - !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & - !$OMP i,j,k,idx_i,idx_j,idx_k,& - !$OMP tmp_i,tmp_j,tmp_k)& - !$OMP DEFAULT(NONE) + if (do_mo_cholesky) then - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - k = list3(tmp_k,sk) + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1)) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(i,k,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1)) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = v_l(idx_i,idx_j,idx_k) - buffer(j,k,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1)) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(i,k,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1)) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(j,k,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - enddo - enddo - enddo - !$OMP END DO - - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) - enddo - enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_l(idx_i,idx_j,idx_k) = 0d0 - enddo - enddo - enddo - !$OMP END DO - endif + endif + enddo enddo enddo - enddo - !$OMP END PARALLEL + + + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + idx_k = tmp_k + k_shift + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + + endif end @@ -1452,7 +1786,8 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_k <= n3_S(1)) then sk = 1 @@ -1463,100 +1798,257 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - !$OMP PARALLEL & - !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & - !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & - !$OMP i,j,l,idx_i,idx_j,idx_l,& - !$OMP tmp_i,tmp_j,tmp_l)& - !$OMP DEFAULT(NONE) + if (do_mo_cholesky) then - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(i,j,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = v_k(idx_i,idx_j,idx_l) - buffer(j,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(i,j,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(j,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) idx_l = tmp_l + l_shift - j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - enddo - enddo - enddo - !$OMP END DO - - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) - enddo - enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - j = list2(tmp_j,sj) - idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_k(idx_i,idx_j,idx_l) = 0d0 - enddo - enddo - enddo - !$OMP END DO - endif + endif + enddo enddo enddo - enddo - !$OMP END PARALLEL + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + + endif end ! V_3idx_i_kl @@ -1581,7 +2073,8 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_j <= n2_S(1)) then sj = 1 @@ -1592,98 +2085,265 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - !$OMP PARALLEL & - !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & - !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & - !$OMP i,k,l,idx_i,idx_k,idx_l,& - !$OMP tmp_i,tmp_k,tmp_l)& - !$OMP DEFAULT(NONE) - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) + if (do_mo_cholesky) then + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(i,k,l) + enddo + enddo enddo - enddo - enddo - !$OMP END DO + !$OMP END DO + !$OMP END PARALLEL - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - enddo - enddo - enddo - !$OMP END DO + deallocate(v1, v2, buffer) - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) - enddo - enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) - idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) - idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_j(idx_i,idx_k,idx_l) = 0d0 - enddo - enddo - enddo - !$OMP END DO - endif + allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl))) + call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = v_j(idx_i,idx_k,idx_l) - buffer(k,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(i,k,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(k,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + enddo enddo enddo - enddo - !$OMP END PARALLEL + + + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + + endif end From 3d46cde2e46c8f6dd0eb0d56d5ff446d649907bb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Feb 2025 13:35:38 +0100 Subject: [PATCH 15/59] Cleaning ccsd_spin --- src/ccsd/ccsd_space_orb_sub.irp.f | 10 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 156 ++++++----------------------- src/utils_cc/mo_integrals_cc.irp.f | 101 +++++++++++++++++++ 3 files changed, 131 insertions(+), 136 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 30f134fc..7cd4c50c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -26,7 +26,6 @@ subroutine run_ccsd_space_orb double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) - integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa call set_multiple_levels_omp(.False.) @@ -38,9 +37,8 @@ subroutine run_ccsd_space_orb PROVIDE all_mo_integrals endif - det = psi_det(:,:,cc_ref) print*,'Reference determinant:' - call print_det(det,N_int) + call print_det(psi_det(1,1,cc_ref),N_int) nOa = cc_nOa nVa = cc_nVa @@ -57,10 +55,6 @@ subroutine run_ccsd_space_orb allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir - ! Debug - !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) - !print*,'occ',list_occ - !print*,'vir',list_vir ! GPU arrays call gpu_allocate(d_cc_space_f_oo, nO, nO) @@ -186,7 +180,7 @@ subroutine run_ccsd_space_orb call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) - call det_energy(det,uncorr_energy) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index fe202ebf..fa0983cd 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -14,65 +14,31 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) double precision, allocatable :: f_o(:), f_v(:) - double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) - double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) - double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) - double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) - double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) - double precision, allocatable :: v_vvvv(:,:,:,:) +! double precision, allocatable :: v_ovvv(:,:,:,:) double precision, allocatable :: all_err(:,:), all_t(:,:) logical :: not_converged - integer :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: n_spin(4) integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi - integer(bit_kind) :: det(N_int,2) - det = psi_det(:,:,cc_ref) + if (do_mo_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE all_mo_integrals + endif + print*,'Reference determinant:' - call print_det(det,N_int) - - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) - - ! Number of occ/vir spin orb per spin - nO_S = cc_nO_S !(/nOa,nOb/) - nV_S = cc_nV_S !(/nVa,nVb/) - ! Debug - !print*,nO_S,nV_S - - ! Maximal number of occ/vir - nO_m = cc_nO_m !max(nOa, nOb) - nV_m = cc_nV_m !max(nVa, nVb) - ! Debug - !print*,nO_m,nV_m + call print_det(psi_det(1,1,cc_ref),N_int) ! Allocation allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) - allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab)) - !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab)) - allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab)) - allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab)) - allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab)) - allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab)) - !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab)) - !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab)) - allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab)) - allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab)) - allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) - !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) - !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab)) - !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab)) - !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab)) - !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(f_o(cc_nOab), f_v(cc_nVab)) ! Allocation for the diis @@ -90,45 +56,20 @@ subroutine run_ccsd_spin_orb f_v(i) = cc_spin_f_vv(i,i) enddo - ! Bi electronic integrals from list - call wall_time(ti) - ! OOOO - call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo) - - ! OOO V - !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo) - call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo) - call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo) - call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov) - - ! OO VV - call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo) - !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo) - !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov) - call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo) - call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov) - call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv) - - ! O VVV - !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov) - !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv) - !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv) - - ! VVVV - !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv) - call wall_time(tf) - if (cc_dev) then - print*,'Load bi elec int:',tf-ti,'s' - endif ! Init of T t1 = 0d0 call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) - call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2) + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2) call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) + print*,'Det energy', uncorr_energy + + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + ! Loop init nb_iter = 0 not_converged = .True. @@ -137,11 +78,6 @@ subroutine run_ccsd_spin_orb max_r1 = 0d0 max_r2 = 0d0 - call det_energy(det,uncorr_energy) - print*,'Det energy', uncorr_energy - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) - print*,'guess energy', uncorr_energy+energy, energy - write(*,'(A77)') ' -----------------------------------------------------------------------------' write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' @@ -152,38 +88,16 @@ subroutine run_ccsd_spin_orb do while (not_converged) ! Intermediates - call wall_time(tbi) - call wall_time(ti) - call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo) - call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov) - call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv) - call wall_time(tf) - if (cc_dev) then - print*,'Compute cFs:',tf-ti,'s' - endif + call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) - call wall_time(ti) - call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) - call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) - !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) - call wall_time(tf) - if (cc_dev) then - print*,'Compute cFs:',tf-ti,'s' - endif + call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) ! Residuals - call wall_time(ti) - call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) - call wall_time(tf) - if (cc_dev) then - print*,'Compute r1:',tf-ti,'s' - endif - call wall_time(ti) - call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) - call wall_time(tf) - if (cc_dev) then - print*,'Compute r2:',tf-ti,'s' - endif + call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1) + call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2) ! Max elements in the residuals max_r1 = maxval(abs(r1(:,:))) @@ -213,7 +127,7 @@ subroutine run_ccsd_spin_orb endif ! Print - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) call wall_time(tfi) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & @@ -258,25 +172,13 @@ subroutine run_ccsd_spin_orb deallocate(r1,r2) deallocate(cF_oo,cF_ov,cF_vv) deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) - deallocate(v_oooo) - deallocate(v_ovoo,v_oovo) - deallocate(v_ovvo,v_ovov,v_oovv) double precision :: t_corr t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) - !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & - ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & - ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & - ! cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - - !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) - call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr) - !print*,'Working on it...' - !call abort + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) call wall_time(tb) print*,'Done' print*,'Time: ',tb-ta, ' s' @@ -290,9 +192,7 @@ subroutine run_ccsd_spin_orb call save_energy(uncorr_energy + energy, t_corr) deallocate(f_o,f_v) - deallocate(v_ooov,v_vvoo,t1,t2) - !deallocate(v_ovvv,v_vvvo,v_vovv) - !deallocate(v_vvvv) + deallocate(t1,t2) end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 813c186a..4053c49b 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -227,6 +227,19 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n END_PROVIDER +BEGIN_PROVIDER [double precision, cc_spin_v_oooo, (cc_nOab, cc_nOab, cc_nOab, cc_nOab)] + + implicit none + + !TODO + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nO_m, & + cc_nO_S,cc_nO_S,cc_nO_S,cc_nO_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nOab,cc_nOab,cc_nOab,cc_nOab, & + cc_spin_v_oooo) + +END_PROVIDER + ! vooo BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)] @@ -305,6 +318,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovoo, (cc_nOab,cc_nVab,cc_nOab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nO_m, & + cc_nO_S,cc_nV_S,cc_nO_S,cc_nO_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nOab,cc_nVab,cc_nOab,cc_nOab, & + cc_spin_v_ovoo) + +END_PROVIDER + ! oovo BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)] @@ -339,6 +363,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_oovo, (cc_nOab,cc_nOab,cc_nVab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nO_m, & + cc_nO_S,cc_nO_S,cc_nV_S,cc_nO_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, & + cc_nOab,cc_nOab,cc_nVab,cc_nOab, & + cc_spin_v_oovo) +END_PROVIDER + + ! ooov BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)] @@ -373,6 +408,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ooov, (cc_nOab,cc_nOab,cc_nOab,cc_nVab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nV_m, & + cc_nO_S,cc_nO_S,cc_nO_S,cc_nV_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, & + cc_nOab,cc_nOab,cc_nOab,cc_nVab, & + cc_spin_v_ooov) +END_PROVIDER + + ! vvoo BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] @@ -416,6 +462,18 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_v_vvoo, (cc_nVab,cc_nVab,cc_nOab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nO_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nO_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nVab,cc_nVab,cc_nOab,cc_nOab, & + cc_spin_v_vvoo) +END_PROVIDER + + ! vovo BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)] @@ -527,6 +585,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovvo, (cc_nOab,cc_nVab,cc_nVab,cc_nOab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nO_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + cc_nOab,cc_nVab,cc_nVab,cc_nOab, & + cc_spin_v_ovvo) +END_PROVIDER + ! ovov BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)] @@ -561,6 +629,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovov, (cc_nOab,cc_nVab,cc_nOab,cc_nVab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nV_m, & + cc_nO_S,cc_nV_S,cc_nO_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, & + cc_nOab,cc_nVab,cc_nOab,cc_nVab, & + cc_spin_v_ovov) +END_PROVIDER + ! oovv BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] @@ -595,6 +673,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_oovv, (cc_nOab,cc_nOab,cc_nVab,cc_nVab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nV_m, & + cc_nO_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + cc_nOab,cc_nOab,cc_nVab,cc_nVab, & + cc_spin_v_oovv) +END_PROVIDER + ! vvvo BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)] @@ -625,6 +713,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_n END_PROVIDER + ! ovvv BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)] @@ -677,6 +766,18 @@ BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, END_PROVIDER +BEGIN_PROVIDER [double precision, cc_spin_v_oo_chol, (cholesky_mo_num, cc_nOab, cc_nOab)] + + implicit none + integer :: list_occ(cc_nOab) + + list_occ(1:cc_nOa) = cc_list_occ_spin(1:cc_nOa,1) + list_occ(cc_nOa+1:cc_nOab) = cc_list_occ_spin(1:cc_nOb,2) + call gen_v_space_chol(cc_nOab, cc_nOab, list_occ, list_occ, & + cc_spin_v_oo_chol, cholesky_mo_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] From 243ee0ed1457d0bb3ea725a2676620584e2b27db Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Feb 2025 13:59:37 +0100 Subject: [PATCH 16/59] Introduced GPU arrays --- src/ccsd/ccsd_space_orb_sub.irp.f | 5 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 89 ++++++++++++++++++------------- 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 7cd4c50c..af7e9285 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -177,9 +177,11 @@ subroutine run_ccsd_space_orb call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2) call gpu_upload(h_t2, t2) + deallocate(h_t1, h_t2) - call update_tau_space(nO,nV,h_t1,t1,t2,tau) + call update_tau_space(nO,nV,t1%f,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy @@ -310,7 +312,6 @@ subroutine run_ccsd_space_orb call save_energy(uncorr_energy + energy, e_t) - deallocate(h_t1, h_t2) if (do_mo_cholesky) then call gpu_deallocate(d_cc_space_v_oo_chol) call gpu_deallocate(d_cc_space_v_ov_chol) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index fa0983cd..16062356 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -1,6 +1,5 @@ -! Code - subroutine run_ccsd_spin_orb + use gpu implicit none @@ -8,8 +7,6 @@ subroutine run_ccsd_spin_orb ! CCSD in spin orbitals END_DOC - double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) - double precision, allocatable :: r1(:,:), r2(:,:,:,:) double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) @@ -23,6 +20,9 @@ subroutine run_ccsd_spin_orb integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + type(gpu_double4) :: t2, r2, tau, tau_t + type(gpu_double2) :: t1, r1 + if (do_mo_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao @@ -34,13 +34,18 @@ subroutine run_ccsd_spin_orb call print_det(psi_det(1,1,cc_ref),N_int) ! Allocation - allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) - allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) allocate(f_o(cc_nOab), f_v(cc_nVab)) + call gpu_allocate(t1, cc_nOab,cc_nVab) + call gpu_allocate(r1, cc_nOab,cc_nVab) + call gpu_allocate(t2, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(r2, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(tau, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(tau_t, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + ! Allocation for the diis if (cc_update_method == 'diis') then allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth)) @@ -58,23 +63,29 @@ subroutine run_ccsd_spin_orb ! Init of T - t1 = 0d0 - call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) - call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2) - call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) - call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:) + allocate(h_t1(cc_nOab,cc_nVab), h_t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + h_t1 = 0d0 + call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,h_t1) + call gpu_upload(h_t1, t1) + + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,h_t2) + call gpu_upload(h_t2, t2) + + deallocate(h_t1,h_t2) + + call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f) call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy) print*,'guess energy', uncorr_energy+energy, energy ! Loop init nb_iter = 0 not_converged = .True. - r1 = 0d0 - r2 = 0d0 max_r1 = 0d0 max_r2 = 0d0 @@ -88,46 +99,46 @@ subroutine run_ccsd_spin_orb do while (not_converged) ! Intermediates - call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) - call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) - call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) + call compute_cF_oo(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1%f,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) - call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) - call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) + call compute_cW_oooo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) ! Residuals - call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1) - call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2) + call compute_r1_spin(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1%f) + call compute_r2_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2%f) ! Max elements in the residuals - max_r1 = maxval(abs(r1(:,:))) - max_r2 = maxval(abs(r2(:,:,:,:))) + max_r1 = maxval(abs(r1%f(:,:))) + max_r2 = maxval(abs(r2%f(:,:,:,:))) max_r = max(max_r1,max_r2) call wall_time(ti) ! Update if (cc_update_method == 'diis') then - !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2) + !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2) + call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1) - call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2) + call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1%f,t1%f) + call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2%f,t2%f) else print*,'Unkonw cc_method_method: '//cc_update_method endif - call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) - call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f) call wall_time(tf) if (cc_dev) then print*,'Update:',tf-ti,'s' endif ! Print - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy) call wall_time(tfi) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & @@ -159,8 +170,8 @@ subroutine run_ccsd_spin_orb print*,'' if (write_amplitudes) then - call write_t1(cc_nOab,cc_nVab,t1) - call write_t2(cc_nOab,cc_nVab,t2) + call write_t1(cc_nOab,cc_nVab,t1%f) + call write_t2(cc_nOab,cc_nVab,t2%f) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -168,8 +179,6 @@ subroutine run_ccsd_spin_orb if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif - deallocate(tau,tau_t) - deallocate(r1,r2) deallocate(cF_oo,cF_ov,cF_vv) deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) @@ -178,7 +187,7 @@ subroutine run_ccsd_spin_orb if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) call wall_time(tb) print*,'Done' print*,'Time: ',tb-ta, ' s' @@ -192,7 +201,13 @@ subroutine run_ccsd_spin_orb call save_energy(uncorr_energy + energy, t_corr) deallocate(f_o,f_v) - deallocate(t1,t2) + + call gpu_deallocate(t1) + call gpu_deallocate(r1) + call gpu_deallocate(t2) + call gpu_deallocate(r2) + call gpu_deallocate(tau) + call gpu_deallocate(tau_t) end From 6b597c5ceeb7ccb12e32f477c36989c957c7189a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 11:49:42 +0100 Subject: [PATCH 17/59] Optimization in CASSCF --- src/casscf_cipsi/chol_bielec.irp.f | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f index f69832c1..a22ad7f3 100644 --- a/src/casscf_cipsi/chol_bielec.irp.f +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -191,10 +191,15 @@ double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca) END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) +! double precision :: bielec_no_basis +! bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) + integer :: i + bielec_PQxx_no = 0.d0 + do i = 1, cholesky_mo_num + bielec_PQxx_no = bielec_PQxx_no + cholesky_no_total_transp(i,i_mo, j_mo) * cholesky_no_total_transp(i,ii_ca,jj_ca) + enddo end double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) @@ -206,10 +211,15 @@ double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) + double precision :: bielec_no_basis +! bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) + integer :: i + bielec_PxxQ_no = 0.d0 + do i = 1, cholesky_mo_num + bielec_PxxQ_no = bielec_PxxQ_no + cholesky_no_total_transp(i,i_mo, jj_ca) * cholesky_no_total_transp(i,ii_ca,j_mo) + enddo end From 8c7184fb774706add1e56ed536cb3f18126aa79d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 11:50:11 +0100 Subject: [PATCH 18/59] Speed up cache in integrals --- src/davidson/u0_hs2_u0.irp.f | 2 +- src/mo_two_e_ints/map_integrals.irp.f | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f index f2ce7aa9..dd5e01eb 100644 --- a/src/davidson/u0_hs2_u0.irp.f +++ b/src/davidson/u0_hs2_u0.irp.f @@ -158,7 +158,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift, double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) - PROVIDE ref_bitmask_energy N_int + PROVIDE ref_bitmask_energy N_int all_mo_integrals select case (N_int) case (1) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 5b233899..b5f78b7b 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -81,11 +81,15 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache + if (do_mo_cholesky) then call set_multiple_levels_omp(.False.) - !$OMP PARALLEL DO PRIVATE (k,l,ii) + + + !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max + print *, l do k=mo_integrals_cache_min,mo_integrals_cache_max ii = int(l-mo_integrals_cache_min,8) ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) @@ -101,7 +105,7 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s !$OMP END PARALLEL DO else - !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max do j=mo_integrals_cache_min,mo_integrals_cache_max From 4792e86e4d724fedb20dea047d9f7ce0107f40d2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 12:31:31 +0100 Subject: [PATCH 19/59] DGEMM in mcscf_fock --- src/casscf_cipsi/mcscf_fock.irp.f | 118 +++++++++++++++++++----------- 1 file changed, 74 insertions(+), 44 deletions(-) diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index 82b710a7..738dee2c 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -3,37 +3,52 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] ! the inactive Fock matrix, in molecular orbitals END_DOC implicit none - integer :: p,q,k,kk,t,tt,u,uu - double precision :: bielec_pxxq_no, bielec_pqxx_no - + integer :: i,p,q,k,kk,t,tt,u,uu + do q=1,mo_num do p=1,mo_num Fipq(p,q)=one_ints_no(p,q) end do end do - + ! the inactive Fock matrix do k=1,n_core_inact_orb - kk=list_core_inact(k) - do q=1,mo_num - do p=1,mo_num - Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q) - end do - end do + kk=list_core_inact_act(k) +! do q=1,mo_num +! do p=1,mo_num +! do i=1,cholesky_mo_num +! Fipq(p,q) = Fipq(p,q) + 2.d0* cholesky_no_total_transp(i,p,q) * cholesky_no_total_transp(i,kk,kk) +! enddo +! end do +! end do + call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, 2.d0, & + cholesky_no_total_transp, cholesky_mo_num, & + cholesky_no_total_transp(1,kk,kk), cholesky_mo_num, 1.d0, & + Fipq, mo_num*mo_num) +! do q=1,mo_num +! do p=1,mo_num +! do i=1,cholesky_mo_num +! Fipq(p,q) = Fipq(p,q) - cholesky_no_total_transp(i,p,kk) * cholesky_no_total_transp(i,kk,q) +! enddo +! end do +! end do + call dgemm('T','N', mo_num, mo_num, cholesky_mo_num, -1.d0, & + cholesky_no_total_transp(1,1,kk), cholesky_mo_num, & + cholesky_no_total_transp(1,kk,1), cholesky_mo_num*mo_num, 1.d0, & + Fipq, mo_num) end do - + if (bavard) then - integer :: i write(6,*) write(6,*) ' the diagonal of the inactive effective Fock matrix ' write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) write(6,*) end if - - + + END_PROVIDER - - + + BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] BEGIN_DOC ! the active active Fock matrix, in molecular orbitals @@ -45,27 +60,42 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] END_DOC implicit none integer :: p,q,k,kk,t,tt,u,uu - double precision :: bielec_pxxq_no, bielec_pqxx_no - + Fapq = 0.d0 - + ! the active Fock matrix, D0tu is diagonal do t=1,n_act_orb tt=list_act(t) - do q=1,mo_num - do p=1,mo_num - Fapq(p,q)+=occnum(tt) & - *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q)) - end do - end do +! do q=1,mo_num +! do p=1,mo_num +! do i=1,cholesky_mo_num +! Fapq(p,q) = Fapq(p,q) + occnum(tt)* cholesky_no_total_transp(i,p,q) * cholesky_no_total_transp(i,tt,tt) +! enddo +! end do +! end do + call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, occnum(tt), & + cholesky_no_total_transp, cholesky_mo_num, & + cholesky_no_total_transp(1,tt,tt), cholesky_mo_num, 1.d0, & + Fapq, mo_num*mo_num) +! do q=1,mo_num +! do p=1,mo_num +! do i=1,cholesky_mo_num +! Fapq(p,q) = Fapq(p,q) - 0.5d0*occnum(tt)*cholesky_no_total_transp(i,p,tt) * cholesky_no_total_transp(i,tt,q) +! enddo +! end do +! end do + call dgemm('T','N', mo_num, mo_num, cholesky_mo_num, -0.5d0*occnum(tt), & + cholesky_no_total_transp(1,1,tt), cholesky_mo_num, & + cholesky_no_total_transp(1,tt,1), cholesky_mo_num*mo_num, 1.d0, & + Fapq, mo_num) end do - + if (bavard) then integer :: i write(6,*) write(6,*) ' the effective Fock matrix over MOs' write(6,*) - + write(6,*) write(6,*) ' the diagonal of the inactive effective Fock matrix ' write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) @@ -75,35 +105,35 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num) write(6,*) end if - - + + END_PROVIDER - - BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_ao, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_ao, (ao_num, ao_num)] + + BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_ao, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_ao, (ao_num, ao_num)] implicit none BEGIN_DOC - ! mcscf_fock_alpha_ao are set to usual Fock like operator but computed with the MCSCF densities on the AO basis + ! mcscf_fock_alpha_ao are set to usual Fock like operator but computed with the MCSCF densities on the AO basis END_DOC SCF_density_matrix_ao_alpha = D0tu_alpha_ao SCF_density_matrix_ao_beta = D0tu_beta_ao - soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta + soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta mcscf_fock_beta_ao = fock_matrix_ao_beta mcscf_fock_alpha_ao = fock_matrix_ao_alpha -END_PROVIDER +END_PROVIDER - BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_mo, (mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_mo, (mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_mo, (mo_num, mo_num)] implicit none BEGIN_DOC - ! Mo_mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities on the MO basis + ! Mo_mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities on the MO basis END_DOC call ao_to_mo(mcscf_fock_alpha_ao,ao_num,mcscf_fock_alpha_mo,mo_num) call ao_to_mo(mcscf_fock_beta_ao,ao_num,mcscf_fock_beta_mo,mo_num) -END_PROVIDER +END_PROVIDER BEGIN_PROVIDER [ double precision, mcscf_fock_mo, (mo_num,mo_num) ] &BEGIN_PROVIDER [ double precision, mcscf_fock_diag_mo, (mo_num)] @@ -118,13 +148,13 @@ END_PROVIDER ! |-----------------------| ! | Fcv | F^a | Rvv | ! - ! C: Core, O: Open, V: Virtual - ! + ! C: Core, O: Open, V: Virtual + ! ! Rcc = Acc Fcc^a + Bcc Fcc^b ! Roo = Aoo Foo^a + Boo Foo^b ! Rvv = Avv Fvv^a + Bvv Fvv^b ! Fcv = (F^a + F^b)/2 - ! + ! ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) ! A,B: Coupling parameters ! @@ -133,7 +163,7 @@ END_PROVIDER ! cc oo vv ! A -0.5 0.5 1.5 ! B 1.5 0.5 -0.5 - ! + ! END_DOC integer :: i,j,n if (elec_alpha_num == elec_beta_num) then @@ -194,4 +224,4 @@ END_PROVIDER do i = 1, mo_num mcscf_fock_diag_mo(i) = mcscf_fock_mo(i,i) enddo -END_PROVIDER +END_PROVIDER From 077f32836b294f4c17d340a48528e1fa2082b9da Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 12:52:33 +0100 Subject: [PATCH 20/59] DGEMV in mcscf_fock --- src/casscf_cipsi/mcscf_fock.irp.f | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index 738dee2c..87cc062c 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] ! the inactive Fock matrix do k=1,n_core_inact_orb - kk=list_core_inact_act(k) + kk=list_core_inact(k) ! do q=1,mo_num ! do p=1,mo_num ! do i=1,cholesky_mo_num @@ -21,10 +21,11 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] ! enddo ! end do ! end do - call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, 2.d0, & + call dgemv('T', cholesky_mo_num, mo_num*mo_num, 2.d0, & cholesky_no_total_transp, cholesky_mo_num, & - cholesky_no_total_transp(1,kk,kk), cholesky_mo_num, 1.d0, & - Fipq, mo_num*mo_num) + cholesky_no_total_transp(1,kk,kk), 1, 1.d0, & + Fipq, 1) + ! do q=1,mo_num ! do p=1,mo_num ! do i=1,cholesky_mo_num @@ -73,10 +74,10 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] ! enddo ! end do ! end do - call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, occnum(tt), & + call dgemv('T', cholesky_mo_num, mo_num*mo_num, occnum(tt), & cholesky_no_total_transp, cholesky_mo_num, & - cholesky_no_total_transp(1,tt,tt), cholesky_mo_num, 1.d0, & - Fapq, mo_num*mo_num) + cholesky_no_total_transp(1,tt,tt), 1, 1.d0, & + Fapq, 1) ! do q=1,mo_num ! do p=1,mo_num ! do i=1,cholesky_mo_num From 410ed1d5625c2f7837f9b94ea8ac455c8a70bf29 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 14:18:02 +0100 Subject: [PATCH 21/59] Fast 4idx in CASSCF --- src/casscf_cipsi/gradient.irp.f | 59 ++++++++------- src/casscf_cipsi/hessian.irp.f | 75 +++++++++---------- src/casscf_cipsi/natorb.irp.f | 102 ++++++-------------------- src/mo_two_e_ints/map_integrals.irp.f | 2 - 4 files changed, 92 insertions(+), 146 deletions(-) diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f index 961d260d..cd608fb3 100644 --- a/src/casscf_cipsi/gradient.irp.f +++ b/src/casscf_cipsi/gradient.irp.f @@ -14,8 +14,8 @@ END_PROVIDER implicit none n_c_a_prov = n_core_inact_orb * n_act_orb n_c_v_prov = n_core_inact_orb * n_virt_orb - n_a_v_prov = n_act_orb * n_virt_orb - END_PROVIDER + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] &BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] @@ -28,7 +28,7 @@ END_PROVIDER BEGIN_DOC ! a list of the orbitals involved in the excitation END_DOC - + implicit none integer :: i,t,a,ii,tt,aa,indx,indx_tmp indx=0 @@ -48,7 +48,7 @@ END_PROVIDER mat_idx_c_a(ii,tt) = indx end do end do - + indx_tmp = 0 do ii=1,n_core_inact_orb i=list_core_inact(ii) @@ -61,11 +61,11 @@ END_PROVIDER indx_tmp += 1 list_idx_c_v(1,indx_tmp) = indx list_idx_c_v(2,indx_tmp) = ii - list_idx_c_v(3,indx_tmp) = aa + list_idx_c_v(3,indx_tmp) = aa mat_idx_c_v(ii,aa) = indx end do end do - + indx_tmp = 0 do tt=1,n_act_orb t=list_act(tt) @@ -82,7 +82,7 @@ END_PROVIDER mat_idx_a_v(tt,aa) = indx end do end do - + if (bavard) then write(6,*) ' Filled the table of the Monoexcitations ' do indx=1,nMonoEx @@ -90,7 +90,7 @@ END_PROVIDER ,excit(2,indx),' ',excit_class(indx) end do end if - + END_PROVIDER BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)] @@ -104,7 +104,7 @@ END_PROVIDER implicit none integer :: i,t,a,indx real*8 :: gradvec_it,gradvec_ia,gradvec_ta - + indx=0 norm_grad_vec2_tab = 0.d0 do i=1,n_core_inact_orb @@ -114,7 +114,7 @@ END_PROVIDER norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx) end do end do - + do i=1,n_core_inact_orb do a=1,n_virt_orb indx+=1 @@ -122,7 +122,7 @@ END_PROVIDER norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx) end do end do - + do t=1,n_act_orb do a=1,n_virt_orb indx+=1 @@ -130,7 +130,7 @@ END_PROVIDER norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx) end do end do - + norm_grad_vec2=0.d0 do indx=1,nMonoEx norm_grad_vec2+=gradvec2(indx)*gradvec2(indx) @@ -144,7 +144,7 @@ END_PROVIDER write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2 write(6,*) endif - + END_PROVIDER real*8 function gradvec_it(i,t) @@ -154,23 +154,30 @@ real*8 function gradvec_it(i,t) END_DOC implicit none integer :: i,t - + integer :: ii,tt,v,vv,x,y integer :: x3,y3 double precision :: bielec_PQxx_no - + ii=list_core_inact(i) tt=list_act(t) gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii)) gradvec_it-=occnum(tt)*Fipq(ii,tt) - do v=1,n_act_orb ! active - vv=list_act(v) - do x=1,n_act_orb ! active - x3=x+n_core_inact_orb ! list_act(x) - do y=1,n_act_orb ! active - y3=y+n_core_inact_orb ! list_act(y) + do y=1,n_act_orb ! active +! y3=y+n_core_inact_orb ! list_act(y) + do x=1,n_act_orb ! active +! x3=x+n_core_inact_orb ! list_act(x) + do v=1,n_act_orb ! active + vv=list_act(v) ! Gamma(2) a a a a 1/r12 i a a a - gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) +! gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) + integer :: ichol + double precision :: tmp + tmp = 0.d0 + do ichol=1,cholesky_mo_num + tmp = tmp + cholesky_no_total_transp(ichol,vv,ii) * cholesky_no_total_transp(ichol,list_act(x),list_act(y)) + enddo + gradvec_it = gradvec_it - 2.D0*P0tuvx_no(t,v,x,y)*tmp end do end do end do @@ -183,12 +190,12 @@ real*8 function gradvec_ia(i,a) END_DOC implicit none integer :: i,a,ii,aa - + ii=list_core_inact(i) aa=list_virt(a) gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii)) gradvec_ia*=2.D0 - + end function gradvec_ia real*8 function gradvec_ta(t,a) @@ -198,7 +205,7 @@ real*8 function gradvec_ta(t,a) END_DOC implicit none integer :: t,a,tt,aa,v,vv,x,y - + tt=list_act(t) aa=list_virt(a) gradvec_ta=0.D0 @@ -211,6 +218,6 @@ real*8 function gradvec_ta(t,a) end do end do gradvec_ta*=2.D0 - + end function gradvec_ta diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f index 1ee073d2..431a6979 100644 --- a/src/casscf_cipsi/hessian.irp.f +++ b/src/casscf_cipsi/hessian.irp.f @@ -11,13 +11,14 @@ real*8 function hessmat_itju(i,t,j,u) integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj real*8 :: term,t2 double precision :: bielec_pqxx_no,bielec_pxxq_no - + ii=list_core_inact(i) tt=list_act(t) if (i.eq.j) then if (t.eq.u) then ! diagonal element - term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & + term = occnum(tt)*Fipq(ii,ii) + & + 2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & -2.D0*(Fipq(ii,ii)+Fapq(ii,ii)) term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i)) term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt) & @@ -83,10 +84,10 @@ real*8 function hessmat_itju(i,t,j,u) end do end do end if - + term*=2.D0 hessmat_itju=term - + end function hessmat_itju real*8 function hessmat_itja(i,t,j,a) @@ -97,7 +98,7 @@ real*8 function hessmat_itja(i,t,j,a) integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y real*8 :: term double precision :: bielec_pqxx_no,bielec_pxxq_no - + ! it/ja ii=list_core_inact(i) tt=list_act(t) @@ -120,7 +121,7 @@ real*8 function hessmat_itja(i,t,j,a) end if term*=2.D0 hessmat_itja=term - + end function hessmat_itja real*8 function hessmat_itua(i,t,u,a) @@ -131,7 +132,7 @@ real*8 function hessmat_itua(i,t,u,a) integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 real*8 :: term double precision :: bielec_pqxx_no,bielec_pxxq_no - + ii=list_core_inact(i) tt=list_act(t) t3=t+n_core_inact_orb @@ -162,7 +163,7 @@ real*8 function hessmat_itua(i,t,u,a) end if term*=2.D0 hessmat_itua=term - + end function hessmat_itua real*8 function hessmat_iajb(i,a,j,b) @@ -173,7 +174,7 @@ real*8 function hessmat_iajb(i,a,j,b) integer :: i,a,j,b,ii,aa,jj,bb real*8 :: term double precision :: bielec_pqxx_no,bielec_pxxq_no - + ii=list_core_inact(i) aa=list_virt(a) if (i.eq.j) then @@ -199,7 +200,7 @@ real*8 function hessmat_iajb(i,a,j,b) end if term*=2.D0 hessmat_iajb=term - + end function hessmat_iajb real*8 function hessmat_iatb(i,a,t,b) @@ -210,7 +211,7 @@ real*8 function hessmat_iatb(i,a,t,b) integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 real*8 :: term double precision :: bielec_pqxx_no,bielec_pxxq_no - + ii=list_core_inact(i) aa=list_virt(a) tt=list_act(t) @@ -231,7 +232,7 @@ real*8 function hessmat_iatb(i,a,t,b) end if term*=2.D0 hessmat_iatb=term - + end function hessmat_iatb real*8 function hessmat_taub(t,a,u,b) @@ -243,7 +244,7 @@ real*8 function hessmat_taub(t,a,u,b) integer :: v3,x3 real*8 :: term,t1,t2,t3 double precision :: bielec_pqxx_no,bielec_pxxq_no - + tt=list_act(t) aa=list_virt(a) if (t == u) then @@ -311,12 +312,12 @@ real*8 function hessmat_taub(t,a,u,b) end do end do end if - + end if - + term*=2.D0 hessmat_taub=term - + end function hessmat_taub BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] @@ -326,7 +327,7 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] implicit none integer :: i,t,a,indx,indx_shift real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub - + !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & !$OMP PRIVATE(i,indx,t,a,indx_shift) @@ -339,9 +340,9 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] end do end do !$OMP END DO NOWAIT - + indx_shift = n_core_inact_orb*n_act_orb - !$OMP DO + !$OMP DO do a=1,n_virt_orb do i=1,n_core_inact_orb indx = a + (i-1)*n_virt_orb + indx_shift @@ -349,9 +350,9 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] end do end do !$OMP END DO NOWAIT - + indx_shift += n_core_inact_orb*n_virt_orb - !$OMP DO + !$OMP DO do a=1,n_virt_orb do t=1,n_act_orb indx = a + (t-1)*n_virt_orb + indx_shift @@ -360,7 +361,7 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] end do !$OMP END DO !$OMP END PARALLEL - + END_PROVIDER @@ -377,7 +378,7 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] real*8 :: hessmat_taub ! c-a c-v a-v ! c-a | X X X - ! c-v | X X + ! c-v | X X ! a-v | X provide all_mo_integrals @@ -390,12 +391,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP DO !!!! < Core-active| H |Core-active > - ! Core-active excitations + ! Core-active excitations do indx_tmp = 1, n_c_a_prov indx = list_idx_c_a(1,indx_tmp) i = list_idx_c_a(2,indx_tmp) t = list_idx_c_a(3,indx_tmp) - ! Core-active excitations + ! Core-active excitations do j = 1, n_core_inact_orb if (i.eq.j) then ustart=t @@ -418,12 +419,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP DO !!!! < Core-active| H |Core-VIRTUAL > - ! Core-active excitations + ! Core-active excitations do indx_tmp = 1, n_c_a_prov indx = list_idx_c_a(1,indx_tmp) i = list_idx_c_a(2,indx_tmp) t = list_idx_c_a(3,indx_tmp) - ! Core-VIRTUAL excitations + ! Core-VIRTUAL excitations do jndx_tmp = 1, n_c_v_prov jndx = list_idx_c_v(1,jndx_tmp) j = list_idx_c_v(2,jndx_tmp) @@ -441,12 +442,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP DO !!!! < Core-active| H |ACTIVE-VIRTUAL > - ! Core-active excitations + ! Core-active excitations do indx_tmp = 1, n_c_a_prov indx = list_idx_c_a(1,indx_tmp) i = list_idx_c_a(2,indx_tmp) t = list_idx_c_a(3,indx_tmp) - ! ACTIVE-VIRTUAL excitations + ! ACTIVE-VIRTUAL excitations do jndx_tmp = 1, n_a_v_prov jndx = list_idx_a_v(1,jndx_tmp) u = list_idx_a_v(2,jndx_tmp) @@ -466,12 +467,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx) !$OMP DO !!!!! < Core-VIRTUAL | H |Core-VIRTUAL > - ! Core-VIRTUAL excitations + ! Core-VIRTUAL excitations do indx_tmp = 1, n_c_v_prov indx = list_idx_c_v(1,indx_tmp) i = list_idx_c_v(2,indx_tmp) a = list_idx_c_v(3,indx_tmp) - ! Core-VIRTUAL excitations + ! Core-VIRTUAL excitations do j = 1, n_core_inact_orb if (i.eq.j) then bstart=a @@ -485,7 +486,7 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] enddo enddo enddo - + !$OMP END DO NOWAIT !$OMP END PARALLEL endif @@ -496,12 +497,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP DO !!!! < Core-VIRTUAL | H |Active-VIRTUAL > - ! Core-VIRTUAL excitations + ! Core-VIRTUAL excitations do indx_tmp = 1, n_c_v_prov indx = list_idx_c_v(1,indx_tmp) i = list_idx_c_v(2,indx_tmp) a = list_idx_c_v(3,indx_tmp) - ! Active-VIRTUAL excitations + ! Active-VIRTUAL excitations do jndx_tmp = 1, n_a_v_prov jndx = list_idx_a_v(1,jndx_tmp) t = list_idx_a_v(2,jndx_tmp) @@ -520,12 +521,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP DO !!!! < Active-VIRTUAL | H |Active-VIRTUAL > - ! Active-VIRTUAL excitations + ! Active-VIRTUAL excitations do indx_tmp = 1, n_a_v_prov indx = list_idx_a_v(1,indx_tmp) t = list_idx_a_v(2,indx_tmp) a = list_idx_a_v(3,indx_tmp) - ! Active-VIRTUAL excitations + ! Active-VIRTUAL excitations do u=t,n_act_orb if (t.eq.u) then bstart=a @@ -542,4 +543,4 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] !$OMP END DO NOWAIT !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f index 9ce90304..6376308d 100644 --- a/src/casscf_cipsi/natorb.irp.f +++ b/src/casscf_cipsi/natorb.irp.f @@ -72,84 +72,27 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] BEGIN_DOC ! 4-index transformation of 2part matrices END_DOC - integer :: i,j,k,l,p,q - real*8 :: d(n_act_orb) - ! index per index - ! first quarter - P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:) + double precision, allocatable :: tmp(:,:,:,:) + allocate(tmp(n_act_orb,n_act_orb,n_act_orb,n_act_orb)) - do j=1,n_act_orb - do k=1,n_act_orb - do l=1,n_act_orb - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - P0tuvx_no(p,j,k,l)=d(p) - end do - end do - end do - end do - ! 2nd quarter - do j=1,n_act_orb - do k=1,n_act_orb - do l=1,n_act_orb - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - P0tuvx_no(j,p,k,l)=d(p) - end do - end do - end do - end do - ! 3rd quarter - do j=1,n_act_orb - do k=1,n_act_orb - do l=1,n_act_orb - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - P0tuvx_no(j,k,p,l)=d(p) - end do - end do - end do - end do - ! 4th quarter - do j=1,n_act_orb - do k=1,n_act_orb - do l=1,n_act_orb - do p=1,n_act_orb - d(p)=0.D0 - end do - do p=1,n_act_orb - do q=1,n_act_orb - d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) - end do - end do - do p=1,n_act_orb - P0tuvx_no(j,k,l,p)=d(p) - end do - end do - end do - end do + call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, & + P0tuvx, n_act_orb, natorbsCI, n_act_orb, 0.d0, & + tmp, (n_act_orb*n_act_orb*n_act_orb)) + + call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, & + tmp, n_act_orb, natorbsCI, n_act_orb, 0.d0, & + P0tuvx_no, (n_act_orb*n_act_orb*n_act_orb)) + + call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, & + P0tuvx_no, n_act_orb, natorbsCI, n_act_orb, 0.d0, & + tmp, (n_act_orb*n_act_orb*n_act_orb)) + + call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, & + tmp, n_act_orb, natorbsCI, n_act_orb, 0.d0, & + P0tuvx_no, (n_act_orb*n_act_orb*n_act_orb)) + + deallocate(tmp) END_PROVIDER @@ -160,6 +103,7 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] BEGIN_DOC ! Transformed one-e integrals END_DOC + integer :: i,j, p, q real*8 :: d(n_act_orb) one_ints_no(:,:)=mo_one_e_integrals(:,:) @@ -168,10 +112,8 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] do j=1,mo_num do p=1,n_act_orb d(p)=0.D0 - end do - do p=1,n_act_orb do q=1,n_act_orb - d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + d(p) = d(p) + one_ints_no(list_act(q),j)*natorbsCI(q,p) end do end do do p=1,n_act_orb @@ -183,8 +125,6 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] do j=1,mo_num do p=1,n_act_orb d(p)=0.D0 - end do - do p=1,n_act_orb do q=1,n_act_orb d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) end do diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index b5f78b7b..5a005d7b 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -86,10 +86,8 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s call set_multiple_levels_omp(.False.) - !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max - print *, l do k=mo_integrals_cache_min,mo_integrals_cache_max ii = int(l-mo_integrals_cache_min,8) ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) From 8975617bf27e55f54d33c473a4c2ab927b6a9719 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 15:45:26 +0100 Subject: [PATCH 22/59] Dgemmized hessmat_taub --- src/casscf_cipsi/hessian.irp.f | 173 ++++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 35 deletions(-) diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f index 431a6979..1bcb64b0 100644 --- a/src/casscf_cipsi/hessian.irp.f +++ b/src/casscf_cipsi/hessian.irp.f @@ -241,73 +241,176 @@ real*8 function hessmat_taub(t,a,u,b) END_DOC implicit none integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y - integer :: v3,x3 - real*8 :: term,t1,t2,t3 + integer :: v3,x3, ichol + real*8 :: term,t1,t2,t3, tmp double precision :: bielec_pqxx_no,bielec_pxxq_no + double precision, allocatable :: tmp1(:), tmp2(:,:) + allocate(tmp1(n_act_orb)) + allocate(tmp2(n_act_orb,n_act_orb)) + tt=list_act(t) aa=list_virt(a) + if (t == u) then if (a == b) then ! ta/ta - t1=occnum(tt)*Fipq(aa,aa) + t1=occnum(tt)*Fipq(aa,aa) - occnum(tt)*Fipq(tt,tt) + t2=0.D0 - t3=0.D0 - t1-=occnum(tt)*Fipq(tt,tt) +! do x=1,n_act_orb +! x3=x+n_core_inact_orb +! do v=1,n_act_orb +! v3=v+n_core_inact_orb +! tmp = 0.d0 +! do ichol = 1, cholesky_mo_num +! tmp = tmp + cholesky_no_total_transp(ichol,aa,aa) * cholesky_no_total_transp(ichol,v3,x3) +! enddo +! t2 = t2 + 2.D0*P0tuvx_no(t,t,v,x)*tmp +! enddo +! enddo + + do x=1,n_act_orb + x3=x+n_core_inact_orb + call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, & + cholesky_no_total_transp(1,aa,aa), 1, 0.d0, & + tmp1, 1) + do v=1,n_act_orb + t2 = t2 + P0tuvx_no(t,t,v,x)*tmp1(v) + enddo + enddo +! do v=1,n_act_orb +! v3=v+n_core_inact_orb +! do x=1,n_act_orb +! x3=x+n_core_inact_orb +! tmp = 0.d0 +! do ichol = 1, cholesky_mo_num +! tmp = tmp + cholesky_no_total_transp(ichol,aa,x3) * cholesky_no_total_transp(ichol,v3,aa) +! enddo +! t2 = t2 + 2.d0*(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))*tmp +! end do +! end do + call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, & + cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, 0.d0, & + tmp2, n_act_orb) do v=1,n_act_orb - vv=list_act(v) - v3=v+n_core_inact_orb do x=1,n_act_orb - xx=list_act(x) - x3=x+n_core_inact_orb - t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & - +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & - bielec_pxxq_no(aa,x3,v3,aa)) - do y=1,n_act_orb - t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + t2 = t2 + P0tuvx_no(t,x,v,t)*tmp2(x,v) + P0tuvx_no(t,x,t,v)*tmp2(x,v) + enddo + enddo + + t3=0.D0 + do x=1,n_act_orb + xx=list_act(x) + do y=1,n_act_orb + do v=1,n_act_orb + t3 = t3 - P0tuvx_no(t,v,x,y)*bielecCI_no(v,t,y,xx) end do end do end do - term=t1+t2+t3 + term=t1+t2+t3*2.d0 + else + bb=list_virt(b) ! ta/tb b/=a term=occnum(tt)*Fipq(aa,bb) +! do v=1,n_act_orb +! vv=list_act(v) +! v3=v+n_core_inact_orb +! do x=1,n_act_orb +! xx=list_act(x) +! x3=x+n_core_inact_orb +! term+=2.D0*P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) +! end do +! end do + do x=1,n_act_orb + x3=x+n_core_inact_orb + call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, & + cholesky_no_total_transp(1,aa,bb), 1, 0.d0, & + tmp1, 1) + do v=1,n_act_orb + term = term + P0tuvx_no(t,t,v,x)*tmp1(v) + enddo + enddo + +! do v=1,n_act_orb +! vv=list_act(v) +! v3=v+n_core_inact_orb +! do x=1,n_act_orb +! xx=list_act(x) +! x3=x+n_core_inact_orb +! term+=2.d0*(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))*bielec_pxxq_no(aa,x3,v3,bb) +! end do +! end do + call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, & + cholesky_no_total_transp(1,n_core_inact_orb+1,bb), cholesky_mo_num, 0.d0, & + tmp2, n_act_orb) do v=1,n_act_orb - vv=list_act(v) - v3=v+n_core_inact_orb do x=1,n_act_orb - xx=list_act(x) - x3=x+n_core_inact_orb - term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & - +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & - *bielec_pxxq_no(aa,x3,v3,bb)) - end do - end do + term = term + P0tuvx_no(t,x,v,t)*tmp2(x,v) + P0tuvx_no(t,x,t,v)*tmp2(x,v) + enddo + enddo + end if + else + ! ta/ub t/=u uu=list_act(u) bb=list_virt(b) + term=0.D0 +! do v=1,n_act_orb +! vv=list_act(v) +! v3=v+n_core_inact_orb +! do x=1,n_act_orb +! xx=list_act(x) +! x3=x+n_core_inact_orb +! term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) +! end do +! end do + do x=1,n_act_orb + x3=x+n_core_inact_orb + call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, & + cholesky_no_total_transp(1,aa,bb), 1, 0.d0, & + tmp1, 1) + do v=1,n_act_orb + term = term + P0tuvx_no(t,u,v,x)*tmp1(v) + enddo + enddo + +! do v=1,n_act_orb +! vv=list_act(v) +! v3=v+n_core_inact_orb +! do x=1,n_act_orb +! xx=list_act(x) +! x3=x+n_core_inact_orb +! term+=2.D0*(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v))*bielec_pxxq_no(aa,x3,v3,bb) +! end do +! end do + call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, & + cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, & + cholesky_no_total_transp(1,n_core_inact_orb+1,bb), cholesky_mo_num, 0.d0, & + tmp2, n_act_orb) do v=1,n_act_orb - vv=list_act(v) - v3=v+n_core_inact_orb do x=1,n_act_orb - xx=list_act(x) - x3=x+n_core_inact_orb - term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & - +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & - *bielec_pxxq_no(aa,x3,v3,bb)) - end do - end do + term = term + P0tuvx_no(t,x,v,u)*tmp2(x,v)+P0tuvx_no(t,x,u,v)*tmp2(x,v) + enddo + enddo + if (a.eq.b) then term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) do v=1,n_act_orb do y=1,n_act_orb do x=1,n_act_orb - term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) - term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) + term = term - P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) & + - P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) end do end do end do From 4ecb15a727373d6a29f122dc165babaed237ffe9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 19:17:43 +0100 Subject: [PATCH 23/59] Trying to fix Davidson in CASSCF --- src/dav_general_mat/dav_general.irp.f | 41 ++++++++++++--------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index a277d9ef..114476c2 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -82,7 +82,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv nproc_target = nproc double precision :: rss integer :: maxab - maxab = sze + maxab = sze m=1 disk_based = .False. @@ -204,7 +204,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv u_in(i,k) = r1*dcos(r2) enddo enddo - ! Normalize all states + ! Normalize all states do k=1,N_st_diag call normalize(u_in(:,k),sze) enddo @@ -228,20 +228,13 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter - if ((iter > 1).or.(itertot == 1)) then - ! Compute |W_k> = \sum_i |i> - ! ----------------------------------- + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- - ! Gram-Smitt to orthogonalize all new guess with the previous vectors - call ortho_qr(U,size(U,1),sze,shift2) - call ortho_qr(U,size(U,1),sze,shift2) + ! Gram-Smitt to orthogonalize all new guess with the previous vectors + call ortho_qr(U,size(U,1),sze,shift2) -! call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze) - call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat) - else - ! Already computed in update below - continue - endif + call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat) ! Compute h_kl = = ! ------------------------------------------- @@ -311,12 +304,12 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv do i=1,sze U(i,shift2+k) = & (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - /max(H_jj(i) - lambda (k),1.d-2) + /max(dabs(H_jj(i) - lambda (k)),1.d-2) * dsign(1d0,H_jj(i) - lambda (k)) enddo if (k <= N_st) then residual_norm(k) = u_dot_u(U(:,shift2+k),sze) - to_print(1,k) = lambda(k) + to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif enddo @@ -324,7 +317,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv if ((itertot>1).and.(iter == 1)) then - !don't print + !don't print continue else write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) @@ -333,11 +326,11 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! Check convergence if (iter > 1) then converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson - endif - + endif + do k=1,N_st - if (residual_norm(k) > 1.e8) then + if (residual_norm(k) > 1.d8) then print *, 'Davidson failed' stop -1 endif @@ -365,13 +358,15 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag do i=1,sze U(i,k) = u_in(i,k) enddo enddo - call ortho_qr(U,size(U,1),sze,N_st_diag) - call ortho_qr(U,size(U,1),sze,N_st_diag) + + call ortho_qr(U,size(U,1),sze,N_st_diag) + do j=1,N_st_diag k=1 do while ((k Date: Fri, 7 Feb 2025 18:38:52 +0100 Subject: [PATCH 24/59] added mo_in_r on extra grid --- src/dft_utils_in_r/ao_in_r.irp.f | 6 ++--- src/dft_utils_in_r/mo_in_r.irp.f | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f index e9c003d4..c8822776 100644 --- a/src/dft_utils_in_r/ao_in_r.irp.f +++ b/src/dft_utils_in_r/ao_in_r.irp.f @@ -185,9 +185,9 @@ END_PROVIDER END_PROVIDER BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)] - implicit none + implicit none BEGIN_DOC - ! aos_in_r_array_extra(i,j) = value of the ith ao on the jth grid point + ! aos_in_r_array_extra(i,j) = value of the ith ao on the jth grid point of the EXTRA grid END_DOC integer :: i,j double precision :: aos_array(ao_num), r(3) @@ -214,7 +214,7 @@ END_PROVIDER BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)] BEGIN_DOC - ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point + ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid END_DOC implicit none diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index ad931402..623de4f8 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -181,3 +181,44 @@ END_PROVIDER + +!!!!!EXTRA GRID + + BEGIN_PROVIDER[double precision, mos_in_r_array_extra_omp, (mo_num,n_points_extra_final_grid)] + implicit none + BEGIN_DOC + ! mos_in_r_array_extra(i,j) = value of the ith mo on the jth grid point on the EXTRA GRID + END_DOC + integer :: i,j + double precision :: mos_array_extra(mo_num), r(3) + print*,'coucou' + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,r,mos_array_extra,j) & + !$OMP SHARED(mos_in_r_array_extra_omp,n_points_extra_final_grid,mo_num,final_grid_points_extra) + do i = 1, n_points_extra_final_grid + r(1) = final_grid_points_extra(1,i) + r(2) = final_grid_points_extra(2,i) + r(3) = final_grid_points_extra(3,i) + call give_all_mos_at_r(r,mos_array_extra) + do j = 1, mo_num + mos_in_r_array_extra_omp(j,i) = mos_array_extra(j) + enddo + enddo + !$OMP END PARALLEL DO + print*,'coucou fin' + END_PROVIDER + + + BEGIN_PROVIDER[double precision, mos_in_r_array_extra_transp,(n_points_extra_final_grid,mo_num)] + implicit none + BEGIN_DOC + ! mos_in_r_array_extra_transp(i,j) = value of the jth mo on the ith grid point + END_DOC + integer :: i,j + do i = 1, n_points_extra_final_grid + do j = 1, mo_num + mos_in_r_array_extra_transp(i,j) = mos_in_r_array_extra_omp(j,i) + enddo + enddo + END_PROVIDER From fe9ea75ae79a5c7b641733c27d0801aeb4bf3af5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Feb 2025 19:02:00 +0100 Subject: [PATCH 25/59] Added Spd module in OCaml --- bin/qp_convert_output_to_ezfio | 3 +-- ocaml/Angmom.ml | 31 ++++++++++++++++++++++++--- ocaml/Angmom.mli | 25 +++++++++++++++++++-- ocaml/Long_basis.ml | 4 ++-- src/mo_two_e_ints/map_integrals.irp.f | 1 - src/scf_utils/roothaan_hall_scf.irp.f | 2 +- 6 files changed, 55 insertions(+), 11 deletions(-) diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio index 6f2d02d0..95822770 100755 --- a/bin/qp_convert_output_to_ezfio +++ b/bin/qp_convert_output_to_ezfio @@ -154,8 +154,7 @@ def write_ezfio(res, filename): prim_num_max = ezfio.get_ao_basis_ao_prim_num_max() for i in range(len(res.basis)): - coefficient[ - i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] exponent[i] += [0. for j in range(len(exponent[i]), prim_num_max)] coefficient = reduce(lambda x, y: x + y, coefficient, []) diff --git a/ocaml/Angmom.ml b/ocaml/Angmom.ml index 2da09340..4e315a95 100644 --- a/ocaml/Angmom.ml +++ b/ocaml/Angmom.ml @@ -77,6 +77,7 @@ module Xyz = struct type t = { x: Positive_int.t ; y: Positive_int.t ; z: Positive_int.t } [@@deriving sexp] + type state_type = Null | X | Y | Z (** Builds an XYZ triplet from a string. @@ -147,8 +148,8 @@ module Xyz = struct in Positive_int.of_int (x+y+z) - (** Returns a list of XYZ powers for a given symmetry *) - let of_symmetry sym = + (** Returns a list of XYZ powers for a given angular momentum *) + let of_angmom sym = let l = Positive_int.to_int (to_l sym) in let create_z xyz = { x=xyz.x ; @@ -179,7 +180,31 @@ module Xyz = struct |> List.rev - (** Returns the symmetry corresponding to the XYZ triplet *) + (** Returns the angular momentum corresponding to the XYZ triplet *) + let to_symmetry sym = of_l (get_l sym) + +end + +module Spd = struct + type t = { l: st; m: int } [@@deriving sexp] + + let to_string { l ; m } = + (to_string l) ^ " " ^ (if m > 0 then "+" else "") ^ (string_of_int m) + + let of_string s = match String_ext.lsplit2 ~on:' ' s with + | Some (l, m) -> { l=of_string l ; m=int_of_string m } + | _ -> failwith ("Invalid Spd: "^s) + + (** Returns the l quantum number from a XYZ powers triplet *) + let get_l { l ; _ } = to_l l + + (** Returns a list of XYZ powers for a given angular momentum *) + let of_angmom sym = + let l = Positive_int.to_int (to_l sym) in + Array.init (2*l+1) (fun i -> { l=sym ; m=i-l }) + |> Array.to_list + + (** Returns the angular momentum corresponding to the XYZ triplet *) let to_symmetry sym = of_l (get_l sym) end diff --git a/ocaml/Angmom.mli b/ocaml/Angmom.mli index 2ab63003..006717aa 100644 --- a/ocaml/Angmom.mli +++ b/ocaml/Angmom.mli @@ -28,9 +28,30 @@ module Xyz : val get_l : t -> Qptypes.Positive_int.t (** Returns a list of XYZ powers for a given symmetry *) - val of_symmetry : st -> t list + val of_angmom : st -> t list (** Returns the symmetry corresponding to the XYZ powers *) val to_symmetry : t -> st - end +end + + +module Spd : + sig + type t = { l: st; m: int } [@@deriving sexp] + + (** The string format contains the l and m quantum numbers *) + + val of_string : string -> t + val to_string : t -> string + + (** Returns the quantum number l *) + val get_l : t -> Qptypes.Positive_int.t + + (** Returns a list of XYZ powers for a given symmetry *) + val of_angmom : st -> t list + + (** Returns the symmetry corresponding to the XYZ powers *) + val to_symmetry : t -> st + +end diff --git a/ocaml/Long_basis.ml b/ocaml/Long_basis.ml index a8ea3c66..96f7566b 100644 --- a/ocaml/Long_basis.ml +++ b/ocaml/Long_basis.ml @@ -10,7 +10,7 @@ let of_basis b = | (g,n)::tail -> begin let new_accu = - Angmom.Xyz.of_symmetry g.Gto.sym + Angmom.Xyz.of_angmom g.Gto.sym |> List.rev_map (fun x-> (x,g,n)) in do_work (new_accu@accu) tail @@ -25,7 +25,7 @@ let to_basis b = | [] -> List.rev accu | (s,g,n)::tail -> let first_sym = - Angmom.Xyz.of_symmetry g.Gto.sym + Angmom.Xyz.of_angmom g.Gto.sym |> List.hd in let new_accu = diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index b5f78b7b..c95a8f44 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -89,7 +89,6 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max - print *, l do k=mo_integrals_cache_min,mo_integrals_cache_max ii = int(l-mo_integrals_cache_min,8) ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 9e2ca4bc..4ba0964a 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -70,7 +70,7 @@ END_DOC dim_DIIS = min(dim_DIIS+1,max_dim_DIIS) - if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) ) then + if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-10) ) then ! Store Fock and error matrices at each iteration do j=1,ao_num From 9d2a2ee754b69972fe38ae8e4190bfa2df268ea5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Feb 2025 12:01:04 +0100 Subject: [PATCH 26/59] Introducing trexion convention --- bin/qp_convert_output_to_ezfio | 2 + external/ezfio | 2 +- ocaml/qp_create_ezfio.ml | 1 + scripts/qp_import_trexio.py | 16 +++-- src/ao_basis/aos.irp.f | 92 ++++++++++++++++++------- src/basis/EZFIO.cfg | 1 - src/basis/basis.irp.f | 69 ------------------- src/trexio/export_trexio_routines.irp.f | 35 ++++++---- 8 files changed, 101 insertions(+), 117 deletions(-) diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio index 95822770..9c0441b8 100755 --- a/bin/qp_convert_output_to_ezfio +++ b/bin/qp_convert_output_to_ezfio @@ -44,6 +44,7 @@ def write_ezfio(res, filename): res.clean_uncontractions() ezfio.set_file(filename) + ezfio.set_ezfio_files_ezfio_convention(20250211) # _ # |_ | _ _ _|_ ._ _ ._ _ @@ -172,6 +173,7 @@ def write_ezfio(res, filename): # ~#~#~#~#~ # ezfio.set_ao_basis_ao_coef(coef) + ezfio.set_basis_ao_normalized(True) ezfio.set_ao_basis_ao_expo(expo) ezfio.set_ao_basis_ao_basis("Read by resultsFile") diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 4e17c0ad..ad8d44ca 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -265,6 +265,7 @@ let run ?o b au c d m p cart xyz_file = let write_file () = (* Create EZFIO *) Ezfio.set_file ezfio_file; + Ezfio.set_files_ezfio_ezfio_convention 20250211; (* Write Pseudo *) let pseudo = diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 23f48eef..a515efba 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -84,6 +84,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_file(filename) ezfio.set_trexio_trexio_file(trexio_filename) + ezfio.set_ezfio_files_ezfio_convention(20250211) print("Nuclei\t\t...\t", end=' ') @@ -315,8 +316,8 @@ def write_ezfio(trexio_filename, filename): power_x.append(x) power_y.append(y) power_z.append(z) - coefficient.append(coef[i]) - exponent.append(expo[i]) + coefficient.append(list(coef[i])) + exponent.append(list(expo[i])) num_prim.append(num_prim0[i]) assert (len(coefficient) == ao_num) @@ -326,15 +327,15 @@ def write_ezfio(trexio_filename, filename): prim_num_max = max( [ len(x) for x in coefficient ] ) - ao_normalization = trexio.read_ao_normalization(trexio_file_cart) - for i, coef in enumerate(coefficient): - for j in range(len(coef)): - coef[j] *= ao_normalization[i] - for i in range(ao_num): coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] exponent [i] += [0. for j in range(len(exponent[i]), prim_num_max)] + ao_normalization = trexio.read_ao_normalization(trexio_file_cart) + for i in range(ao_num): + for j in range(prim_num_max): + coefficient[i][j] *= ao_normalization[i] + coefficient = reduce(lambda x, y: x + y, coefficient, []) exponent = reduce(lambda x, y: x + y, exponent , []) @@ -345,6 +346,7 @@ def write_ezfio(trexio_filename, filename): coef.append(coefficient[j]) expo.append(exponent[j]) + ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f index d718e935..440cc865 100644 --- a/src/ao_basis/aos.irp.f +++ b/src/ao_basis/aos.irp.f @@ -53,42 +53,84 @@ END_PROVIDER C_A(3) = 0.d0 ao_coef_normalized = 0.d0 - do i=1,ao_num + if (primitives_normalized) then - powA(1) = ao_power(i,1) - powA(2) = ao_power(i,2) - powA(3) = ao_power(i,3) + if (ezfio_convention >= 20250211) then + ! Same primitive normalization factors for all AOs of the same shell, or read from trexio file - ! Normalization of the primitives - if (primitives_normalized) then - do j=1,ao_prim_num(i) - call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), & - powA,powA,overlap_x,overlap_y,overlap_z,norm,nz) - ao_coef_normalized(i,j) = ao_coef(i,j)/dsqrt(norm) + do i=1,ao_num + k=1 + do while (k<=prim_num .and. shell_index(k) /= ao_shell(i)) + k = k+1 + end do + do j=1,ao_prim_num(i) + ao_coef_normalized(i,j) = ao_coef(i,j)*prim_normalization_factor(k+j-1) + enddo enddo + else + ! GAMESS convention for primitive factors + + do i=1,ao_num + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + + do j=1,ao_prim_num(i) + call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), & + powA,powA,overlap_x,overlap_y,overlap_z,norm,nz) + ao_coef_normalized(i,j) = ao_coef(i,j)/dsqrt(norm) + enddo + enddo + + endif + + else + + do i=1,ao_num do j=1,ao_prim_num(i) ao_coef_normalized(i,j) = ao_coef(i,j) enddo - endif - - ! Normalization of the contracted basis functions - norm = 0.d0 - do j=1,ao_prim_num(i) - do k=1,ao_prim_num(i) - call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz) - norm = norm+c*ao_coef_normalized(i,j)*ao_coef_normalized(i,k) - enddo enddo - ao_coef_normalization_factor(i) = 1.d0/dsqrt(norm) - if (ao_normalized) then - do j=1,ao_prim_num(i) - ao_coef_normalized(i,j) = ao_coef_normalized(i,j) * ao_coef_normalization_factor(i) + endif + + double precision, allocatable :: self_overlap(:) + allocate(self_overlap(ao_num)) + + do i=1,ao_num + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + self_overlap(i) = 0.d0 + do j=1,ao_prim_num(i) + do k=1,j-1 + call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz) + self_overlap(i) = self_overlap(i) + 2.d0*c*ao_coef_normalized(i,j)*ao_coef_normalized(i,k) enddo - else + call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j),powA,powA,overlap_x,overlap_y,overlap_z,c,nz) + self_overlap(i) = self_overlap(i) +c*ao_coef_normalized(i,j)*ao_coef_normalized(i,j) + enddo + enddo + + if (ao_normalized) then + + do i=1,ao_num + ao_coef_normalization_factor(i) = 1.d0/dsqrt(self_overlap(i)) + enddo + + else + + do i=1,ao_num ao_coef_normalization_factor(i) = 1.d0 - endif + enddo + + endif + + do i=1,ao_num + do j=1,ao_prim_num(i) + ao_coef_normalized(i,j) = ao_coef_normalized(i,j) * ao_coef_normalization_factor(i) + enddo enddo END_PROVIDER diff --git a/src/basis/EZFIO.cfg b/src/basis/EZFIO.cfg index 03e224e4..a9ec2c1b 100644 --- a/src/basis/EZFIO.cfg +++ b/src/basis/EZFIO.cfg @@ -84,4 +84,3 @@ type: logical doc: If true, normalize the basis functions interface: ezfio, provider, ocaml default: false - diff --git a/src/basis/basis.irp.f b/src/basis/basis.irp.f index 5374e5be..81282cb5 100644 --- a/src/basis/basis.irp.f +++ b/src/basis/basis.irp.f @@ -1,72 +1,3 @@ -BEGIN_PROVIDER [ double precision, shell_normalization_factor , (shell_num) ] - implicit none - BEGIN_DOC - ! Number of primitives per |AO| - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (.not.ao_normalized) then - shell_normalization_factor = 1.d0 - return - endif - - if (mpi_master) then - if (size(shell_normalization_factor) == 0) return - - call ezfio_has_basis_shell_normalization_factor(has) - if (has) then - write(6,'(A)') '.. >>>>> [ IO READ: shell_normalization_factor ] <<<<< ..' - call ezfio_get_basis_shell_normalization_factor(shell_normalization_factor) - else - - double precision :: norm,overlap_x,overlap_y,overlap_z,C_A(3), c - integer :: l, powA(3), nz - integer :: i,j,k - nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 - - do i=1,shell_num - - powA(1) = shell_ang_mom(i) - powA(2) = 0 - powA(3) = 0 - - norm = 0.d0 - do k=1, prim_num - if (shell_index(k) /= i) cycle - do j=1, prim_num - if (shell_index(j) /= i) cycle - call overlap_gaussian_xyz(C_A,C_A,prim_expo(j),prim_expo(k), & - powA,powA,overlap_x,overlap_y,overlap_z,c,nz) - norm = norm+c*prim_coef(j)*prim_coef(k) * prim_normalization_factor(j) * prim_normalization_factor(k) - enddo - enddo - shell_normalization_factor(i) = 1.d0/dsqrt(norm) - enddo - - endif - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( shell_normalization_factor, (shell_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read shell_normalization_factor with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, prim_normalization_factor , (prim_num) ] implicit none BEGIN_DOC diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c60b1aa0..6391233a 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -15,6 +15,8 @@ subroutine export_trexio(update,full_path) integer, external :: getunitandopen + integer :: i,j,l + if (full_path) then fp = trexio_filename call system('realpath '//trim(fp)//' > '//trim(fp)//'.tmp') @@ -271,7 +273,7 @@ subroutine export_trexio(update,full_path) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(shell_num)) - factor(1:shell_num) = shell_normalization_factor(1:shell_num) + factor(1:shell_num) = 1.d0 rc = trexio_write_basis_shell_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) @@ -312,22 +314,27 @@ subroutine export_trexio(update,full_path) rc = trexio_write_ao_shell(f(1), ao_shell) call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + if (ezfio_convention >= 20250211) then + rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor) + print *, ao_coef_normalization_factor(:) + else + integer :: pow0(3), powA(3), nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 - allocate(factor(ao_num)) - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - rc = trexio_write_ao_normalization(f(1), factor) + allocate(factor(ao_num)) + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + rc = trexio_write_ao_normalization(f(1), factor) + deallocate(factor) + endif call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) endif From 74d56370e8c6e8758fc22b50f02618d7bbb17d7a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Feb 2025 12:02:04 +0100 Subject: [PATCH 27/59] Added 1 to unsigned shift to avoid zero index --- external/ezfio | 2 +- src/determinants/spindeterminants.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index 87c5d360..01979c02 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -14,7 +14,7 @@ integer*8 function spin_det_search_key(det,Nint) END_DOC integer, intent(in) :: Nint integer(bit_kind), intent(in) :: det(Nint) - integer(bit_kind), parameter :: unsigned_shift = -huge(1_bit_kind) ! 100...00 + integer(bit_kind), parameter :: unsigned_shift = 1_bit_kind-huge(1_bit_kind) ! 100...00 integer :: i spin_det_search_key = det(1) do i=2,Nint From 4a8551be153a7ef1937ed887a33dfb0b870be196 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Feb 2025 13:26:36 +0100 Subject: [PATCH 28/59] Fixing previous commit --- ocaml/Angmom.ml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ocaml/Angmom.ml b/ocaml/Angmom.ml index 4e315a95..0f1658f0 100644 --- a/ocaml/Angmom.ml +++ b/ocaml/Angmom.ml @@ -2,6 +2,7 @@ open Qptypes open Sexplib.Std type t = S|P|D|F|G|H|I|J|K|L [@@deriving sexp] +type st = t [@@deriving sexp] let to_string = function | S -> "S" @@ -70,9 +71,6 @@ let of_l i = | x -> raise (Failure ("Angmom should be S|P|D|F|G|H|I|J|K|L")) -type st = t - - module Xyz = struct type t = { x: Positive_int.t ; y: Positive_int.t ; From eb6e1d4339fe6342da40c56184119c0545b97507 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Feb 2025 10:23:52 +0100 Subject: [PATCH 29/59] Introduced qp_bug.irp.f --- src/determinants/spindeterminants.irp.f | 22 ++++++++++++++++------ src/utils/bug.irp.f | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 src/utils/bug.irp.f diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index 01979c02..2f497bd7 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -197,7 +197,9 @@ integer function get_index_in_psi_det_alpha_unique(key,Nint) enddo i += 1 - ASSERT (i <= N_det_alpha_unique) + if (i> N_det_alpha_unique) then + call qp_bug(irp_here, i, 'i> N_det_alpha_unique') + endif !DIR$ FORCEINLINE do while (spin_det_search_key(psi_det_alpha_unique(1,i),Nint) == det_ref) @@ -219,12 +221,15 @@ integer function get_index_in_psi_det_alpha_unique(key,Nint) endif i += 1 if (i > N_det_alpha_unique) then - ASSERT (get_index_in_psi_det_alpha_unique > 0) - return + exit endif enddo + if (get_index_in_psi_det_alpha_unique <= 0) then + call qp_bug(irp_here, get_index_in_psi_det_alpha_unique, 'get_index_in_psi_det_alpha_unique <= 0') + endif + end integer function get_index_in_psi_det_beta_unique(key,Nint) @@ -277,7 +282,9 @@ integer function get_index_in_psi_det_beta_unique(key,Nint) enddo i += 1 - ASSERT (i <= N_det_beta_unique) + if (i > N_det_beta_unique) then + call qp_bug(irp_here, i, 'i> N_det_beta_unique') + endif !DIR$ FORCEINLINE do while (spin_det_search_key(psi_det_beta_unique(1,i),Nint) == det_ref) @@ -299,12 +306,15 @@ integer function get_index_in_psi_det_beta_unique(key,Nint) endif i += 1 if (i > N_det_beta_unique) then - ASSERT (get_index_in_psi_det_beta_unique > 0) - return + exit endif enddo + if (get_index_in_psi_det_beta_unique <= 0) then + call qp_bug(irp_here, i, 'get_index_in_psi_det_beta_unique <= 0') + endif + end diff --git a/src/utils/bug.irp.f b/src/utils/bug.irp.f new file mode 100644 index 00000000..0e2ad551 --- /dev/null +++ b/src/utils/bug.irp.f @@ -0,0 +1,23 @@ +subroutine qp_bug(from, code, message) + implicit none + BEGIN_DOC +! This routine prints a bug report + END_DOC + character*(*) :: from + integer :: code + character*(*) :: message + + print *, '' + print *, '=======================' + print *, 'Bug in Quantum Package!' + print *, '=======================' + print *, '' + print *, ' from: ', trim(from) + print *, ' code: ', code + print *, ' info: ', trim(message) + print *, '' + print *, 'Please report this bug at https://github.com/QuantumPackage/qp2/issues' + print *, 'with your output file attached.' + print *, '' + stop -1 +end subroutine qp_bug From 70ad9f31b390b5931392a52ae375fab7c1e9f241 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Feb 2025 11:58:46 +0100 Subject: [PATCH 30/59] Accelerated PT2-cholesky with dgemm --- external/ezfio | 2 +- src/cipsi/selection.irp.f | 28 +++++++++++++------ src/mo_two_e_ints/map_integrals.irp.f | 40 ++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/external/ezfio b/external/ezfio index d02132ea..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index af7420c8..8c22ec85 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -1478,17 +1478,21 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) +! double precision, allocatable :: hij_cache1(:), hij_cache2(:) + double precision, allocatable :: hij_cache1(:,:), hij_cache2(:,:) +! allocate (hij_cache1(mo_num),hij_cache2(mo_num)) PROVIDE mo_integrals_threshold if(sp == 3) then ! AB + + allocate(hij_cache1(mo_num,mo_num)) + h1 = p(1,1) h2 = p(1,2) + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache1,mo_integrals_map) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1497,7 +1501,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache1(p2,p1) * phase end if if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) @@ -1507,13 +1511,18 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end do + deallocate(hij_cache1) + else ! AA BB + + allocate(hij_cache1(mo_num,mo_num),hij_cache2(mo_num,mo_num)) + p1 = p(1,sp) p2 = p(2,sp) + call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache1,mo_integrals_map) + call get_mo_two_e_integrals_ij(p1,p2,mo_num,hij_cache2,mo_integrals_map) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? @@ -1522,7 +1531,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (dabs(hij) < mo_integrals_threshold) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) + hij = hij_cache1(putj,puti) - hij_cache2(putj,puti) if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1532,9 +1541,12 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end do end do + + deallocate(hij_cache1,hij_cache2) + end if - deallocate(hij_cache1,hij_cache2) +! deallocate(hij_cache1,hij_cache2) end diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 5a005d7b..8f3c72d0 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -262,8 +262,14 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) out_val, 1) else integer :: isplit - out_val = 0.d0 - do isplit=1,4 + call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & + mo_integrals_cache_min-1, 1., & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & + out_val_sp, 1) + out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) + out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1) + do isplit=2,4 call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & mo_integrals_cache_min-1, 1., & cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & @@ -442,16 +448,36 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) double precision, intent(out) :: out_array(sze,sze) type(map_type), intent(inout) :: map integer :: j - real(integral_kind), allocatable :: tmp_val(:) if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max Date: Thu, 13 Feb 2025 15:05:27 +0100 Subject: [PATCH 31/59] Add possibility of single precision cholesky vectors --- src/ao_two_e_ints/cholesky.irp.f | 2 +- src/mo_two_e_ints/EZFIO.cfg | 6 + src/mo_two_e_ints/cholesky.irp.f | 6 +- src/mo_two_e_ints/map_integrals.irp.f | 193 +++++++++++++++----------- 4 files changed, 121 insertions(+), 86 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index eb2f092a..d15ebdc3 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -469,7 +469,7 @@ END_PROVIDER !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank do j=1,ao_num - cholesky_ao(1:ao_num,j,rank-k+1) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1) + cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1) enddo enddo !$OMP END PARALLEL DO diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index da9d8fc9..f26bfb61 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -29,4 +29,10 @@ doc: Read/Write MO integrals with the long range interaction from/to disk [ W interface: ezfio,provider,ocaml default: None +[mo_cholesky_double] +type: logical +doc: Use double precision to build integrals from Cholesky vectors +interface: ezfio,provider,ocaml +default: True + diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 835110de..062e52e2 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -175,9 +175,9 @@ BEGIN_PROVIDER [ real, cholesky_mo_transp_sp, (cholesky_mo_num, mo_num, mo_num) integer :: i,j,k !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_mo_num - do j=1,mo_num - do i=1,mo_num + do j=1,mo_num + do i=1,mo_num + do k=1,cholesky_mo_num cholesky_mo_transp_sp(k,i,j) = cholesky_mo_transp(k,i,j) enddo enddo diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 8f3c72d0..bc83fbdd 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -9,15 +9,6 @@ BEGIN_PROVIDER [ logical, all_mo_integrals ] PROVIDE mo_two_e_integrals_in_map mo_integrals_cache mo_two_e_integrals_jj_exchange mo_two_e_integrals_jj_anti mo_two_e_integrals_jj big_array_exchange_integrals big_array_coulomb_integrals mo_one_e_integrals END_PROVIDER -BEGIN_PROVIDER [ logical, mo_cholesky_double ] - implicit none - BEGIN_DOC -! If true, use double precision to compute integrals from cholesky vectors - END_DOC - mo_cholesky_double = .True. -END_PROVIDER - - !! MO Map !! ====== @@ -193,13 +184,14 @@ double precision function get_two_e_integral(i,j,k,l,map) if (mo_cholesky_double) then get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) else - get_two_e_integral = 0.d0 - do isplit=1,4 - get_two_e_integral = get_two_e_integral + & - sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1) - enddo + get_two_e_integral = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,j,l), 1) +! get_two_e_integral = 0.d0 +! do isplit=1,4 +! get_two_e_integral = get_two_e_integral + & +! sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1) +! enddo endif else @@ -244,7 +236,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) ii = ior(ii, j-mo_integrals_cache_min) if (do_mo_cholesky.and. .not.mo_cholesky_double) then - allocate(out_val_sp(sze)) + allocate(out_val_sp(mo_num)) endif if (iand(ii, -mo_integrals_cache_size) == 0) then @@ -261,22 +253,27 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) cholesky_mo_transp(1,j,l), 1, 0.d0, & out_val, 1) else - integer :: isplit - call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - mo_integrals_cache_min-1, 1., & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & - out_val_sp, 1) - out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) + call sgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1., & + cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(1,j,l), 1, 0., & + out_val_sp, 1) out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1) - do isplit=2,4 - call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - mo_integrals_cache_min-1, 1., & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & - out_val_sp, 1) - out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) - enddo +! integer :: isplit +! isplit=1 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! mo_integrals_cache_min-1, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp, 1) +! out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1) +! do isplit=2,4 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! mo_integrals_cache_min-1, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp, 1) +! out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1) +! enddo endif else @@ -318,15 +315,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) cholesky_mo_transp(1,j,l), 1, 0.d0, & out_val(mo_integrals_cache_max+1), 1) else - out_val = 0.d0 - do isplit=1,4 - call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - mo_num-mo_integrals_cache_max, 1., & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & - out_val_sp(mo_integrals_cache_max+1), 1) - out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze) - enddo + call sgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1., & + cholesky_mo_transp_sp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(1,j,l), 1, 0., & + out_val_sp(mo_integrals_cache_max+1), 1) + out_val(mo_integrals_cache_max+1:sze) = out_val_sp(mo_integrals_cache_max+1:sze) +! isplit=1 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! mo_num-mo_integrals_cache_max, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp(mo_integrals_cache_max+1), 1) +! out_val(mo_integrals_cache_max+1:sze) = out_val_sp(mo_integrals_cache_max+1:sze) +! do isplit=2,4 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! mo_num-mo_integrals_cache_max, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp(mo_integrals_cache_max+1), 1) +! out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze) +! enddo endif else @@ -366,15 +374,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) cholesky_mo_transp(1,j,l), 1, 0.d0, & out_val, 1) else - out_val = 0.d0 - do isplit=1,4 - call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - sze, 1., & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & - out_val_sp, 1) - out_val(1:sze) += out_val_sp(1:sze) - enddo + call sgemv('T', cholesky_mo_num, sze, 1., & + cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(1,j,l), 1, 0., & + out_val_sp, 1) + out_val(1:sze) = out_val_sp(1:sze) +! isplit=1 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! sze, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp, 1) +! out_val(1:sze) = out_val_sp(1:sze) +! do isplit=2,4 +! call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! sze, 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., & +! out_val_sp, 1) +! out_val(1:sze) += out_val_sp(1:sze) +! enddo endif else @@ -459,23 +478,30 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & out_array, sze) else - integer :: isplit - double precision, allocatable :: out_array_sp(:,:) + real, allocatable :: out_array_sp(:,:) allocate(out_array_sp(sze,sze)) - call sgemm('T', 'N', mo_num, mo_num, & - cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, & + call sgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.0, & + cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp_sp(1,1,l), cholesky_mo_num, 0.0, & out_array_sp, sze) out_array(1:sze,1:sze) = out_array_sp(1:sze,1:sze) - do isplit=2,4 - call sgemm('T', 'N', mo_num, mo_num, & - cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, & - out_array_sp, sze) - out_array(1:sze,1:sze) = out_array(1:sze,1:sze) + out_array_sp(1:sze,1:sze) - enddo +! +! isplit=1 +! call sgemm('T', 'N', mo_num, mo_num, & +! cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0., & +! out_array_sp, sze) +! out_array(1:sze,1:sze) = out_array_sp(1:sze,1:sze) +! integer :: isplit +! do isplit=2,4 +! call sgemm('T', 'N', mo_num, mo_num, & +! cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1., & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0., & +! out_array_sp, sze) +! out_array(1:sze,1:sze) = out_array(1:sze,1:sze) + out_array_sp(1:sze,1:sze) +! enddo deallocate(out_array_sp) endif @@ -631,12 +657,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) integer :: isplit do i=1,mo_integrals_cache_min-1 - out_val(i) = 0.d0 - do isplit=1,4 - out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) - enddo + out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1) +! out_val(i) = 0.d0 +! do isplit=1,4 +! out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) +! enddo enddo do i=mo_integrals_cache_min,mo_integrals_cache_max @@ -644,12 +671,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) enddo do i=mo_integrals_cache_max, sze - out_val(i) = 0.d0 - do isplit=1,4 - out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) - enddo + out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1) +! out_val(i) = 0.d0 +! do isplit=1,4 +! out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) +! enddo enddo endif @@ -663,12 +691,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map) enddo else do i=1,sze - out_val(i) = 0.d0 - do isplit=1,4 - out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & - cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) - enddo + out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1) +! out_val(i) = 0.d0 +! do isplit=1,4 +! out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, & +! cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1) +! enddo enddo endif From bce68e7461440405f2cdaf8462a97163ed89899a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Feb 2025 16:06:05 +0100 Subject: [PATCH 32/59] Simplified hij_cache in pt2 --- src/cipsi/selection.irp.f | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 8c22ec85..99bc7013 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -1478,19 +1478,17 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: bant=1 -! double precision, allocatable :: hij_cache1(:), hij_cache2(:) - double precision, allocatable :: hij_cache1(:,:), hij_cache2(:,:) -! allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + double precision, allocatable :: hij_cache(:,:) PROVIDE mo_integrals_threshold - if(sp == 3) then ! AB + allocate(hij_cache(mo_num,mo_num)) - allocate(hij_cache1(mo_num,mo_num)) + if(sp == 3) then ! AB h1 = p(1,1) h2 = p(1,2) - call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache1,mo_integrals_map) + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache,mo_integrals_map) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle do p2=1, mo_num @@ -1501,7 +1499,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2,p1) * phase + hij = hij_cache(p2,p1) * phase end if if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) @@ -1511,16 +1509,11 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end do - deallocate(hij_cache1) - else ! AA BB - allocate(hij_cache1(mo_num,mo_num),hij_cache2(mo_num,mo_num)) - p1 = p(1,sp) p2 = p(2,sp) - call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals_ij(p1,p2,mo_num,hij_cache2,mo_integrals_map) + call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache,mo_integrals_map) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle do putj=puti+1, mo_num @@ -1531,7 +1524,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (dabs(hij) < mo_integrals_threshold) cycle else - hij = hij_cache1(putj,puti) - hij_cache2(putj,puti) + hij = hij_cache(putj,puti) - hij_cache(puti,putj) if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1542,11 +1535,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end do - deallocate(hij_cache1,hij_cache2) - end if -! deallocate(hij_cache1,hij_cache2) + deallocate(hij_cache) + end From ed47408948436dd67146c49ddb59c8bb12e9b484 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Feb 2025 11:26:06 +0100 Subject: [PATCH 33/59] Forgot EZFIO.cfg in ezfio_files --- src/ezfio_files/EZFIO.cfg | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/ezfio_files/EZFIO.cfg diff --git a/src/ezfio_files/EZFIO.cfg b/src/ezfio_files/EZFIO.cfg new file mode 100644 index 00000000..a65cda1a --- /dev/null +++ b/src/ezfio_files/EZFIO.cfg @@ -0,0 +1,9 @@ +[ezfio_convention] +type: integer +doc: Version of the EZFIO conventions +interface: ezfio, provider, ocaml +default: 20210101 + +# EZFIO conventions +# 20210101: Old conventions +# 20250211: Changed normalization of AOs: Moved GAMESS convention from primitives to AOs for compatibility with trexio. From 7738a8ab8138301e953c4872b0a536c3c77ec643 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Feb 2025 12:42:49 +0100 Subject: [PATCH 34/59] Fixed 9a840ae --- ocaml/qp_create_ezfio.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index ad8d44ca..9e58a18f 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -265,7 +265,7 @@ let run ?o b au c d m p cart xyz_file = let write_file () = (* Create EZFIO *) Ezfio.set_file ezfio_file; - Ezfio.set_files_ezfio_ezfio_convention 20250211; + Ezfio.set_ezfio_files_ezfio_convention 20250211; (* Write Pseudo *) let pseudo = From a1ff1a3efc97e9d8a3a86c4dd0b7227aa5820196 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Feb 2025 14:07:12 +0100 Subject: [PATCH 35/59] Fixed qp_create for cc-pcvnz basis --- external/ezfio | 2 +- ocaml/qp_create_ezfio.ml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index d02132ea..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 9e58a18f..bc36593e 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -131,7 +131,8 @@ let run ?o b au c d m p cart xyz_file = let key = Element.to_string elem.Atom.element in - Hashtbl.add basis_table key new_channel + if not (Hashtbl.mem basis_table key) then + Hashtbl.add basis_table key new_channel ) nuclei end | Some (key, basis) -> (*Aux basis *) From d04232bb7bf964896f355be29c114d7f959ac387 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Feb 2025 11:36:56 +0100 Subject: [PATCH 36/59] Foce ao_normalized=true for qp_create --- ocaml/qp_create_ezfio.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index bc36593e..ba0e2b7b 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -267,6 +267,7 @@ let run ?o b au c d m p cart xyz_file = (* Create EZFIO *) Ezfio.set_file ezfio_file; Ezfio.set_ezfio_files_ezfio_convention 20250211; + Ezfio.set_basis_ao_normalized true ; (* Write Pseudo *) let pseudo = From d20ac13c4f3d5b4d7d2007253aaee689a6b121b5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Feb 2025 11:55:25 +0100 Subject: [PATCH 37/59] Added del orbs in reorder_casscf --- src/casscf_cipsi/reorder_orb.irp.f | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f index 3cb90522..865edd65 100644 --- a/src/casscf_cipsi/reorder_orb.irp.f +++ b/src/casscf_cipsi/reorder_orb.irp.f @@ -26,6 +26,11 @@ subroutine reorder_orbitals_for_casscf array(iorb) = 3 * mo_num + i enddo + do i = 1, n_del_orb + iorb = list_del(i) + array(iorb) = 4 * mo_num + i + enddo + do i = 1, mo_num iorder(i) = i enddo From cbad838a341ab98e1f6397951553d47caa9154a3 Mon Sep 17 00:00:00 2001 From: Antoine MARIE Date: Tue, 25 Feb 2025 16:06:01 +0100 Subject: [PATCH 38/59] increase hard coded max rank of cholesky for ao_two_e_int --- src/ao_two_e_ints/cholesky.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d15ebdc3..36061ef0 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -178,7 +178,7 @@ END_PROVIDER rank_max = np ! Avoid too large arrays when there are many electrons if (elec_num > 10) then - rank_max = min(np,20*elec_num*elec_num) + rank_max = min(np,25*elec_num*elec_num) endif call mmap_create_d('', (/ ndim8, rank_max /), .False., .True., map) From ed1253f62956c10866ea83c707cf532b7f3d4d67 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 25 Feb 2025 18:09:57 +0100 Subject: [PATCH 39/59] Sort indices for faster access in RDM --- external/ezfio | 2 +- .../basis_correction/print_routine.irp.f | 97 ++++++++------- src/ao_two_e_ints/two_e_integrals.irp.f | 1 + src/cas_based_on_top/on_top_cas_prov.irp.f | 13 +- src/dft_utils_in_r/ao_in_r.irp.f | 112 ++++++++---------- src/dft_utils_in_r/mo_in_r.irp.f | 15 +-- src/mu_of_r/mu_of_r_conditions.irp.f | 85 ++++++++++++- src/two_rdm_routines/davidson_like_2rdm.irp.f | 88 ++++++++------ 8 files changed, 248 insertions(+), 165 deletions(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index b3b38673..8879fd5d 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -22,53 +22,58 @@ subroutine print_basis_correction print*, '****************************************' print*, '****************************************' print*, 'mu_of_r_potential = ',mu_of_r_potential - if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then - print*, '' - print*,'Using a HF-like two-body density to define mu(r)' - print*,'This assumes that HF is a qualitative representation of the wave function ' - print*,'********************************************' - print*,'Functionals more suited for weak correlation' - print*,'********************************************' - print*,'+) LDA Ecmd functional : purely based on the UEG (JCP,149,194301,1-15 (2018)) ' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD LDA , state ',istate,' = ',ecmd_lda_mu_of_r(istate) - enddo - print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-UEG , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate) - enddo + if(mu_of_r_potential.EQ."hf".or. & + mu_of_r_potential.EQ."hf_old".or.& + mu_of_r_potential.EQ."hf_sparse".or.& + mu_of_r_potential.EQ."proj")then + print*, '' + print*,'Using a HF-like two-body density to define mu(r)' + print*,'This assumes that HF is a qualitative representation of the wave function ' + print*,'********************************************' + print*,'Functionals more suited for weak correlation' + print*,'********************************************' + print*,'+) LDA Ecmd functional : purely based on the UEG (JCP,149,194301,1-15 (2018)) ' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD LDA , state ',istate,' = ',ecmd_lda_mu_of_r(istate) + enddo + print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-UEG , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate) + enddo - else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then - print*, '' - print*,'Using a CAS-like two-body density to define mu(r)' - print*,'This assumes that the CAS is a qualitative representation of the wave function ' - print*,'********************************************' - print*,'Functionals more suited for weak correlation' - print*,'********************************************' - print*,'+) LDA Ecmd functional : purely based on the UEG (JCP,149,194301,1-15 (2018)) ' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD LDA , state ',istate,' = ',ecmd_lda_mu_of_r(istate) - enddo - print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-UEG , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate) - enddo - print*,'' - print*,'********************************************' - print*,'********************************************' - print*,'+) PBE-on-top Ecmd functional : JCP, 152, 174104 (2020) ' - print*,'PBE at mu=0, extrapolated ontop pair density at large mu, usual spin-polarization' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-OT , state ',istate,' = ',ecmd_pbe_on_top_mu_of_r(istate) - enddo - print*,'' - print*,'********************************************' - print*,'+) PBE-on-top no spin polarization Ecmd functional : JCP, 152, 174104 (2020)' - print*,'PBE at mu=0, extrapolated ontop pair density at large mu, and ZERO SPIN POLARIZATION' - do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD SU-PBE-OT , state ',istate,' = ',ecmd_pbe_on_top_su_mu_of_r(istate) - enddo - print*,'' + else if(mu_of_r_potential.EQ."cas_full".or. & + mu_of_r_potential.EQ."cas_truncated".or. & + mu_of_r_potential.EQ."pure_act") then + print*, '' + print*,'Using a CAS-like two-body density to define mu(r)' + print*,'This assumes that the CAS is a qualitative representation of the wave function ' + print*,'********************************************' + print*,'Functionals more suited for weak correlation' + print*,'********************************************' + print*,'+) LDA Ecmd functional : purely based on the UEG (JCP,149,194301,1-15 (2018)) ' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD LDA , state ',istate,' = ',ecmd_lda_mu_of_r(istate) + enddo + print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-UEG , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate) + enddo + print*,'' + print*,'********************************************' + print*,'********************************************' + print*,'+) PBE-on-top Ecmd functional : JCP, 152, 174104 (2020) ' + print*,'PBE at mu=0, extrapolated ontop pair density at large mu, usual spin-polarization' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD PBE-OT , state ',istate,' = ',ecmd_pbe_on_top_mu_of_r(istate) + enddo + print*,'' + print*,'********************************************' + print*,'+) PBE-on-top no spin polarization Ecmd functional : JCP, 152, 174104 (2020)' + print*,'PBE at mu=0, extrapolated ontop pair density at large mu, and ZERO SPIN POLARIZATION' + do istate = 1, N_states + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' ECMD SU-PBE-OT , state ',istate,' = ',ecmd_pbe_on_top_su_mu_of_r(istate) + enddo + print*,'' endif print*,'' diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index fb376ce1..1cb7617e 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -54,6 +54,7 @@ double precision function ao_two_e_integral(i, j, k, l) else if (use_only_lr) then ao_two_e_integral = ao_two_e_integral_erf(i, j, k, l) + return else if (do_schwartz_accel(i,j,k,l)) then diff --git a/src/cas_based_on_top/on_top_cas_prov.irp.f b/src/cas_based_on_top/on_top_cas_prov.irp.f index dd93ed40..9f9a1f06 100644 --- a/src/cas_based_on_top/on_top_cas_prov.irp.f +++ b/src/cas_based_on_top/on_top_cas_prov.irp.f @@ -15,14 +15,17 @@ pure_act_on_top_of_r = 0.d0 do l = 1, n_act_orb phi_l = act_mos_in_r_array(l,ipoint) + if (dabs(phi_l) < 1.d-12) cycle do k = 1, n_act_orb - phi_k = act_mos_in_r_array(k,ipoint) + phi_k = act_mos_in_r_array(k,ipoint) * phi_l + if (dabs(phi_k) < 1.d-12) cycle do j = 1, n_act_orb - phi_j = act_mos_in_r_array(j,ipoint) + phi_j = act_mos_in_r_array(j,ipoint) * phi_k + if (dabs(phi_j) < 1.d-12) cycle do i = 1, n_act_orb - phi_i = act_mos_in_r_array(i,ipoint) - ! 1 2 1 2 - pure_act_on_top_of_r += act_2_rdm_ab_mo(i,j,k,l,istate) * phi_i * phi_j * phi_k * phi_l + phi_i = act_mos_in_r_array(i,ipoint) * phi_j + ! 1 2 1 2 + pure_act_on_top_of_r = pure_act_on_top_of_r + act_2_rdm_ab_mo(i,j,k,l,istate) * phi_i !* phi_j * phi_k * phi_l enddo enddo enddo diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f index c8822776..ffd8c9d5 100644 --- a/src/dft_utils_in_r/ao_in_r.irp.f +++ b/src/dft_utils_in_r/ao_in_r.irp.f @@ -8,21 +8,14 @@ BEGIN_PROVIDER[double precision, aos_in_r_array, (ao_num,n_points_final_grid)] END_DOC implicit none - integer :: i, j - double precision :: tmp_array(ao_num), r(3) + integer :: i !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,tmp_array,j) & - !$OMP SHARED(aos_in_r_array,n_points_final_grid,ao_num,final_grid_points) + !$OMP PRIVATE (i) & + !$OMP SHARED(aos_in_r_array,n_points_final_grid,final_grid_points) do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_aos_at_r(r, tmp_array) - do j = 1, ao_num - aos_in_r_array(j,i) = tmp_array(j) - enddo + call give_all_aos_at_r(final_grid_points(1,i), aos_in_r_array(1,i)) enddo !$OMP END PARALLEL DO @@ -42,7 +35,7 @@ BEGIN_PROVIDER[double precision, aos_in_r_array_transp, (n_points_final_grid,ao_ do i = 1, n_points_final_grid do j = 1, ao_num - aos_in_r_array_transp(i,j) = aos_in_r_array(j,i) + aos_in_r_array_transp(i,j) = aos_in_r_array(j,i) enddo enddo @@ -62,27 +55,29 @@ BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_gri implicit none integer :: i, j, m - double precision :: aos_array(ao_num), r(3) - double precision :: aos_grad_array(3,ao_num) + double precision :: r(3) + double precision, allocatable :: aos_grad_array(:,:), aos_array(:) - !$OMP PARALLEL DO & + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & + !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & !$OMP SHARED(aos_grad_in_r_array,n_points_final_grid,ao_num,final_grid_points) + allocate(aos_grad_array(3,ao_num), aos_array(ao_num)) + + !$OMP DO do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) + call give_all_aos_and_grad_at_r(final_grid_points(1,i),aos_array,aos_grad_array) do m = 1, 3 do j = 1, ao_num aos_grad_in_r_array(j,i,m) = aos_grad_array(m,j) enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(aos_grad_array,aos_array) + !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER ! --- @@ -116,7 +111,7 @@ END_PROVIDER enddo enddo enddo - END_PROVIDER + END_PROVIDER BEGIN_PROVIDER [double precision, aos_lapl_in_r_array, (3,ao_num,n_points_final_grid)] implicit none @@ -126,32 +121,32 @@ END_PROVIDER ! k = 1 : x, k= 2, y, k 3, z END_DOC integer :: i,j,m - double precision :: aos_array(ao_num), r(3) - double precision :: aos_grad_array(3,ao_num) - double precision :: aos_lapl_array(3,ao_num) - !$OMP PARALLEL DO & + double precision, allocatable :: aos_lapl_array(:,:), aos_grad_array(:,:), aos_array(:) + + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,aos_array,aos_grad_array,aos_lapl_array,j,m) & + !$OMP PRIVATE (i,aos_array,aos_grad_array,aos_lapl_array,j,m) & !$OMP SHARED(aos_lapl_in_r_array,n_points_final_grid,ao_num,final_grid_points) + allocate( aos_array(ao_num), aos_grad_array(3,ao_num), aos_lapl_array(3,ao_num)) + !$OMP DO do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_aos_and_grad_and_lapl_at_r(r,aos_array,aos_grad_array,aos_lapl_array) + call give_all_aos_and_grad_and_lapl_at_r(final_grid_points(1,i),aos_array,aos_grad_array,aos_lapl_array) do j = 1, ao_num do m = 1, 3 aos_lapl_in_r_array(m,j,i) = aos_lapl_array(m,j) enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate( aos_array, aos_grad_array, aos_lapl_array) + !$OMP END PARALLEL END_PROVIDER BEGIN_PROVIDER[double precision, aos_grad_in_r_array_transp_bis, (n_points_final_grid,ao_num,3)] implicit none BEGIN_DOC -! Transposed gradients -! +! Transposed gradients +! END_DOC integer :: i,j,m double precision :: aos_array(ao_num), r(3) @@ -169,8 +164,8 @@ END_PROVIDER BEGIN_PROVIDER[double precision, aos_grad_in_r_array_transp_3, (3,n_points_final_grid,ao_num)] implicit none BEGIN_DOC -! Transposed gradients -! +! Transposed gradients +! END_DOC integer :: i,j,m double precision :: aos_array(ao_num), r(3) @@ -187,22 +182,14 @@ END_PROVIDER BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)] implicit none BEGIN_DOC - ! aos_in_r_array_extra(i,j) = value of the ith ao on the jth grid point of the EXTRA grid + ! aos_in_r_array_extra(i,j) = value of the ith ao on the jth grid point of the EXTRA grid END_DOC - integer :: i,j - double precision :: aos_array(ao_num), r(3) + integer :: i !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,aos_array,j) & - !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra) + !$OMP DEFAULT (NONE) PRIVATE (i) & + !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,final_grid_points_extra) do i = 1, n_points_extra_final_grid - r(1) = final_grid_points_extra(1,i) - r(2) = final_grid_points_extra(2,i) - r(3) = final_grid_points_extra(3,i) - call give_all_aos_at_r(r,aos_array) - do j = 1, ao_num - aos_in_r_array_extra(j,i) = aos_array(j) - enddo + call give_all_aos_at_r(final_grid_points_extra(1,i),aos_in_r_array_extra(1,i)) enddo !$OMP END PARALLEL DO @@ -214,9 +201,9 @@ END_PROVIDER BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)] BEGIN_DOC - ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid + ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid END_DOC - + implicit none integer :: i, j double precision :: aos_array(ao_num), r(3) @@ -235,27 +222,28 @@ BEGIN_PROVIDER[double precision, aos_grad_in_r_array_extra, (ao_num,n_points_ext implicit none integer :: i, j, m - double precision :: aos_array(ao_num), r(3) - double precision :: aos_grad_array(3,ao_num) + double precision, allocatable :: aos_array(:), aos_grad_array(:,:) - !$OMP PARALLEL DO & + + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & + !$OMP PRIVATE (i,j,m,aos_array,aos_grad_array) & !$OMP SHARED(aos_grad_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra) + allocate(aos_array(ao_num), aos_grad_array(3,ao_num)) + !$OMP DO do i = 1, n_points_extra_final_grid - r(1) = final_grid_points_extra(1,i) - r(2) = final_grid_points_extra(2,i) - r(3) = final_grid_points_extra(3,i) - call give_all_aos_and_grad_at_r(r, aos_array, aos_grad_array) + call give_all_aos_and_grad_at_r(final_grid_points_extra(1,i), aos_array, aos_grad_array) do m = 1, 3 do j = 1, ao_num aos_grad_in_r_array_extra(j,i,m) = aos_grad_array(m,j) enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + deallocate(aos_array,aos_grad_array) + !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index 623de4f8..f3cc5559 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -21,20 +21,11 @@ BEGIN_DOC ! mos_in_r_array(i,j) = value of the ith mo on the jth grid point END_DOC - integer :: i,j - double precision :: mos_array(mo_num), r(3) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,mos_array,j) & + integer :: i + !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE (i) & !$OMP SHARED(mos_in_r_array_omp,n_points_final_grid,mo_num,final_grid_points) do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_mos_at_r(r,mos_array) - do j = 1, mo_num - mos_in_r_array_omp(j,i) = mos_array(j) - enddo + call give_all_mos_at_r(final_grid_points(1,i),mos_in_r_array_omp(1,i)) enddo !$OMP END PARALLEL DO END_PROVIDER diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 88dad8c3..3ad52a05 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -22,22 +22,32 @@ endif do istate = 1, N_states - do ipoint = 1, n_points_final_grid if(mu_of_r_potential.EQ."hf")then - mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + do ipoint = 1, n_points_final_grid + mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + enddo else if(mu_of_r_potential.EQ."hf_old")then - mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + do ipoint = 1, n_points_final_grid + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + enddo else if(mu_of_r_potential.EQ."hf_sparse")then - mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) + do ipoint = 1, n_points_final_grid + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) + enddo else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then - mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) + do ipoint = 1, n_points_final_grid + mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) + enddo + else if(mu_of_r_potential.EQ."proj")then + do ipoint = 1, n_points_final_grid + mu_of_r_prov(ipoint,istate) = mu_of_r_projector_mo(ipoint) + enddo else print*,'you requested the following mu_of_r_potential' print*,mu_of_r_potential print*,'which does not correspond to any of the options for such keyword' stop endif - enddo enddo if (write_mu_of_r) then @@ -225,3 +235,66 @@ enddo END_PROVIDER + +BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with the projector onto the atomic basis + ! P_B(\mathbf{r},\mathbf{r}') = \sum_{ij} | + ! \chi_{i} \rangle \left[S^{-1}\right]_{ij} \langle \chi_{j} | + ! \] where $i$ and $j$ denote all atomic orbitals. + END_DOC + + double precision, parameter :: factor = dsqrt(2.d0*dacos(-1.d0)) + double precision, allocatable :: tmp(:,:) + integer :: ipoint + + + do ipoint=1,n_points_final_grid + mu_of_r_projector_mo(ipoint) = 0.d0 + integer :: i,j + do j=1,n_inact_act_orb + i = list_inact_act(j) + mu_of_r_projector_mo(ipoint) = mu_of_r_projector_mo(ipoint) + & + mos_in_r_array_omp(i,ipoint) * mos_in_r_array_omp(i,ipoint) + enddo + do j=1,n_virt_orb + i = list_virt(j) + mu_of_r_projector_mo(ipoint) = mu_of_r_projector_mo(ipoint) + & + mos_in_r_array_omp(i,ipoint) * mos_in_r_array_omp(i,ipoint) + enddo + enddo + + do ipoint=1,n_points_final_grid + ! epsilon + mu_of_r_projector_mo(ipoint) = 1.d0/(2.d0*dacos(-1.d0) * mu_of_r_projector_mo(ipoint)**(2.d0/3.d0)) + ! mu + mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( mu_of_r_projector_mo(ipoint) ) + enddo +END_PROVIDER + + + +BEGIN_PROVIDER [double precision, mu_average_proj, (N_states)] + implicit none + BEGIN_DOC + ! average value of mu(r) weighted with the total one-e density and divided by the number of electrons + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the one- and two-body density matrix are excluded + END_DOC + integer :: ipoint,istate + double precision :: weight,density + do istate = 1, N_states + mu_average_proj(istate) = 0.d0 + do ipoint = 1, n_points_final_grid + weight =final_weight_at_r_vector(ipoint) + density = one_e_dm_and_grad_alpha_in_r(4,ipoint,istate) & + + one_e_dm_and_grad_beta_in_r(4,ipoint,istate) + mu_average_proj(istate) += mu_of_r_projector_mo(ipoint) * weight * density + enddo + mu_average_proj(istate) = mu_average_proj(istate) / elec_num_grid_becke(istate) + enddo +END_PROVIDER + diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f index 09436663..f0b40459 100644 --- a/src/two_rdm_routines/davidson_like_2rdm.irp.f +++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f @@ -139,6 +139,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin call list_to_bitstring( orb_bitmask, list_orb, norb, N_int) sze_buff = 6 * norb + elec_alpha_num * elec_alpha_num * 60 + sze_buff = sze_buff*100 list_orb_reverse = -1000 do i = 1, norb list_orb_reverse(list_orb(i)) = i @@ -191,7 +192,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ASSERT (istart > 0) ASSERT (istep > 0) - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) @@ -272,14 +273,14 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin enddo endif - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 enddo +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep @@ -343,24 +344,24 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo if(alpha_beta.or.spin_trace.or.alpha_alpha)then - ! increment the alpha/beta part for single excitations - if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 - endif - call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - ! increment the alpha/alpha part for single excitations - if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 - endif - call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + ! increment the alpha/beta part for single excitations + if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then + call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + nkeys = 0 + endif + call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + ! increment the alpha/alpha part for single excitations + if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then + call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + nkeys = 0 + endif + call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) endif enddo - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 ! Compute Hij for all alpha doubles ! ---------------------------------- @@ -383,8 +384,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) enddo endif - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 ! Single and double beta excitations @@ -459,8 +460,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) endif enddo - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 ! Compute Hij for all beta doubles ! ---------------------------------- @@ -487,8 +488,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin enddo endif - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 ! Diagonal contribution @@ -550,22 +551,43 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc integer :: istate integer :: i,h1,h2,p1,p2 - call omp_set_lock(lock_2rdm) + integer, allocatable :: iorder(:) + integer*8, allocatable :: to_sort(:) + + allocate(iorder(nkeys)) + do i=1,nkeys + iorder(i) = i + enddo + + ! If the lock is already taken, sort the keys while waiting for a faster access + if (.not.omp_test_lock(lock_2rdm)) then + allocate(to_sort(nkeys)) + do i=1,nkeys + h1 = keys(1,iorder(i)) + h2 = keys(2,iorder(i))-1 + p1 = keys(3,iorder(i))-1 + p2 = keys(4,iorder(i))-1 + to_sort(i) = int(h1,8) + int(dim1,8)*(int(h2,8) + int(dim1,8)*(int(p1,8) + int(dim1,8)*int(p2,8))) + enddo + call i8sort(to_sort, iorder, nkeys) + deallocate(to_sort) + call omp_set_lock(lock_2rdm) + endif ! print*,'*************' ! print*,'updating' ! print*,'nkeys',nkeys - do i = 1, nkeys - h1 = keys(1,i) - h2 = keys(2,i) - p1 = keys(3,i) - p2 = keys(4,i) - do istate = 1, N_st -! print*,h1,h2,p1,p2,values(istate,i) - big_array(h1,h2,p1,p2,istate) += values(istate,i) + do istate = 1, N_st + do i = 1, nkeys + h1 = keys(1,iorder(i)) + h2 = keys(2,iorder(i)) + p1 = keys(3,iorder(i)) + p2 = keys(4,iorder(i)) + big_array(h1,h2,p1,p2,istate) = big_array(h1,h2,p1,p2,istate) + values(istate,iorder(i)) enddo enddo call omp_unset_lock(lock_2rdm) + deallocate(iorder) end From 62c13860bae511e8641208b880d71c6176129325 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Feb 2025 14:42:39 +0100 Subject: [PATCH 40/59] update two_rdm_routines --- src/bitmask/bitmasks_routines.irp.f | 39 +- src/two_body_rdm/act_2_rdm.irp.f | 1 + src/two_rdm_routines/davidson_like_2rdm.irp.f | 2 + src/two_rdm_routines/update_rdm.irp.f | 1647 +++++++++-------- 4 files changed, 854 insertions(+), 835 deletions(-) diff --git a/src/bitmask/bitmasks_routines.irp.f b/src/bitmask/bitmasks_routines.irp.f index 9c6f4f0c..be0a2dd1 100644 --- a/src/bitmask/bitmasks_routines.irp.f +++ b/src/bitmask/bitmasks_routines.irp.f @@ -283,33 +283,16 @@ subroutine print_det_one_dimension(string,Nint) end -logical function is_integer_in_string(bite,string,Nint) - use bitmasks +logical function is_integer_in_string(orb,bitmask,Nint) + use bitmasks implicit none - integer, intent(in) :: bite,Nint - integer(bit_kind), intent(in) :: string(Nint) - integer(bit_kind) :: string_bite(Nint) - integer :: i,itot,itot_and - character*(2048) :: output(1) - string_bite = 0_bit_kind - call set_bit_to_integer(bite,string_bite,Nint) - itot = 0 - itot_and = 0 - is_integer_in_string = .False. -!print*,'' -!print*,'' -!print*,'bite = ',bite -!call bitstring_to_str( output(1), string_bite, Nint ) -! print *, trim(output(1)) -!call bitstring_to_str( output(1), string, Nint ) -! print *, trim(output(1)) - do i = 1, Nint - itot += popcnt(string(i)) - itot_and += popcnt(ior(string(i),string_bite(i))) - enddo -!print*,'itot,itot_and',itot,itot_and - if(itot == itot_and)then - is_integer_in_string = .True. - endif -!pause + BEGIN_DOC +! Checks is the orbital orb is set to 1 in the bit string + END_DOC + integer, intent(in) :: orb, Nint + integer(bit_kind), intent(in) :: bitmask(Nint) + integer :: j, k + k = ishft(orb-1,-bit_kind_shift)+1 + j = orb-ishft(k-1,bit_kind_shift)-1 + is_integer_in_string = iand(bitmask(k), ibset(0_bit_kind, j)) /= 0_bit_kind end diff --git a/src/two_body_rdm/act_2_rdm.irp.f b/src/two_body_rdm/act_2_rdm.irp.f index 9e2ea018..6cad72be 100644 --- a/src/two_body_rdm/act_2_rdm.irp.f +++ b/src/two_body_rdm/act_2_rdm.irp.f @@ -145,6 +145,7 @@ print*,'' print*,'Providing act_2_rdm_spin_trace_mo ' character*(128) :: name_file + PROVIDE all_mo_integrals name_file = 'act_2_rdm_spin_trace_mo' ispin = 4 act_2_rdm_spin_trace_mo = 0.d0 diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f index f0b40459..0e6899f8 100644 --- a/src/two_rdm_routines/davidson_like_2rdm.irp.f +++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f @@ -194,6 +194,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep +!print *, 'aa', k_a, '/', iend krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) @@ -282,6 +283,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep +!print *, 'ab', k_a, '/', iend ! Single and double alpha exitations diff --git a/src/two_rdm_routines/update_rdm.irp.f b/src/two_rdm_routines/update_rdm.irp.f index 8aeb0699..978f6e33 100644 --- a/src/two_rdm_routines/update_rdm.irp.f +++ b/src/two_rdm_routines/update_rdm.irp.f @@ -1,762 +1,897 @@ - subroutine orb_range_diag_to_all_states_2_rdm_dm_buffer(det_1,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) +logical function is_integer_in_string_local(orb,bitmask,Nint) + use bitmasks + implicit none + integer, intent(in) :: orb, Nint + integer(bit_kind), intent(in) :: bitmask(Nint) + integer :: j, k + k = ishft(orb-1,-bit_kind_shift)+1 + j = orb-ishft(k-1,bit_kind_shift)-1 + is_integer_in_string_local = iand(bitmask(k), ibset(0_bit_kind, j)) /= 0_bit_kind +end + +subroutine orb_range_diag_to_all_states_2_rdm_dm_buffer(det_1,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) use bitmasks BEGIN_DOC - ! routine that update the DIAGONAL PART of the two body rdms in a specific range of orbitals for a given determinant det_1 - ! - ! c_1 is the array of the contributions to the rdm for all states - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - END_DOC - implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer, intent(in) :: list_orb_reverse(mo_num) - integer(bit_kind), intent(in) :: det_1(N_int,2) - integer(bit_kind), intent(in) :: orb_bitmask(N_int) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - - integer :: occ(N_int*bit_kind_size,2) - integer :: n_occ_ab(2) - integer :: i,j,h1,h2 - integer(bit_kind) :: det_1_act(N_int,2) - logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - do i = 1, N_int - det_1_act(i,1) = iand(det_1(i,1),orb_bitmask(i)) - det_1_act(i,2) = iand(det_1(i,2),orb_bitmask(i)) - enddo - - alpha_alpha = .False. - beta_beta = .False. - alpha_beta = .False. - spin_trace = .False. - if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. - else if(ispin == 4)then - spin_trace = .True. - endif - call bitstring_to_list_ab(det_1_act, occ, n_occ_ab, N_int) - logical :: is_integer_in_string - integer :: i1,i2,istate - if(alpha_beta)then - do i = 1, n_occ_ab(1) - i1 = occ(i,1) - do j = 1, n_occ_ab(2) - i2 = occ(j,2) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - ! If alpha/beta, electron 1 is alpha, electron 2 is beta - ! Therefore you don't necessayr have symmetry between electron 1 and 2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - - else if (alpha_alpha)then - do i = 1, n_occ_ab(1) - i1 = occ(i,1) - do j = 1, n_occ_ab(1) - i2 = occ(j,1) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = -0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - else if (beta_beta)then - do i = 1, n_occ_ab(2) - i1 = occ(i,2) - do j = 1, n_occ_ab(2) - i2 = occ(j,2) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = -0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - else if(spin_trace)then - ! 0.5 * (alpha beta + beta alpha) - do i = 1, n_occ_ab(1) - i1 = occ(i,1) - do j = 1, n_occ_ab(2) - i2 = occ(j,2) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - do i = 1, n_occ_ab(1) - i1 = occ(i,1) - do j = 1, n_occ_ab(1) - i2 = occ(j,1) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = -0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - do i = 1, n_occ_ab(2) - i1 = occ(i,2) - do j = 1, n_occ_ab(2) - i2 = occ(j,2) - h1 = list_orb_reverse(i1) - h2 = list_orb_reverse(i2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = -0.5d0 * c_1(istate) - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = h1 - enddo - enddo - endif - end - - - subroutine orb_range_off_diag_double_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - use bitmasks - BEGIN_DOC -! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for -! -! a given couple of determinant det_1, det_2 being a alpha/beta DOUBLE excitation with respect to one another -! -! c_1 is the array of the contributions to the rdm for all states -! -! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals -! -! ispin determines which spin-spin component of the two-rdm you will update -! -! ispin == 1 :: alpha/ alpha -! ispin == 2 :: beta / beta -! ispin == 3 :: alpha/ beta -! ispin == 4 :: spin traced <=> total two-rdm -! -! here, only ispin == 3 or 4 will do something - END_DOC - implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) - integer, intent(in) :: list_orb_reverse(mo_num) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - integer :: i,j,h1,h2,p1,p2,istate - integer :: exc(0:2,2,2) - double precision :: phase - logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. - beta_beta = .False. - alpha_beta = .False. - spin_trace = .False. - if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. - else if(ispin == 4)then - spin_trace = .True. - endif - call get_double_excitation(det_1,det_2,exc,phase,N_int) - h1 = exc(1,1,1) - if(list_orb_reverse(h1).lt.0)return - h1 = list_orb_reverse(h1) - h2 = exc(1,1,2) - if(list_orb_reverse(h2).lt.0)return - h2 = list_orb_reverse(h2) - p1 = exc(1,2,1) - if(list_orb_reverse(p1).lt.0)return - p1 = list_orb_reverse(p1) - p2 = exc(1,2,2) - if(list_orb_reverse(p2).lt.0)return - p2 = list_orb_reverse(p2) - if(alpha_beta)then - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p2 - keys(4,nkeys) = p1 - else if(spin_trace)then - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p2 - keys(4,nkeys) = p1 - endif - end - - subroutine orb_range_off_diag_single_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - use bitmasks - BEGIN_DOC - ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for - ! - ! a given couple of determinant det_1, det_2 being a SINGLE excitation with respect to one another - ! - ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 - ! - ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - ! - ! here, only ispin == 3 or 4 will do something + ! routine that update the DIAGONAL PART of the two body rdms in a specific range of orbitals for a given determinant det_1 + ! + ! c_1 is the array of the contributions to the rdm for all states + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm END_DOC implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) - integer, intent(in) :: list_orb_reverse(mo_num) - integer(bit_kind), intent(in) :: orb_bitmask(N_int) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - + integer, intent(in) :: ispin,sze_buff,N_st + integer, intent(in) :: list_orb_reverse(mo_num) + integer(bit_kind), intent(in) :: det_1(N_int,2) + integer(bit_kind), intent(in) :: orb_bitmask(N_int) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + integer :: occ(N_int*bit_kind_size,2) integer :: n_occ_ab(2) - integer :: i,j,h1,h2,p1,istate - integer :: exc(0:2,2,2) - double precision :: phase - + integer :: i,j,h1,h2 + integer(bit_kind) :: det_1_act(N_int,2) logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. + do i = 1, N_int + det_1_act(i,1) = iand(det_1(i,1),orb_bitmask(i)) + det_1_act(i,2) = iand(det_1(i,2),orb_bitmask(i)) + enddo + + alpha_alpha = .False. beta_beta = .False. alpha_beta = .False. spin_trace = .False. if( ispin == 1)then - alpha_alpha = .True. + alpha_alpha = .True. else if(ispin == 2)then - beta_beta = .True. + beta_beta = .True. else if(ispin == 3)then - alpha_beta = .True. + alpha_beta = .True. else if(ispin == 4)then - spin_trace = .True. + spin_trace = .True. endif - + + call bitstring_to_list_ab(det_1_act, occ, n_occ_ab, N_int) + logical :: is_integer_in_string_local + integer :: i1,i2,istate + if(alpha_beta)then + do i = 1, n_occ_ab(1) + i1 = occ(i,1) + h1 = list_orb_reverse(i1) + do j = 1, n_occ_ab(2) + i2 = occ(j,2) + h2 = list_orb_reverse(i2) + ! If alpha/beta, electron 1 is alpha, electron 2 is beta + ! Therefore you don't necessayr have symmetry between electron 1 and 2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + enddo + + else if (alpha_alpha)then + + do i = 1, n_occ_ab(1) + i1 = occ(i,1) + h1 = list_orb_reverse(i1) + do j = 1, n_occ_ab(1) + i2 = occ(j,1) + h2 = list_orb_reverse(i2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = -0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + enddo + + else if (beta_beta)then + + do i = 1, n_occ_ab(2) + i1 = occ(i,2) + h1 = list_orb_reverse(i1) + do j = 1, n_occ_ab(2) + i2 = occ(j,2) + h2 = list_orb_reverse(i2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = -0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + enddo + + else if(spin_trace)then + + ! 0.5 * (alpha beta + beta alpha) + do i = 1, n_occ_ab(1) + i1 = occ(i,1) + h1 = list_orb_reverse(i1) + do j = 1, n_occ_ab(2) + i2 = occ(j,2) + h2 = list_orb_reverse(i2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + do j = 1, n_occ_ab(1) + i2 = occ(j,1) + h2 = list_orb_reverse(i2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = -0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + enddo + do i = 1, n_occ_ab(2) + i1 = occ(i,2) + h1 = list_orb_reverse(i1) + do j = 1, n_occ_ab(2) + i2 = occ(j,2) + h2 = list_orb_reverse(i2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = 0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = -0.5d0 * c_1(istate) + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = h1 + enddo + enddo + endif +end + + +subroutine orb_range_off_diag_double_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + use bitmasks + BEGIN_DOC + ! routine that updates the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a alpha/beta DOUBLE excitation with respect to one another + ! + ! c_1 is the array of the contributions to the rdm for all states + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 3 or 4 will do something + END_DOC + implicit none + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) + integer, intent(in) :: list_orb_reverse(mo_num) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + integer :: i,j,h1,h2,p1,p2,istate + integer :: exc(0:2,2,2) + double precision :: phase + logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace + logical :: is_integer_in_string_local + if (ispin <= 2) return + +! alpha_alpha = .False. +! beta_beta = .False. +! alpha_beta = .False. +! spin_trace = .False. +! if( ispin == 1)then +! alpha_alpha = .True. +! else if(ispin == 2)then +! beta_beta = .True. +! else if(ispin == 3)then +! alpha_beta = .True. +! else if(ispin == 4)then +! spin_trace = .True. +! endif + call get_double_excitation(det_1,det_2,exc,phase,N_int) + + h1 = exc(1,1,1) + if(list_orb_reverse(h1).lt.0)return + h1 = list_orb_reverse(h1) + h2 = exc(1,1,2) + if(list_orb_reverse(h2).lt.0)return + h2 = list_orb_reverse(h2) + p1 = exc(1,2,1) + if(list_orb_reverse(p1).lt.0)return + p1 = list_orb_reverse(p1) + p2 = exc(1,2,2) + if(list_orb_reverse(p2).lt.0)return + p2 = list_orb_reverse(p2) +! if(alpha_beta)then + nkeys += 1 + phase = phase * 0.5d0 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = p2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p2 + keys(4,nkeys) = p1 +! else if(spin_trace)then +! nkeys += 1 +! do istate = 1, N_st +! values(istate,nkeys) = 0.5d0 * c_1(istate) * phase +! enddo +! keys(1,nkeys) = h1 +! keys(2,nkeys) = h2 +! keys(3,nkeys) = p1 +! keys(4,nkeys) = p2 +! nkeys += 1 +! do istate = 1, N_st +! values(istate,nkeys) = 0.5d0 * c_1(istate) * phase +! enddo +! keys(1,nkeys) = h2 +! keys(2,nkeys) = h1 +! keys(3,nkeys) = p2 +! keys(4,nkeys) = p1 +! endif +end + +subroutine orb_range_off_diag_single_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + use bitmasks + BEGIN_DOC + ! routine that updates the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a SINGLE excitation with respect to one another + ! + ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 + ! + ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 3 or 4 will do something + END_DOC + implicit none + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) + integer, intent(in) :: list_orb_reverse(mo_num) + integer(bit_kind), intent(in) :: orb_bitmask(N_int) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + + integer :: occ(N_int*bit_kind_size,2) + integer :: n_occ_ab(2) + integer :: i,j,h1,h2,p1,istate + integer :: exc(0:2,2,2) + double precision :: phase + + logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace + logical :: is_integer_in_string_local + if (ispin <= 2) return + + alpha_beta = .False. + spin_trace = .False. + if(ispin == 3)then + alpha_beta = .True. + else if(ispin == 4)then + spin_trace = .True. + endif + call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int) call get_single_excitation(det_1,det_2,exc,phase,N_int) if(alpha_beta)then - if (exc(0,1,1) == 1) then - ! Mono alpha - h1 = exc(1,1,1) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,1) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(2) - h2 = occ(i,2) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase + + if (exc(0,1,1) == 1) then + ! Mono alpha + h1 = exc(1,1,1) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,1) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + + phase = 0.5d0 * phase + do i = 1, n_occ_ab(2) + h2 = occ(i,2) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase + else + ! Mono beta + h1 = exc(1,1,2) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,2) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + phase = 0.5d0 * phase + do i = 1, n_occ_ab(1) + h2 = occ(i,1) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - enddo - else - ! Mono beta - h1 = exc(1,1,2) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,2) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(1) - h2 = occ(i,1) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - enddo - endif + endif + else if(spin_trace)then - if (exc(0,1,1) == 1) then - ! Mono alpha - h1 = exc(1,1,1) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,1) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(2) - h2 = occ(i,2) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - enddo - else - ! Mono beta - h1 = exc(1,1,2) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,2) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(1) - h2 = occ(i,1) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - enddo - endif - endif - end - subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + if (exc(0,1,1) == 1) then + ! Mono alpha + h1 = exc(1,1,1) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,1) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + phase = 0.5d0 * phase + do i = 1, n_occ_ab(2) + h2 = occ(i,2) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + enddo + + else + + ! Mono beta + h1 = exc(1,1,2) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,2) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + + phase = 0.5d0 * phase + do i = 1, n_occ_ab(1) + h2 = occ(i,1) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + enddo + endif + + endif +end + +subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) BEGIN_DOC - ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for - ! - ! a given couple of determinant det_1, det_2 being a ALPHA SINGLE excitation with respect to one another - ! - ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 - ! - ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - ! - ! here, only ispin == 1 or 4 will do something + ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a ALPHA SINGLE excitation with respect to one another + ! + ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 + ! + ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 1 or 4 will do something END_DOC use bitmasks implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) - integer, intent(in) :: list_orb_reverse(mo_num) - integer(bit_kind), intent(in) :: orb_bitmask(N_int) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) + integer, intent(in) :: list_orb_reverse(mo_num) + integer(bit_kind), intent(in) :: orb_bitmask(N_int) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + integer :: occ(N_int*bit_kind_size,2) integer :: n_occ_ab(2) - integer :: i,j,h1,h2,p1,istate + integer :: i,j,h1,h2,p1,istate integer :: exc(0:2,2,2) double precision :: phase - + logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. - beta_beta = .False. - alpha_beta = .False. + logical :: is_integer_in_string_local + alpha_alpha = .False. +! beta_beta = .False. +! alpha_beta = .False. spin_trace = .False. if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. + alpha_alpha = .True. else if(ispin == 4)then - spin_trace = .True. - endif - - call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int) - call get_single_excitation(det_1,det_2,exc,phase,N_int) - if(alpha_alpha.or.spin_trace)then - if (exc(0,1,1) == 1) then - ! Mono alpha - h1 = exc(1,1,1) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,1) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(1) - h2 = occ(i,1) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - enddo - else + spin_trace = .True. + else return - endif endif - end - subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) +! if(alpha_alpha.or.spin_trace)then + call get_single_excitation(det_1,det_2,exc,phase,N_int) + if (exc(0,1,1) == 1) then + ! Mono alpha + h1 = exc(1,1,1) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,1) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int) + + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + + phase = 0.5d0*phase + do i = 1, n_occ_ab(1) + h2 = occ(i,1) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + enddo + else + return + endif +! endif +end + +subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) use bitmasks BEGIN_DOC - ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for - ! - ! a given couple of determinant det_1, det_2 being a BETA SINGLE excitation with respect to one another - ! - ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 - ! - ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - ! - ! here, only ispin == 2 or 4 will do something + ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a BETA SINGLE excitation with respect to one another + ! + ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 + ! + ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 2 or 4 will do something END_DOC implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) - integer, intent(in) :: list_orb_reverse(mo_num) - integer(bit_kind), intent(in) :: orb_bitmask(N_int) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int,2),det_2(N_int,2) + integer, intent(in) :: list_orb_reverse(mo_num) + integer(bit_kind), intent(in) :: orb_bitmask(N_int) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + integer :: occ(N_int*bit_kind_size,2) integer :: n_occ_ab(2) - integer :: i,j,h1,h2,p1,istate + integer :: i,j,h1,h2,p1,istate integer :: exc(0:2,2,2) double precision :: phase logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. + logical :: is_integer_in_string_local +! alpha_alpha = .False. beta_beta = .False. - alpha_beta = .False. +! alpha_beta = .False. spin_trace = .False. - if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. + if(ispin == 2)then + beta_beta = .True. else if(ispin == 4)then - spin_trace = .True. - endif - - - call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int) - call get_single_excitation(det_1,det_2,exc,phase,N_int) - if(beta_beta.or.spin_trace)then - if (exc(0,1,1) == 1) then + spin_trace = .True. + else return - else - ! Mono beta - h1 = exc(1,1,2) - if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return - h1 = list_orb_reverse(h1) - p1 = exc(1,2,2) - if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return - p1 = list_orb_reverse(p1) - do i = 1, n_occ_ab(2) - h2 = occ(i,2) - if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle - h2 = list_orb_reverse(h2) - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = h2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 - enddo - endif endif - end - subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) +! if(beta_beta.or.spin_trace)then + call get_single_excitation(det_1,det_2,exc,phase,N_int) + if (exc(0,1,1) == 1) then + return + else + ! Mono beta + h1 = exc(1,1,2) + if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return + p1 = exc(1,2,2) + if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return + + call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int) + h1 = list_orb_reverse(h1) + p1 = list_orb_reverse(p1) + + phase = 0.5d0*phase + do i = 1, n_occ_ab(2) + h2 = occ(i,2) + if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle + h2 = list_orb_reverse(h2) + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = h2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + enddo + endif +! endif +end + + +subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) use bitmasks BEGIN_DOC - ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for - ! - ! a given couple of determinant det_1, det_2 being a ALPHA/ALPHA DOUBLE excitation with respect to one another - ! - ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 - ! - ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - ! - ! here, only ispin == 1 or 4 will do something + ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a ALPHA/ALPHA DOUBLE excitation with respect to one another + ! + ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 + ! + ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 1 or 4 will do something END_DOC implicit none - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int),det_2(N_int) - integer, intent(in) :: list_orb_reverse(mo_num) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - - - integer :: i,j,h1,h2,p1,p2,istate + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int),det_2(N_int) + integer, intent(in) :: list_orb_reverse(mo_num) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + + + integer :: i,j,h1,h2,p1,p2,istate integer :: exc(0:2,2) double precision :: phase - + logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. - beta_beta = .False. - alpha_beta = .False. + logical :: is_integer_in_string_local + alpha_alpha = .False. +! beta_beta = .False. +! alpha_beta = .False. spin_trace = .False. if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. + alpha_alpha = .True. else if(ispin == 4)then - spin_trace = .True. + spin_trace = .True. + else + return endif + call get_double_excitation_spin(det_1,det_2,exc,phase,N_int) + h1 =exc(1,1) + if(list_orb_reverse(h1).lt.0)return + h2 =exc(2,1) + if(list_orb_reverse(h2).lt.0)return + p1 =exc(1,2) + if(list_orb_reverse(p1).lt.0)return + p2 =exc(2,2) + if(list_orb_reverse(p2).lt.0)return + + h1 = list_orb_reverse(h1) + h2 = list_orb_reverse(h2) + p1 = list_orb_reverse(p1) + p2 = list_orb_reverse(p2) + + phase = 0.5d0*phase +! if(alpha_alpha.or.spin_trace)then + nkeys += 1 + + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p1 + keys(4,nkeys) = p2 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h1 + keys(2,nkeys) = h2 + keys(3,nkeys) = p2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p2 + keys(4,nkeys) = p1 + + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = p2 +! endif +end + +subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) + use bitmasks + BEGIN_DOC + ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for + ! + ! a given couple of determinant det_1, det_2 being a BETA /BETA DOUBLE excitation with respect to one another + ! + ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 + ! + ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation + ! + ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals + ! + ! ispin determines which spin-spin component of the two-rdm you will update + ! + ! ispin == 1 :: alpha/ alpha + ! ispin == 2 :: beta / beta + ! ispin == 3 :: alpha/ beta + ! ispin == 4 :: spin traced <=> total two-rdm + ! + ! here, only ispin == 2 or 4 will do something + END_DOC + implicit none + + integer, intent(in) :: ispin,sze_buff,N_st + integer(bit_kind), intent(in) :: det_1(N_int),det_2(N_int) + integer, intent(in) :: list_orb_reverse(mo_num) + double precision, intent(in) :: c_1(N_st) + double precision, intent(out) :: values(N_st,sze_buff) + integer , intent(out) :: keys(4,sze_buff) + integer , intent(inout) :: nkeys + + integer :: i,j,h1,h2,p1,p2,istate + integer :: exc(0:2,2) + double precision :: phase + logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace + logical :: is_integer_in_string_local +! alpha_alpha = .False. + beta_beta = .False. +! alpha_beta = .False. + spin_trace = .False. + if(ispin == 2)then + beta_beta = .True. + else if(ispin == 4)then + spin_trace = .True. + else + return + endif + call get_double_excitation_spin(det_1,det_2,exc,phase,N_int) h1 =exc(1,1) if(list_orb_reverse(h1).lt.0)return @@ -770,11 +905,12 @@ p2 =exc(2,2) if(list_orb_reverse(p2).lt.0)return p2 = list_orb_reverse(p2) - if(alpha_alpha.or.spin_trace)then + +! if(beta_beta.or.spin_trace)then + phase = 0.5d0*phase nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase + values(istate,nkeys) = c_1(istate) * phase enddo keys(1,nkeys) = h1 keys(2,nkeys) = h2 @@ -783,16 +919,16 @@ nkeys += 1 do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase + values(istate,nkeys) = - c_1(istate) * phase enddo keys(1,nkeys) = h1 keys(2,nkeys) = h2 keys(3,nkeys) = p2 keys(4,nkeys) = p1 - + nkeys += 1 do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase + values(istate,nkeys) = c_1(istate) * phase enddo keys(1,nkeys) = h2 keys(2,nkeys) = h1 @@ -801,115 +937,12 @@ nkeys += 1 do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase + values(istate,nkeys) = - c_1(istate) * phase enddo keys(1,nkeys) = h2 keys(2,nkeys) = h1 keys(3,nkeys) = p1 keys(4,nkeys) = p2 - endif - end - - subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - use bitmasks - BEGIN_DOC - ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for - ! - ! a given couple of determinant det_1, det_2 being a BETA /BETA DOUBLE excitation with respect to one another - ! - ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1 - ! - ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation - ! - ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals - ! - ! ispin determines which spin-spin component of the two-rdm you will update - ! - ! ispin == 1 :: alpha/ alpha - ! ispin == 2 :: beta / beta - ! ispin == 3 :: alpha/ beta - ! ispin == 4 :: spin traced <=> total two-rdm - ! - ! here, only ispin == 2 or 4 will do something - END_DOC - implicit none - - integer, intent(in) :: ispin,sze_buff,N_st - integer(bit_kind), intent(in) :: det_1(N_int),det_2(N_int) - integer, intent(in) :: list_orb_reverse(mo_num) - double precision, intent(in) :: c_1(N_st) - double precision, intent(out) :: values(N_st,sze_buff) - integer , intent(out) :: keys(4,sze_buff) - integer , intent(inout):: nkeys - - integer :: i,j,h1,h2,p1,p2,istate - integer :: exc(0:2,2) - double precision :: phase - logical :: alpha_alpha,beta_beta,alpha_beta,spin_trace - logical :: is_integer_in_string - alpha_alpha = .False. - beta_beta = .False. - alpha_beta = .False. - spin_trace = .False. - if( ispin == 1)then - alpha_alpha = .True. - else if(ispin == 2)then - beta_beta = .True. - else if(ispin == 3)then - alpha_beta = .True. - else if(ispin == 4)then - spin_trace = .True. - endif - - call get_double_excitation_spin(det_1,det_2,exc,phase,N_int) - h1 =exc(1,1) - if(list_orb_reverse(h1).lt.0)return - h1 = list_orb_reverse(h1) - h2 =exc(2,1) - if(list_orb_reverse(h2).lt.0)return - h2 = list_orb_reverse(h2) - p1 =exc(1,2) - if(list_orb_reverse(p1).lt.0)return - p1 = list_orb_reverse(p1) - p2 =exc(2,2) - if(list_orb_reverse(p2).lt.0)return - p2 = list_orb_reverse(p2) - if(beta_beta.or.spin_trace)then - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h1 - keys(2,nkeys) = h2 - keys(3,nkeys) = p2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p2 - keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 - endif - end +! endif +end From 2655f996228808592ab2e146f98eb52aa65292c5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Feb 2025 15:14:51 +0100 Subject: [PATCH 41/59] Accelerated 2rdm --- src/two_rdm_routines/davidson_like_2rdm.irp.f | 156 ++++++++++++------ 1 file changed, 103 insertions(+), 53 deletions(-) diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f index 0e6899f8..342b4318 100644 --- a/src/two_rdm_routines/davidson_like_2rdm.irp.f +++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f @@ -13,7 +13,7 @@ subroutine orb_range_2_rdm_openmp(big_array,dim1,norb,list_orb,ispin,u_0,N_st,sz END_DOC integer, intent(in) :: N_st,sze integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) double precision, intent(in) :: u_0(sze,N_st) integer :: k @@ -50,7 +50,7 @@ subroutine orb_range_2_rdm_openmp_work(big_array,dim1,norb,list_orb,ispin,u_t,N_ END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) double precision, intent(in) :: u_t(N_st,N_det) integer :: k @@ -91,7 +91,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) integer(omp_lock_kind) :: lock_2rdm integer :: i,j,k,l @@ -155,6 +155,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! Prepare the array of all alpha single excitations ! ------------------------------------------------- + double precision, allocatable :: big_array_local(:,:,:,:,:) + PROVIDE N_int nthreads_davidson elec_alpha_num !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det,lock_2rdm,& @@ -174,7 +176,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin !$OMP buffer, doubles, n_doubles, & !$OMP tmp_det2, idx, l, kcol_prev, & !$OMP singles_a, n_singles_a, singles_b, & - !$OMP n_singles_b, nkeys, keys, values) + !$OMP n_singles_b, nkeys, keys, values, big_array_local) ! Alpha/Beta double excitations ! ============================= @@ -185,6 +187,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin singles_b(maxab), & doubles(maxab), & idx(maxab)) + allocate( big_array_local(N_states,dim1, dim1, dim1, dim1) ) + big_array_local(:,:,:,:,:) = 0.d0 kcol_prev=-1 @@ -256,30 +260,32 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin do l= 1, N_states c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo - if(alpha_beta)then - ! only ONE contribution - if (nkeys+1 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 - endif - else if (spin_trace)then - ! TWO contributions +! if(alpha_beta)then +! ! only ONE contribution +! if (nkeys+1 .ge. sze_buff) then +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 +! endif +! else if (spin_trace)then +! ! TWO contributions if (nkeys+2 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif - endif +! endif call orb_range_off_diag_double_to_all_states_ab_dm_buffer(tmp_det,tmp_det2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) enddo endif enddo -! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) -! nkeys = 0 enddo - !$OMP END DO + !$OMP END DO NOWAIT +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + nkeys = 0 !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep @@ -334,33 +340,36 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! ---------------------------------- tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) - do i=1,n_singles_a - l_a = singles_a(i) - ASSERT (l_a <= N_det) + if(alpha_beta.or.spin_trace.or.alpha_alpha)then + do i=1,n_singles_a + l_a = singles_a(i) + ASSERT (l_a <= N_det) - lrow = psi_bilinear_matrix_rows(l_a) - ASSERT (lrow <= N_det_alpha_unique) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) + do l= 1, N_states + c_1(l) = u_t(l,l_a) * u_t(l,k_a) + enddo - tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) - do l= 1, N_states - c_1(l) = u_t(l,l_a) * u_t(l,k_a) - enddo - if(alpha_beta.or.spin_trace.or.alpha_alpha)then ! increment the alpha/beta part for single excitations if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) ! increment the alpha/alpha part for single excitations if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - endif - enddo + enddo + endif ! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) ! nkeys = 0 @@ -380,7 +389,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo if (nkeys+4 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) @@ -435,33 +445,37 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! ---------------------------------- tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) - do i=1,n_singles_b - l_b = singles_b(i) - ASSERT (l_b <= N_det) + if(alpha_beta.or.spin_trace.or.beta_beta)then + do i=1,n_singles_b + l_b = singles_b(i) + ASSERT (l_b <= N_det) - lcol = psi_bilinear_matrix_transp_columns(l_b) - ASSERT (lcol <= N_det_beta_unique) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) + l_a = psi_bilinear_matrix_transp_order(l_b) + do l= 1, N_states + c_1(l) = u_t(l,l_a) * u_t(l,k_a) + enddo - tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) - l_a = psi_bilinear_matrix_transp_order(l_b) - do l= 1, N_states - c_1(l) = u_t(l,l_a) * u_t(l,k_a) - enddo - if(alpha_beta.or.spin_trace.or.beta_beta)then ! increment the alpha/beta part for single excitations if (nkeys+2 * elec_alpha_num .ge. sze_buff ) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) ! increment the beta /beta part for single excitations if (nkeys+4 * elec_alpha_num .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - endif - enddo + enddo + endif + ! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) ! nkeys = 0 @@ -481,7 +495,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo if (nkeys+4 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_double_to_all_states_bb_dm_buffer(tmp_det(1,2),psi_det_beta_unique(1, lcol),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) @@ -517,16 +532,28 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,k_a) * u_t(l,k_a) enddo - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 + if (nkeys+elec_alpha_num*elec_alpha_num .ge. sze_buff) then +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + nkeys = 0 + endif + call orb_range_diag_to_all_states_2_rdm_dm_buffer(tmp_det,c_1,N_states,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 end do - !$OMP END DO + !$OMP END DO NOWAIT deallocate(buffer, singles_a, singles_b, doubles, idx, keys, values) - !$OMP END PARALLEL + !$OMP CRITICAL + do i=1,N_states + big_array(:,:,:,:,i) = big_array(:,:,:,:,i) + big_array_local(i,:,:,:,:) + enddo + !$OMP END CRITICAL + deallocate(big_array_local) + !$OMP END PARALLEL end @@ -593,3 +620,26 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc end +subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + use omp_lib + implicit none + integer, intent(in) :: n_st,nkeys,dim1 + integer, intent(in) :: keys(4,nkeys) + double precision, intent(in) :: values(n_st,nkeys) + double precision, intent(inout) :: big_array_local(n_st,dim1,dim1,dim1,dim1) + + integer :: istate + integer :: i,h1,h2,p1,p2 + + do i = 1, nkeys + do istate = 1, N_st + h1 = keys(1,i) + h2 = keys(2,i) + p1 = keys(3,i) + p2 = keys(4,i) + big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i) + enddo + enddo + +end + From 1d5f2f37351775a51ac61404985ef07a9901b188 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Feb 2025 15:26:22 +0100 Subject: [PATCH 42/59] Optim in 2RDM --- src/two_rdm_routines/davidson_like_2rdm.irp.f | 8 +-- src/two_rdm_routines/update_rdm.irp.f | 72 +++++++++---------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f index 342b4318..d92f1924 100644 --- a/src/two_rdm_routines/davidson_like_2rdm.irp.f +++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f @@ -632,11 +632,11 @@ subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_arr integer :: i,h1,h2,p1,p2 do i = 1, nkeys + h1 = keys(1,i) + h2 = keys(2,i) + p1 = keys(3,i) + p2 = keys(4,i) do istate = 1, N_st - h1 = keys(1,i) - h2 = keys(2,i) - p1 = keys(3,i) - p2 = keys(4,i) big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i) enddo enddo diff --git a/src/two_rdm_routines/update_rdm.irp.f b/src/two_rdm_routines/update_rdm.irp.f index 978f6e33..f01706f0 100644 --- a/src/two_rdm_routines/update_rdm.irp.f +++ b/src/two_rdm_routines/update_rdm.irp.f @@ -592,6 +592,15 @@ subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1, keys(3,nkeys) = p1 keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 do istate = 1, N_st values(istate,nkeys) = - c_1(istate) * phase @@ -609,15 +618,6 @@ subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1, keys(2,nkeys) = h1 keys(3,nkeys) = h2 keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 enddo else return @@ -706,6 +706,15 @@ subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1, keys(3,nkeys) = p1 keys(4,nkeys) = h2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = h2 + nkeys += 1 do istate = 1, N_st values(istate,nkeys) = - c_1(istate) * phase @@ -723,15 +732,6 @@ subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1, keys(2,nkeys) = h1 keys(3,nkeys) = h2 keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = h2 enddo endif ! endif @@ -814,6 +814,15 @@ subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1, keys(3,nkeys) = p1 keys(4,nkeys) = p2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = p2 + nkeys += 1 do istate = 1, N_st values(istate,nkeys) = - c_1(istate) * phase @@ -831,15 +840,6 @@ subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1, keys(2,nkeys) = h1 keys(3,nkeys) = p2 keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 ! endif end @@ -917,6 +917,15 @@ subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1, keys(3,nkeys) = p1 keys(4,nkeys) = p2 + nkeys += 1 + do istate = 1, N_st + values(istate,nkeys) = - c_1(istate) * phase + enddo + keys(1,nkeys) = h2 + keys(2,nkeys) = h1 + keys(3,nkeys) = p1 + keys(4,nkeys) = p2 + nkeys += 1 do istate = 1, N_st values(istate,nkeys) = - c_1(istate) * phase @@ -934,15 +943,6 @@ subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1, keys(2,nkeys) = h1 keys(3,nkeys) = p2 keys(4,nkeys) = p1 - - nkeys += 1 - do istate = 1, N_st - values(istate,nkeys) = - c_1(istate) * phase - enddo - keys(1,nkeys) = h2 - keys(2,nkeys) = h1 - keys(3,nkeys) = p1 - keys(4,nkeys) = p2 ! endif end From db48267259b6d8c627bb6987925714945c590895 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 27 Feb 2025 13:07:59 +0100 Subject: [PATCH 43/59] 1/sqrt(2) in basis correction proj --- external/ezfio | 2 +- src/mu_of_r/mu_of_r_conditions.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index d02132ea..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 3ad52a05..ba3675d4 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -269,7 +269,7 @@ BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ] ! epsilon mu_of_r_projector_mo(ipoint) = 1.d0/(2.d0*dacos(-1.d0) * mu_of_r_projector_mo(ipoint)**(2.d0/3.d0)) ! mu - mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( mu_of_r_projector_mo(ipoint) ) + mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( 2.d0*mu_of_r_projector_mo(ipoint) ) enddo END_PROVIDER From de26e8494ec59afdbae0e8f3c70337d94f498e1d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Feb 2025 11:39:43 +0100 Subject: [PATCH 44/59] Added do_mo_cholesky keyword --- src/mo_two_e_ints/EZFIO.cfg | 6 ++++++ src/mo_two_e_ints/cholesky.irp.f | 9 --------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index f26bfb61..8844b8a6 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -1,3 +1,9 @@ +[do_mo_cholesky] +type: logical +doc: Use Cholesky decomposition of MO integrals in CI calculations +interface: ezfio,provider,ocaml +default: False + [io_mo_cholesky] type: Disk_access doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 062e52e2..f4e390af 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,12 +1,3 @@ -BEGIN_PROVIDER [ logical, do_mo_cholesky ] - implicit none - BEGIN_DOC - ! If True, use Cholesky vectors for MO integrals - END_DOC - do_mo_cholesky = do_ao_cholesky -! do_mo_cholesky = .False. -END_PROVIDER - BEGIN_PROVIDER [ integer, cholesky_mo_num ] &BEGIN_PROVIDER [ integer, cholesky_mo_num_split, (1:5)] implicit none From 61ddf14ab9665ac8af4f2bd0aac07b4d071a5c69 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Feb 2025 18:04:16 +0100 Subject: [PATCH 45/59] Added average mu(r) with rho^2 --- .../basis_correction/print_routine.irp.f | 3 +- src/mu_of_r/mu_of_r_conditions.irp.f | 30 +++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index 8879fd5d..cc9744f2 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -79,7 +79,8 @@ subroutine print_basis_correction print*,'' print*,'**************' do istate = 1, N_states - write(*, '(A29,X,I3,X,A3,X,F16.10)') ' Average mu(r) , state ',istate,' = ',mu_average_prov(istate) + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' Average mu(r) [rho ], state ',istate,' = ',mu_average_prov(istate) + write(*, '(A29,X,I3,X,A3,X,F16.10)') ' Average mu(r) [rho^2], state ',istate,' = ',mu_average_prov2(istate) enddo end diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index ba3675d4..d13605f6 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -211,7 +211,7 @@ END_PROVIDER - BEGIN_PROVIDER [double precision, mu_average_prov, (N_states)] +BEGIN_PROVIDER [double precision, mu_average_prov, (N_states)] implicit none BEGIN_DOC ! average value of mu(r) weighted with the total one-e density and divided by the number of electrons @@ -233,7 +233,33 @@ enddo mu_average_prov(istate) = mu_average_prov(istate) / elec_num_grid_becke(istate) enddo - END_PROVIDER +END_PROVIDER + +BEGIN_PROVIDER [double precision, mu_average_prov2, (N_states)] + implicit none + BEGIN_DOC + ! average value of mu(r) weighted with square of the total one-e density + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the one- and two-body density matrix are excluded + END_DOC + integer :: ipoint,istate + double precision :: weight,density,norm + mu_average_prov2 = 0.d0 + do istate = 1, N_states + norm = 0.d0 + do ipoint = 1, n_points_final_grid + weight =final_weight_at_r_vector(ipoint) + density = one_e_dm_and_grad_alpha_in_r(4,ipoint,istate) & + + one_e_dm_and_grad_beta_in_r(4,ipoint,istate) + if(mu_of_r_prov(ipoint,istate).gt.1.d+09)cycle + mu_average_prov2(istate) += mu_of_r_prov(ipoint,istate) * weight * density*density + norm = norm + density*density*weight + enddo + mu_average_prov2(istate) = mu_average_prov2(istate) / norm + enddo +END_PROVIDER BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ] From 7d940958a3978b6dee271a3487e14d4a6a8d076f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Mar 2025 18:36:40 +0100 Subject: [PATCH 46/59] Update irpf90 --- external/ezfio | 2 +- external/irpf90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 diff --git a/external/irpf90 b/external/irpf90 index 4ab1b175..bd35d581 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 +Subproject commit bd35d581d917c452054c074b88667fe4b5e7ca27 From fa75be002a1fe17c9fd68041e2429084f609c94d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Mar 2025 18:45:46 +0100 Subject: [PATCH 47/59] Fix irpf90 --- external/irpf90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/irpf90 b/external/irpf90 index bd35d581..43160c60 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit bd35d581d917c452054c074b88667fe4b5e7ca27 +Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b From 1775df957c53f5139076e79f963e2191ca8c4d64 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Mar 2025 15:31:48 +0100 Subject: [PATCH 48/59] Added geometry optimization script --- scripts/qp_cipsi_rsh | 1 + scripts/qp_cipsi_rsh_mu_of_r | 1 + scripts/qp_geom_opt.py | 136 +++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 120000 scripts/qp_cipsi_rsh create mode 120000 scripts/qp_cipsi_rsh_mu_of_r create mode 100755 scripts/qp_geom_opt.py diff --git a/scripts/qp_cipsi_rsh b/scripts/qp_cipsi_rsh new file mode 120000 index 00000000..c3d4376b --- /dev/null +++ b/scripts/qp_cipsi_rsh @@ -0,0 +1 @@ +/home/scemama/qp2/plugins/qp_plugins_lct/stable/rsdft_cipsi/qp_cipsi_rsh \ No newline at end of file diff --git a/scripts/qp_cipsi_rsh_mu_of_r b/scripts/qp_cipsi_rsh_mu_of_r new file mode 120000 index 00000000..feef7119 --- /dev/null +++ b/scripts/qp_cipsi_rsh_mu_of_r @@ -0,0 +1 @@ +/home/scemama/qp2/plugins/qp_plugins_lct/stable/rsdft_cipsi/qp_cipsi_rsh_mu_of_r \ No newline at end of file diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py new file mode 100755 index 00000000..240a411f --- /dev/null +++ b/scripts/qp_geom_opt.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Usage: + qp_geom_opt [-s state] [-r executable] [-f] [-t tolerance] + +Options: + -s --state= Excited state to optimize + -f --scf Perform an SCF after each geomety change + -r --qp_run=executable Excited state to optimize + -t --tol=tolerance Convergence criterion on the energy +""" + + +try: + from docopt import docopt + from module_handler import ModuleHandler, get_dict_child + from module_handler import get_l_module_descendant + from qp_path import QP_SRC, QP_PLUGINS, QP_DATA, QP_ROOT +except ImportError: + print("Please check if you have sourced the ${QP_ROOT}/quantum_package.rc") + print("(`source ${QP_ROOT}/quantum_package.rc`)") + print(sys.exit(1)) + + +import numpy as np +import subprocess +from scipy.optimize import minimize +from ezfio import ezfio + +import sys + + +def set_unbuffered_output(): + """Ensure sys.stdout is unbuffered or line-buffered in a portable way.""" + if hasattr(sys.stdout, "reconfigure"): # Python 3.7+ + sys.stdout.reconfigure(line_buffering=True) + else: + sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) + +set_unbuffered_output() + + + + +def get_energy(file, state, arguments): + """Compute the energy of the given state by calling Quantum Package.""" + if not arguments["--qp_run"]: + raise ValueError("--qp_run option missing") + + if arguments["--scf"]: + executable = "scf" + else: + executable = "save_ortho_mos" + + result = subprocess.run(f"qp_run {executable} {file} > {file}.energy.out", + shell=True, capture_output=True, text=True, check=True + ) + + executable = arguments["--qp_run"] + result = subprocess.run( f"qp_run {executable} {file} > {file}.energy.out", + shell=True) + + energy = None + with open(f"{file}.energy.out", 'r') as f: + for line in f: + if "Energy of state" in line and f"{state}" in line: + energy = float(line.split()[-1]) # Extracts the energy value + + return energy + raise ValueError("Energy not found in Quantum Package output. Update script {sys.argv[0]}") + +def set_coordinates(coord): + """Update the nuclear coordinates in EZFIO.""" + ezfio.set_nuclei_nucl_coord(coord) + + +def get_coordinates(): + """Retrieve the current nuclear coordinates from EZFIO.""" + return np.array(ezfio.get_nuclei_nucl_coord()) + + +def energy_function(coord, file, state, arguments): + """Wrapper for the energy calculation, ensuring coordinates are updated.""" + set_coordinates(coord) + energy = get_energy(file, state, arguments) + + label = ezfio.get_nuclei_nucl_label() + num_atoms = len(label) + coord = coord.reshape(3, num_atoms).T # Reshape into (num_atoms, 3) + coord_angstrom = coord * 0.529177 # Convert atomic units to angstroms + + print(num_atoms) + print(f"Energy: {energy:15.10f}") + for i, (x, y, z) in enumerate(coord_angstrom): + print(f"{label[i]:3s} {x:15.8f} {y:15.8f} {z:15.8f}") # Replace 'X' with actual atomic symbols + return energy + + +def optimize_geometry(file, state, arguments): + """Perform geometry optimization using SciPy's minimize function.""" + + x0 = get_coordinates().flatten() + + if arguments["--tol"]: + tolerance = float(tol=arguments["--tol"]) + else: + tolerance = 1.e-3 + + result = minimize(energy_function, x0, args=(file, state, arguments), method='BFGS', jac=None, tol=tolerance, options={'eps': 1.e-3}) + + if result.success: + print("Optimization successful!") + print("Final energy:", result.fun) + print("Optimized coordinates:", result.x) + else: + print("Optimization failed:", result.message) + + set_coordinates(result.x) # Store the optimized geometry + return result + + +def main(arguments): + if arguments["--state"]: + state=arguments["--state"] + else: + state=1 + ezfio_filename = arguments[""] + ezfio.set_file(ezfio_filename) + + optimize_geometry(ezfio_filename, state, arguments) + + +if __name__ == "__main__": + ARG = docopt(__doc__) + main(ARG) From ad0c807a53c909b7c59b6fcae6d92d3fdcb8bf23 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Mar 2025 15:41:18 +0100 Subject: [PATCH 49/59] Change for Powell in geom_opt --- scripts/qp_geom_opt.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py index 240a411f..73c8ce8e 100755 --- a/scripts/qp_geom_opt.py +++ b/scripts/qp_geom_opt.py @@ -107,7 +107,16 @@ def optimize_geometry(file, state, arguments): else: tolerance = 1.e-3 - result = minimize(energy_function, x0, args=(file, state, arguments), method='BFGS', jac=None, tol=tolerance, options={'eps': 1.e-3}) + result = minimize(energy_function, x0, args=(file, state, arguments), + method='Powell', + tol=tolerance, + options={'xtol': tolerance, 'ftol': tolerance}) + +# result = minimize(energy_function, x0, args=(file, state, arguments), +# method='BFGS', +# jac=None, +# tol=tolerance, +# options={'eps': 1.e-3}) if result.success: print("Optimization successful!") From 69d5811d14dbaebce578aedd5ebf3d81b179ae22 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Mar 2025 15:54:55 +0100 Subject: [PATCH 50/59] Change for Powell in geom_opt --- scripts/qp_geom_opt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py index 73c8ce8e..c089a5d8 100755 --- a/scripts/qp_geom_opt.py +++ b/scripts/qp_geom_opt.py @@ -80,10 +80,16 @@ def get_coordinates(): return np.array(ezfio.get_nuclei_nucl_coord()) +memo_energy = {} def energy_function(coord, file, state, arguments): """Wrapper for the energy calculation, ensuring coordinates are updated.""" + h = np.array_str(coord) + if h in memo_energy: + return memo_energy[h] + set_coordinates(coord) energy = get_energy(file, state, arguments) + memo_energy[h] = energy label = ezfio.get_nuclei_nucl_label() num_atoms = len(label) From 3552856ca79f40687f331a885dbb603857fcc38b Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 11 Mar 2025 18:37:00 +0100 Subject: [PATCH 51/59] working on extra_basis_int --- external/ezfio | 2 +- .../extra_basis_int/extra_basis_int.irp.f | 22 ++++++++++---- .../local/extra_basis_int/pot_ao_ints.irp.f | 29 +++++++++++++++++++ src/ao_extra_basis/qp_copy_extra_basis | 2 +- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..d02132ea 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f index 1d35b1c2..13c31f36 100644 --- a/plugins/local/extra_basis_int/extra_basis_int.irp.f +++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f @@ -11,18 +11,28 @@ program extra_basis_int ! call routine_pot_ne ! call routine_test_pot_ne_extra_mixed ! call routine_test_coul_1s - call print_v_ne_extra_basis - call print_v_ne_basis +! call print_v_ne_extra_basis +! call print_v_ne_basis + call test_v_ne_a_extra_basis end +subroutine test_v_ne_a_extra_basis + implicit none + integer :: i,j + do i = 1, ao_extra_num + write(*,'(100(F16.10,X))')pot_vne_A_extra_basis(1:ao_extra_num,i) + enddo +end + + subroutine test_overlap implicit none integer :: i,j - do i = 1, ao_extra_num - do j = 1, ao_extra_num - write(33,*)ao_extra_overlap(j,i) - enddo + do i = 1, ao_num +! do j = 1, ao_num + write(33,'(100(F16.10,X))')ao_extra_overlap_mixed(i,1:ao_extra_num) +! enddo enddo end diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f index 5f3af244..1828f980 100644 --- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f +++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f @@ -1,3 +1,32 @@ +BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra_num)] + implicit none + BEGIN_DOC + ! + ! Computes the following integral : + ! $\sum_{R in nuclei} -Z $ + ! + ! + ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the extra basis + END_DOC + integer :: mu,nu,k_nucl + double precision :: mu_in, R_nucl(3),charge_nucl, integral + double precision :: NAI_pol_mult_erf_ao_extra + mu_in = 10.d0**10 + pot_vne_A_extra_basis = 0.d0 + do mu = 1, ao_extra_num + do nu = 1, ao_extra_num + do k_nucl = 1, nucl_num + R_nucl(1:3) = nucl_coord_transp(1:3,k_nucl) + charge_nucl = nucl_charge(k_nucl) + integral = NAI_pol_mult_erf_ao_extra(mu, nu, mu_in, R_nucl) + pot_vne_A_extra_basis(nu,mu) += -integral * charge_nucl + enddo + enddo + enddo + +END_PROVIDER + + double precision function NAI_pol_mult_erf_ao_extra(i_ao, j_ao, mu_in, C_center) diff --git a/src/ao_extra_basis/qp_copy_extra_basis b/src/ao_extra_basis/qp_copy_extra_basis index cb435e18..6d0e17b8 100755 --- a/src/ao_extra_basis/qp_copy_extra_basis +++ b/src/ao_extra_basis/qp_copy_extra_basis @@ -58,7 +58,7 @@ do done i=primitives_normalized newfile=primitives_normalized_extra -cp ${EZFIO_extra}/ao_basis/$i ${EZFIO_target}/ao_extra_basis/$newfile +cp ${EZFIO_extra}/basis/$i ${EZFIO_target}/ao_extra_basis/$newfile echo "COPYING ALL DATA FROM "$EZFIO_extra"/aux_quantities/ to "${EZFIO_target}"/ao_extra_basis/" i=data_one_e_dm_tot_ao.gz From a9a91f5a9fae8e34de7eed39b70d6a957607072f Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 11 Mar 2025 23:51:57 +0100 Subject: [PATCH 52/59] modified fit_1s_basis.irp.f --- external/irpf90 | 2 +- plugins/local/extra_basis_int/coul_1s.irp.f | 10 +++++ .../local/extra_basis_int/pot_ao_ints.irp.f | 44 ++++++++++++++----- .../local/extra_basis_int/v_mixed_extra.irp.f | 27 ++++++++++++ src/ao_extra_basis/fit_1s_basis.irp.f | 1 + 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/external/irpf90 b/external/irpf90 index 43160c60..4ab1b175 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/plugins/local/extra_basis_int/coul_1s.irp.f b/plugins/local/extra_basis_int/coul_1s.irp.f index 964ed36a..62ec4331 100644 --- a/plugins/local/extra_basis_int/coul_1s.irp.f +++ b/plugins/local/extra_basis_int/coul_1s.irp.f @@ -11,6 +11,11 @@ double precision function coul_full_ao_pq_r_1s(p,q,R,R_p,R_q) double precision, intent(in) :: R(3),R_p(3),R_q(3) integer, intent(in) :: p,q double precision :: coef,dist,P_pq(3),coefaos + if(.not.ao_extra_only_1s)then + print*,'You are using a function assuming that the extra basis is fitted on 1s functions' + print*,'But this is not the case apparently ... stopping' + stop + endif coefaos= ao_extra_coef_normalized(p,1) * ao_extra_coef_normalized(q,1) coef = inv_pi_gamma_pq_3_2_ao_extra(p,q) * E_pq_ao_extra(p,q) P_pq = ao_extra_expo(p,1) * R_p + ao_extra_expo(q,1) * R_q @@ -40,6 +45,11 @@ double precision function coul_pq_r_1s(p,q,R,R_p,R_q) double precision, intent(in) :: R(3),R_p(3),R_q(3) integer, intent(in) :: p,q double precision :: dist,P_pq(3) + if(.not.ao_extra_only_1s)then + print*,'You are using a function assuming that the extra basis is fitted on 1s functions' + print*,'But this is not the case apparently ... stopping' + stop + endif P_pq = ao_extra_expo(p,1) * R_p + ao_extra_expo(q,1) * R_q P_pq = P_pq * inv_gamma_pq_ao_extra(q,p) dist = (P_pq(1)-R(1)) * (P_pq(1)-R(1)) diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f index 1828f980..5a7d7580 100644 --- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f +++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f @@ -3,23 +3,45 @@ BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra BEGIN_DOC ! ! Computes the following integral : - ! $\sum_{R in nuclei} -Z $ + ! $\sum_{R in the USUAL nuclei} -Z $ ! ! - ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the extra basis + ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis END_DOC - integer :: mu,nu,k_nucl - double precision :: mu_in, R_nucl(3),charge_nucl, integral - double precision :: NAI_pol_mult_erf_ao_extra - mu_in = 10.d0**10 + integer :: mu,nu + double precision :: v_nucl_extra_ao pot_vne_A_extra_basis = 0.d0 do mu = 1, ao_extra_num do nu = 1, ao_extra_num - do k_nucl = 1, nucl_num - R_nucl(1:3) = nucl_coord_transp(1:3,k_nucl) - charge_nucl = nucl_charge(k_nucl) - integral = NAI_pol_mult_erf_ao_extra(mu, nu, mu_in, R_nucl) - pot_vne_A_extra_basis(nu,mu) += -integral * charge_nucl + pot_vne_A_extra_basis(nu,mu)= v_nucl_extra_ao(mu,nu) + enddo + enddo + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, pot_vne_extra_basis, (ao_num,ao_num)] + implicit none + BEGIN_DOC + ! + ! Computes the following integral : + ! $\sum_{R in EXTRA nuclei} -Z $ + ! + ! + ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the USUAL basis + END_DOC + integer :: mu,nu,k_nucl + double precision :: mu_in, R_nucl(3),charge_nucl, integral + double precision :: NAI_pol_mult_erf_ao + mu_in = 10.d0**10 + pot_vne_extra_basis = 0.d0 + do mu = 1, ao_num + do nu = 1, ao_num + do k_nucl = 1, extra_nucl_num + R_nucl(1:3) = extra_nucl_coord_transp(1:3,k_nucl) + charge_nucl = extra_nucl_charge(k_nucl) + integral = NAI_pol_mult_erf_ao(mu, nu, mu_in, R_nucl) + pot_vne_extra_basis(nu,mu) += -integral * charge_nucl enddo enddo enddo diff --git a/plugins/local/extra_basis_int/v_mixed_extra.irp.f b/plugins/local/extra_basis_int/v_mixed_extra.irp.f index ac856692..15958ab5 100644 --- a/plugins/local/extra_basis_int/v_mixed_extra.irp.f +++ b/plugins/local/extra_basis_int/v_mixed_extra.irp.f @@ -1,3 +1,6 @@ +!!! TODO:: optimize when "ao_extra_only_1s" is True + + double precision function v_extra_nucl_extra_ao(i_ao,j_ao) implicit none BEGIN_DOC @@ -23,6 +26,30 @@ double precision function v_extra_nucl_extra_ao(i_ao,j_ao) enddo end +double precision function v_extra_nucl_ao(i_ao,j_ao) + implicit none + BEGIN_DOC + ! + ! Computes the following integral : + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$. + ! + ! + ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis + ! + ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei + END_DOC + integer, intent(in) ::i_ao,j_ao + integer :: i + double precision :: mu_in, coord(3),charge, integral + double precision :: NAI_pol_mult_erf_ao + mu_in = 1.d+10 + do i = 1, extra_nucl_num + coord(1:3) = extra_nucl_coord_transp(1:3,i) + charge = extra_nucl_charge(i) + v_extra_nucl_ao += -NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, coord) * charge + enddo +end + double precision function v_nucl_extra_ao(i_ao,j_ao) implicit none diff --git a/src/ao_extra_basis/fit_1s_basis.irp.f b/src/ao_extra_basis/fit_1s_basis.irp.f index ef09d5b2..9c788173 100644 --- a/src/ao_extra_basis/fit_1s_basis.irp.f +++ b/src/ao_extra_basis/fit_1s_basis.irp.f @@ -31,6 +31,7 @@ program fit_1s_basis call ezfio_set_extra_nuclei_extra_nucl_label(new_nucl_label_1s) ! call ezfio_set_ao_extra_basis_ao_extra_num(n_func_tot) + call ezfio_set_ao_extra_basis_ao_extra_only_1s(.True.) call ezfio_set_ao_extra_basis_ao_extra_center(ao_extra_center) call ezfio_set_ao_extra_basis_ao_extra_nucl(new_ao_nucl_1s) call ezfio_set_ao_extra_basis_ao_extra_prim_num(new_ao_prim_num_1s) From a6091ef989993ea447c63d5dda4a56bf33ecec7c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Mar 2025 11:18:35 +0100 Subject: [PATCH 53/59] two_e_int.irp.f is OK --- .../extra_basis_int/extra_basis_int.irp.f | 19 ++++- .../extra_basis_int/ref_extra_basis.irp.f | 19 ++++- plugins/local/extra_basis_int/two_e_int.irp.f | 71 +++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 plugins/local/extra_basis_int/two_e_int.irp.f diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f index 13c31f36..1f2919ba 100644 --- a/plugins/local/extra_basis_int/extra_basis_int.irp.f +++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f @@ -13,7 +13,8 @@ program extra_basis_int ! call routine_test_coul_1s ! call print_v_ne_extra_basis ! call print_v_ne_basis - call test_v_ne_a_extra_basis +! call test_v_ne_a_extra_basis + call print_v_ee_mixed_direct end @@ -199,3 +200,19 @@ subroutine print_v_ne_basis print*,'accu = ',accu end + +subroutine print_v_ee_mixed_direct + implicit none + integer :: i,j,k,l + double precision :: ao_two_e_integral_mixed_direct + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_extra_num + do l = 1, ao_extra_num + write(34,*)ao_two_e_integral_mixed_direct(i, j, k, l) + enddo + enddo + enddo + enddo + +end diff --git a/plugins/local/extra_basis_int/ref_extra_basis.irp.f b/plugins/local/extra_basis_int/ref_extra_basis.irp.f index 39055fd0..8265bb9c 100644 --- a/plugins/local/extra_basis_int/ref_extra_basis.irp.f +++ b/plugins/local/extra_basis_int/ref_extra_basis.irp.f @@ -6,7 +6,8 @@ program pouet ! call routine_pot_ne_extra ! call ref_pot_ne_mixed ! call ref_pot_ne - call ref_pot_ne_extra_mixed +! call ref_pot_ne_extra_mixed + call ref_v_ee_mixed_direct end @@ -113,3 +114,19 @@ subroutine ref_pot_ne_extra_mixed enddo enddo end + +subroutine ref_v_ee_mixed_direct + implicit none + integer :: i,j,k,l + double precision :: ao_two_e_integral + do i = 1, 15 + do j = 1, 15 + do k = 16, ao_num + do l = 16, ao_num + write(33,*)ao_two_e_integral(i, j, k, l) + enddo + enddo + enddo + enddo + +end diff --git a/plugins/local/extra_basis_int/two_e_int.irp.f b/plugins/local/extra_basis_int/two_e_int.irp.f new file mode 100644 index 00000000..a055c437 --- /dev/null +++ b/plugins/local/extra_basis_int/two_e_int.irp.f @@ -0,0 +1,71 @@ +double precision function ao_two_e_integral_mixed_direct(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + ! + ! where i,j belong to the REGULAR AO basis and k,l to the EXTRA basis + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) + integer :: iorder_p(3), iorder_q(3) + double precision :: I_center(3), J_center(3), K_center(3), L_center(3) + double precision :: integral + double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp + double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq + double precision :: general_primitive_integral + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_extra_nucl(k) + num_l = ao_extra_nucl(l) + ao_two_e_integral_mixed_direct = 0.d0 + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_extra_power(k,p) + L_power(p) = ao_extra_power(l,p) + I_center(p) = nucl_coord(num_i,p) + J_center(p) = nucl_coord(num_j,p) + K_center(p) = extra_nucl_coord(num_k,p) + L_center(p) = extra_nucl_coord(num_l,p) + enddo + + double precision :: coef1, coef2, coef3, coef4 + double precision :: p_inv,q_inv + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & + I_power,J_power,I_center,J_center,dim1) + p_inv = 1.d0/pp + do r = 1, ao_extra_prim_num(k) + coef3 = coef2*ao_extra_coef_normalized_ordered_transp(r,k) + do s = 1, ao_extra_prim_num(l) + coef4 = coef3*ao_extra_coef_normalized_ordered_transp(s,l) + call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& + ao_extra_expo_ordered_transp(r,k),ao_extra_expo_ordered_transp(s,l), & + K_power,L_power,K_center,L_center,dim1) + q_inv = 1.d0/qq + integral = general_primitive_integral(dim1, & + P_new,P_center,fact_p,pp,p_inv,iorder_p, & + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_two_e_integral_mixed_direct = ao_two_e_integral_mixed_direct + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + +end From 52fdf18e74bf7551280299aeee51cfd18d59d2e8 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Mar 2025 11:25:27 +0100 Subject: [PATCH 54/59] added exchange integrals --- .../extra_basis_int/extra_basis_int.irp.f | 19 ++++- .../extra_basis_int/ref_extra_basis.irp.f | 19 ++++- plugins/local/extra_basis_int/two_e_int.irp.f | 76 ++++++++++++++++++- 3 files changed, 111 insertions(+), 3 deletions(-) diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f index 1f2919ba..59b90374 100644 --- a/plugins/local/extra_basis_int/extra_basis_int.irp.f +++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f @@ -14,7 +14,8 @@ program extra_basis_int ! call print_v_ne_extra_basis ! call print_v_ne_basis ! call test_v_ne_a_extra_basis - call print_v_ee_mixed_direct +! call print_v_ee_mixed_direct + call print_v_ee_mixed_exchange end @@ -216,3 +217,19 @@ subroutine print_v_ee_mixed_direct enddo end + +subroutine print_v_ee_mixed_exchange + implicit none + integer :: i,j,k,l + double precision :: ao_two_e_integral_mixed_exchange + do i = 1, ao_num + do j = 1, ao_extra_num + do k = 1, ao_num + do l = 1, ao_extra_num + write(34,*)ao_two_e_integral_mixed_exchange(i, j, k, l) + enddo + enddo + enddo + enddo + +end diff --git a/plugins/local/extra_basis_int/ref_extra_basis.irp.f b/plugins/local/extra_basis_int/ref_extra_basis.irp.f index 8265bb9c..70d77733 100644 --- a/plugins/local/extra_basis_int/ref_extra_basis.irp.f +++ b/plugins/local/extra_basis_int/ref_extra_basis.irp.f @@ -7,7 +7,8 @@ program pouet ! call ref_pot_ne_mixed ! call ref_pot_ne ! call ref_pot_ne_extra_mixed - call ref_v_ee_mixed_direct +! call ref_v_ee_mixed_direct + call ref_v_ee_mixed_exchange end @@ -130,3 +131,19 @@ subroutine ref_v_ee_mixed_direct enddo end + +subroutine ref_v_ee_mixed_exchange + implicit none + integer :: i,j,k,l + double precision :: ao_two_e_integral + do i = 1, 15 + do j = 16, ao_num + do k = 1, 15 + do l = 16, ao_num + write(33,*)ao_two_e_integral(i, j, k, l) + enddo + enddo + enddo + enddo + +end diff --git a/plugins/local/extra_basis_int/two_e_int.irp.f b/plugins/local/extra_basis_int/two_e_int.irp.f index a055c437..2cde4153 100644 --- a/plugins/local/extra_basis_int/two_e_int.irp.f +++ b/plugins/local/extra_basis_int/two_e_int.irp.f @@ -3,8 +3,9 @@ double precision function ao_two_e_integral_mixed_direct(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) ! i(r1) j(r1) 1/r12 k(r2) l(r2) + ! A A B B ! - ! where i,j belong to the REGULAR AO basis and k,l to the EXTRA basis + ! where i,j belong to the REGULAR AO basis (system A) and k,l to the EXTRA basis (system B) END_DOC implicit none @@ -69,3 +70,76 @@ double precision function ao_two_e_integral_mixed_direct(i, j, k, l) enddo ! p end + +double precision function ao_two_e_integral_mixed_exchange(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + ! A B A B + ! + ! where i,k belong to the REGULAR AO basis (system A) and j,l to the EXTRA basis (system B) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) + integer :: iorder_p(3), iorder_q(3) + double precision :: I_center(3), J_center(3), K_center(3), L_center(3) + double precision :: integral + double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp + double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq + double precision :: general_primitive_integral + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_extra_nucl(j) + num_k = ao_nucl(k) + num_l = ao_extra_nucl(l) + ao_two_e_integral_mixed_exchange = 0.d0 + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_extra_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_extra_power(l,p) + I_center(p) = nucl_coord(num_i,p) + J_center(p) = extra_nucl_coord(num_j,p) + K_center(p) = nucl_coord(num_k,p) + L_center(p) = extra_nucl_coord(num_l,p) + enddo + + double precision :: coef1, coef2, coef3, coef4 + double precision :: p_inv,q_inv + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_extra_prim_num(j) + coef2 = coef1*ao_extra_coef_normalized_ordered_transp(q,j) + call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& + ao_expo_ordered_transp(p,i),ao_extra_expo_ordered_transp(q,j), & + I_power,J_power,I_center,J_center,dim1) + p_inv = 1.d0/pp + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_extra_prim_num(l) + coef4 = coef3*ao_extra_coef_normalized_ordered_transp(s,l) + call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& + ao_expo_ordered_transp(r,k),ao_extra_expo_ordered_transp(s,l), & + K_power,L_power,K_center,L_center,dim1) + q_inv = 1.d0/qq + integral = general_primitive_integral(dim1, & + P_new,P_center,fact_p,pp,p_inv,iorder_p, & + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_two_e_integral_mixed_exchange = ao_two_e_integral_mixed_exchange + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + +end From ffe7a8485f56c15a0b099a04c680a1fbd9eb57e9 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Mar 2025 12:25:09 +0100 Subject: [PATCH 55/59] added direct, exchange between extra_basis and usual basis, together with a tuto in ao_extra_basis --- .../local/extra_basis_int/ao_overlap.irp.f | 6 +++-- .../local/extra_basis_int/pot_ao_ints.irp.f | 1 - .../local/extra_basis_int/v_mixed_extra.irp.f | 12 ++++----- src/ao_extra_basis/README.rst | 5 +++- src/ao_extra_basis/tuto/He_A.xyz | 3 +++ src/ao_extra_basis/tuto/example_copy.sh | 26 +++++++++++++++++++ src/ao_extra_basis/tuto/h2o.xyz | 7 +++++ 7 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 src/ao_extra_basis/tuto/He_A.xyz create mode 100755 src/ao_extra_basis/tuto/example_copy.sh create mode 100644 src/ao_extra_basis/tuto/h2o.xyz diff --git a/plugins/local/extra_basis_int/ao_overlap.irp.f b/plugins/local/extra_basis_int/ao_overlap.irp.f index 4f8debb6..9e45e690 100644 --- a/plugins/local/extra_basis_int/ao_overlap.irp.f +++ b/plugins/local/extra_basis_int/ao_overlap.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [double precision, ao_extra_overlap , (ao_extra_num, ao_extra_num)] BEGIN_DOC - ! Overlap between atomic basis functions: + ! Overlap between atomic basis functions belonging to the EXTRA BASIS ! ! :math:`\int \chi_i(r) \chi_j(r) dr` END_DOC @@ -69,7 +69,9 @@ END_PROVIDER BEGIN_PROVIDER [double precision, ao_extra_overlap_mixed , (ao_num, ao_extra_num)] BEGIN_DOC - ! Overlap between atomic basis functions: + ! Overlap between atomic basis functions: + ! + ! first index belongs to the REGULAR AO basis, second to the EXTRA basis ! ! END_DOC diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f index 5a7d7580..f52e7579 100644 --- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f +++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f @@ -5,7 +5,6 @@ BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra ! Computes the following integral : ! $\sum_{R in the USUAL nuclei} -Z $ ! - ! ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis END_DOC integer :: mu,nu diff --git a/plugins/local/extra_basis_int/v_mixed_extra.irp.f b/plugins/local/extra_basis_int/v_mixed_extra.irp.f index 15958ab5..8b8ce92e 100644 --- a/plugins/local/extra_basis_int/v_mixed_extra.irp.f +++ b/plugins/local/extra_basis_int/v_mixed_extra.irp.f @@ -9,9 +9,9 @@ double precision function v_extra_nucl_extra_ao(i_ao,j_ao) ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne^{extra}(r)$. ! ! - ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis + ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis (system B) ! - ! and v_ne^{extra}(r) is the Coulomb potential coming from the EXTRA nuclei + ! and v_ne^{extra}(r) is the Coulomb potential coming from the EXTRA nuclei (system B) END_DOC integer, intent(in) ::i_ao,j_ao double precision :: mu_in,charge,coord(3) @@ -34,9 +34,9 @@ double precision function v_extra_nucl_ao(i_ao,j_ao) ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$. ! ! - ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis + ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis (system A) ! - ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei + ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei (system B) END_DOC integer, intent(in) ::i_ao,j_ao integer :: i @@ -59,9 +59,9 @@ double precision function v_nucl_extra_ao(i_ao,j_ao) ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$. ! ! - ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis + ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis (system B) ! - ! and v_ne(r) is the Coulomb potential coming from the REGULAR nuclei + ! and v_ne(r) is the Coulomb potential coming from the REGULAR nuclei (system A) END_DOC integer, intent(in) ::i_ao,j_ao double precision :: mu_in,charge,coord(3) diff --git a/src/ao_extra_basis/README.rst b/src/ao_extra_basis/README.rst index f60d71c0..5f850255 100644 --- a/src/ao_extra_basis/README.rst +++ b/src/ao_extra_basis/README.rst @@ -5,7 +5,9 @@ extra_basis Plugin to handle an extra basis, which is attached to the extra_nuclei. It is essentially a duplication of all important quantities (coefficients, exponents and so on) of the usual |AO| basis. -An interesting feature is the possibility to fit any basis made at most with "p" functions onto a purely "s" basis. +Check in the directory "tuto" for a simple example of how to create a fictious system "B" attached independently to a system "A" + +Another interesting feature is the possibility to fit any basis made at most with "p" functions onto a purely "s" basis. This is done with the various scripts here: - qp_fit_1s_basis : script that creates an |EZFIO| folder corresponding to an .xyz file and a basis fitted with only "s" functions @@ -13,3 +15,4 @@ This is done with the various scripts here: Ex: qp_add_extra_fit_system LiH.ezfio/ h2o.xyz # takes the EZFIO folder "LiH.ezfio" and creates all necessary additional basis and nuclei based on h2o.xyz, but only with 1s functions. + diff --git a/src/ao_extra_basis/tuto/He_A.xyz b/src/ao_extra_basis/tuto/He_A.xyz new file mode 100644 index 00000000..d5285ade --- /dev/null +++ b/src/ao_extra_basis/tuto/He_A.xyz @@ -0,0 +1,3 @@ +1 +He atom "A" +He 0. 0. 0. diff --git a/src/ao_extra_basis/tuto/example_copy.sh b/src/ao_extra_basis/tuto/example_copy.sh new file mode 100755 index 00000000..0677b183 --- /dev/null +++ b/src/ao_extra_basis/tuto/example_copy.sh @@ -0,0 +1,26 @@ +source ~/qp2/quantum_package.rc +## Example of how to generate an additional h2o molecule, stored as a extra basis/nuclei etc .. to an He + +sys_B=h2o.xyz +basis_B=sto-3g +output_B=${sys_B%.xyz}_${basis_B} + +sys_A=He_A.xyz +basis_A=cc-pvtz +output_A=${sys_A%.xyz}_${basis_A}_extra_${output_B} + +# we create the system "B" that will be attached as an "extra system" to the syste "A" +qp create_ezfio -b $basis_B $sys_B -o ${output_B} +# we perform an HF calculation to obtain the AO density matrix +qp run scf +# we save the density matrix in the EZFIO +qp run save_one_e_dm +# we create the system "A" +qp create_ezfio -b $basis_A $sys_A -o ${output_A} +# We perform an SCF calculation +qp run scf +# we copy the system "B" information as extra nuclei/basis etc in the EZFIO of system "A" +qp_copy_extra_basis ${output_B} ${output_A} + +# we execute an example of progra that prints a lot of useful integrals/information on the A-B interaction +qp run test_extra_basis | tee ${output_A}.test_extra_basis diff --git a/src/ao_extra_basis/tuto/h2o.xyz b/src/ao_extra_basis/tuto/h2o.xyz new file mode 100644 index 00000000..d3928214 --- /dev/null +++ b/src/ao_extra_basis/tuto/h2o.xyz @@ -0,0 +1,7 @@ +3 + +O 0.000000 -0.399441 3.000000 +H 0.761232 0.199721 3.000000 +H -0.761232 0.199721 3.000000 + + From 2037939abb877d983991d9a15dfff361549a6972 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Mar 2025 12:29:29 +0100 Subject: [PATCH 56/59] corrected a bug in qp2/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f --- plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f b/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f index e71d49fc..68d965cb 100644 --- a/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f +++ b/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f @@ -30,6 +30,7 @@ BEGIN_PROVIDER [ double precision, trace_ao_one_e_ints] ! have the same number of functions END_DOC integer :: i,j + double precision :: accu double precision, allocatable :: inv_overlap_times_integrals(:,:) ! = h S^{-1} allocate(inv_overlap_times_integrals(ao_num,ao_num)) ! routine that computes the product of two matrices, you can check it with From d84edaad5f386a316280e02a9deb2c7821ac6057 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Mar 2025 14:36:24 +0100 Subject: [PATCH 57/59] Fix ao_sphe_overlap --- etc/openmp.rc | 2 +- src/ao_one_e_ints/ao_ortho_canonical.irp.f | 49 +++++++++++----------- src/mo_basis/mos.irp.f | 9 ++++ 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/etc/openmp.rc b/etc/openmp.rc index 7f71c9b8..a8ced5a0 100644 --- a/etc/openmp.rc +++ b/etc/openmp.rc @@ -1 +1 @@ -export OMP_NESTED=True +#export OMP_NESTED=True diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f index eff7e7be..747d9183 100644 --- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f +++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f @@ -1,5 +1,5 @@ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)] -&BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num ] +&BEGIN_PROVIDER [ integer, ao_sphe_num ] implicit none BEGIN_DOC ! Coefficients to go from cartesian to spherical coordinates in the current @@ -13,23 +13,23 @@ ao_cart_to_sphe_coef(:,:) = 0.d0 ! Assume order provided by ao_power_index i = 1 - ao_cart_to_sphe_num = 0 + ao_sphe_num = 0 do while (i <= ao_num) select case ( ao_l(i) ) case (0) - ao_cart_to_sphe_num += 1 - ao_cart_to_sphe_coef(i,ao_cart_to_sphe_num) = 1.d0 + ao_sphe_num += 1 + ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0 i += 1 BEGIN_TEMPLATE case ($SHELL) if (ao_power(i,1) == $SHELL) then do k=1,size(cart_to_sphe_$SHELL,2) do j=1,size(cart_to_sphe_$SHELL,1) - ao_cart_to_sphe_coef(i+j-1,ao_cart_to_sphe_num+k) = cart_to_sphe_$SHELL(j,k) + ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k) enddo enddo i += size(cart_to_sphe_$SHELL,1) - ao_cart_to_sphe_num += size(cart_to_sphe_$SHELL,2) + ao_sphe_num += size(cart_to_sphe_$SHELL,2) endif SUBST [ SHELL ] 1;; @@ -49,36 +49,36 @@ END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_overlap, (ao_cart_to_sphe_num,ao_cart_to_sphe_num) ] +BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ] implicit none BEGIN_DOC ! |AO| overlap matrix in the spherical basis set END_DOC - double precision, allocatable :: S(:,:) - allocate (S(ao_cart_to_sphe_num,ao_num)) + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) - call dgemm('T','N',ao_cart_to_sphe_num,ao_num,ao_num, 1.d0, & + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), & - ao_overlap,size(ao_overlap,1), 0.d0, & - S, size(S,1)) + S_inv,size(ao_overlap,1), 0.d0, & + tmp, size(tmp,1)) - call dgemm('N','N',ao_cart_to_sphe_num,ao_cart_to_sphe_num,ao_num, 1.d0, & - S, size(S,1), & + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), 0.d0, & - ao_cart_to_sphe_overlap,size(ao_cart_to_sphe_overlap,1)) + ao_sphe_overlap,size(ao_sphe_overlap,1)) - deallocate(S) + deallocate(tmp) END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_cart_to_sphe_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ] implicit none BEGIN_DOC ! Inverse of :c:data:`ao_cart_to_sphe_coef` END_DOC call get_pseudo_inverse(ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1),& - ao_num,ao_cart_to_sphe_num, & + ao_num,ao_sphe_num, & ao_cart_to_sphe_inv, size(ao_cart_to_sphe_inv,1), lin_dep_cutoff) END_PROVIDER @@ -120,17 +120,17 @@ END_PROVIDER double precision, allocatable :: S(:,:) - allocate(S(ao_cart_to_sphe_num,ao_cart_to_sphe_num)) + allocate(S(ao_sphe_num,ao_sphe_num)) S = 0.d0 - do i=1,ao_cart_to_sphe_num + do i=1,ao_sphe_num S(i,i) = 1.d0 enddo - ao_ortho_canonical_num = ao_cart_to_sphe_num - call ortho_canonical(ao_cart_to_sphe_overlap, size(ao_cart_to_sphe_overlap,1), & - ao_cart_to_sphe_num, S, size(S,1), ao_ortho_canonical_num, lin_dep_cutoff) + ao_ortho_canonical_num = ao_sphe_num + call ortho_canonical(ao_sphe_overlap, size(ao_sphe_overlap,1), & + ao_sphe_num, S, size(S,1), ao_ortho_canonical_num, lin_dep_cutoff) - call dgemm('N','N', ao_num, ao_ortho_canonical_num, ao_cart_to_sphe_num, 1.d0, & + call dgemm('N','N', ao_num, ao_ortho_canonical_num, ao_sphe_num, 1.d0, & ao_cart_to_sphe_coef, size(ao_cart_to_sphe_coef,1), & S, size(S,1), & 0.d0, ao_ortho_canonical_coef, size(ao_ortho_canonical_coef,1)) @@ -167,3 +167,4 @@ BEGIN_PROVIDER [double precision, ao_ortho_canonical_overlap, (ao_ortho_canonica enddo enddo END_PROVIDER + diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f index 1ed859ee..a8b5441d 100644 --- a/src/mo_basis/mos.irp.f +++ b/src/mo_basis/mos.irp.f @@ -346,3 +346,12 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA) deallocate(T) end + +BEGIN_PROVIDER [ double precision, mo_coef_spherical] + implicit none + BEGIN_DOC + ! MO coefficients in the basis of spherical harmonics AOs. + END_DOC + +END_PROVIDER + From 8ae207b3432f8ac3aab44b21614e1bb6ee213c51 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Mar 2025 18:11:37 +0100 Subject: [PATCH 58/59] Implemented cartesian -> spherical AO conversion --- scripts/qp_import_trexio.py | 5 +- src/ao_basis/aos.irp.f | 25 +++++ src/ao_basis/spherical_to_cartesian.irp.f | 120 +++++++++++++++++++-- src/ao_one_e_ints/ao_one_e_ints.irp.f | 16 +++ src/ao_one_e_ints/ao_ortho_canonical.irp.f | 54 +++++----- src/ao_one_e_ints/ao_overlap.irp.f | 23 ++++ src/ao_one_e_ints/kin_ao_ints.irp.f | 22 ++++ src/ao_one_e_ints/pot_ao_ints.irp.f | 22 ++++ src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f | 64 +++++++++++ src/mo_basis/mos.irp.f | 12 ++- src/trexio/EZFIO.cfg | 7 ++ src/trexio/export_trexio_routines.irp.f | 114 +++++++++++++------- src/utils/linear_algebra.irp.f | 9 -- 13 files changed, 407 insertions(+), 86 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index a515efba..b8d0d8ed 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -271,12 +271,11 @@ def write_ezfio(trexio_filename, filename): if basis_type.lower() == "gaussian" and not cartesian: try: import trexio_tools - fd, tmp = tempfile.mkstemp() - os.close(fd) + tmp = "cartesian_"+trexio_filename retcode = subprocess.call(["trexio", "convert-to", "-t", "cartesian", "-o", tmp, trexio_filename]) trexio_file_cart = trexio.File(tmp,mode='r',back_end=trexio.TREXIO_AUTO) cartesian = trexio.read_ao_cartesian(trexio_file_cart) - os.unlink(tmp) + ezfio.set_trexio_trexio_file(tmp) except: pass diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f index 440cc865..d9381015 100644 --- a/src/ao_basis/aos.irp.f +++ b/src/ao_basis/aos.irp.f @@ -20,7 +20,22 @@ BEGIN_PROVIDER [ integer, ao_shell, (ao_num) ] ao_shell(k) = i enddo enddo +END_PROVIDER +BEGIN_PROVIDER [ integer, ao_sphe_shell, (ao_sphe_num) ] + implicit none + BEGIN_DOC + ! Index of the shell to which the AO corresponds + END_DOC + integer :: i, j, k, n + k=0 + do i=1,shell_num + n = shell_ang_mom(i) + do j=-n,n + k = k+1 + ao_sphe_shell(k) = i + enddo + enddo END_PROVIDER BEGIN_PROVIDER [ integer, ao_first_of_shell, (shell_num) ] @@ -133,6 +148,16 @@ END_PROVIDER enddo enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, ao_sphe_coef_normalization_factor, (ao_sphe_num) ] + implicit none + BEGIN_DOC + ! Normalization factor in spherical AO basis + END_DOC + + ao_sphe_coef_normalization_factor(:) = 1.d0 + END_PROVIDER BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num,ao_prim_num_max) ] diff --git a/src/ao_basis/spherical_to_cartesian.irp.f b/src/ao_basis/spherical_to_cartesian.irp.f index 336161f8..53a60413 100644 --- a/src/ao_basis/spherical_to_cartesian.irp.f +++ b/src/ao_basis/spherical_to_cartesian.irp.f @@ -4,7 +4,8 @@ ! First index is the index of the cartesian AO, obtained by ao_power_index ! Second index is the index of the spherical AO -BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_0, (1) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=0 @@ -12,10 +13,12 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ] cart_to_sphe_0 = 0.d0 cart_to_sphe_0 ( 1, 1) = 1.0d0 + cart_to_sphe_norm_0 (1) = 1.d0 END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_1, (3) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=1 @@ -25,10 +28,14 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ] cart_to_sphe_1 ( 3, 1) = 1.0d0 cart_to_sphe_1 ( 1, 2) = 1.0d0 cart_to_sphe_1 ( 2, 3) = 1.0d0 + cart_to_sphe_norm_1 (1) = 1.d0 + cart_to_sphe_norm_1 (2) = 1.d0 + cart_to_sphe_norm_1 (3) = 1.d0 END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_2, (6) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=2 @@ -43,10 +50,14 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ] cart_to_sphe_2 ( 1, 4) = 0.86602540378443864676d0 cart_to_sphe_2 ( 4, 4) = -0.86602540378443864676d0 cart_to_sphe_2 ( 2, 5) = 1.0d0 + + cart_to_sphe_norm_2 = (/ 1.0d0, 1.7320508075688772d0, 1.7320508075688772d0, 1.0d0, & + 1.7320508075688772d0, 1.0d0 /) END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_3, (10) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=3 @@ -69,10 +80,15 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ] cart_to_sphe_3 ( 4, 6) = -1.0606601717798212866d0 cart_to_sphe_3 ( 2, 7) = 1.0606601717798212866d0 cart_to_sphe_3 ( 7, 7) = -0.790569415042094833d0 + + cart_to_sphe_norm_3 = (/ 1.0d0, 2.23606797749979d0, 2.23606797749979d0, & + 2.23606797749979d0, 3.872983346207417d0, 2.23606797749979d0, 1.0d0, 2.23606797749979d0, & + 2.23606797749979d0, 1.d00 /) END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_4, (15) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=4 @@ -107,10 +123,18 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ] cart_to_sphe_4 (11, 8) = 0.73950997288745200532d0 cart_to_sphe_4 ( 2, 9) = 1.1180339887498948482d0 cart_to_sphe_4 ( 7, 9) = -1.1180339887498948482d0 + + cart_to_sphe_norm_4 = (/ 1.0d0, 2.6457513110645907d0, 2.6457513110645907d0, & + 3.4156502553198664d0, 5.916079783099616d0, 3.415650255319866d0, & + 2.6457513110645907d0, 5.916079783099616d0, 5.916079783099616d0, & + 2.6457513110645907d0, 1.0d0, 2.6457513110645907d0, 3.415650255319866d0, & + 2.6457513110645907d0, 1.d00 /) + END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_5, (21) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=5 @@ -163,10 +187,18 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ] cart_to_sphe_5 ( 2,11) = 1.169267933366856683d0 cart_to_sphe_5 ( 7,11) = -1.5309310892394863114d0 cart_to_sphe_5 (16,11) = 0.7015607600201140098d0 + + cart_to_sphe_norm_5 = (/ 1.0d0, 3.0d0, 3.0d0, 4.58257569495584d0, & + 7.937253933193773d0, 4.58257569495584d0, 4.58257569495584d0, & + 10.246950765959598d0, 10.246950765959598d0, 4.582575694955841d0, 3.0d0, & + 7.937253933193773d0, 10.246950765959598d0, 7.937253933193773d0, 3.0d0, 1.0d0, & + 3.0d0, 4.58257569495584d0, 4.582575694955841d0, 3.0d0, 1.d00 /) + END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_6, (28) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=6 @@ -243,10 +275,22 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ] cart_to_sphe_6 ( 2,13) = 1.2151388809514737933d0 cart_to_sphe_6 ( 7,13) = -1.9764235376052370825d0 cart_to_sphe_6 (16,13) = 1.2151388809514737933d0 + + cart_to_sphe_norm_6 = (/ 1.0d0, 3.3166247903554003d0, 3.3166247903554003d0, & + 5.744562646538029d0, 9.949874371066201d0, 5.744562646538029d0, & + 6.797058187186571d0, 15.198684153570666d0, 15.198684153570664d0, & + 6.797058187186572d0, 5.744562646538029d0, 15.198684153570666d0, & + 19.621416870348583d0, 15.198684153570666d0, 5.744562646538029d0, & + 3.3166247903554003d0, 9.949874371066201d0, 15.198684153570664d0, & + 15.198684153570666d0, 9.9498743710662d0, 3.3166247903554003d0, 1.0d0, & + 3.3166247903554003d0, 5.744562646538029d0, 6.797058187186572d0, & + 5.744562646538029d0, 3.3166247903554003d0, 1.d00 /) + END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_7, (36) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=7 @@ -355,10 +399,25 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ] cart_to_sphe_7 ( 7,15) = -2.4456993503903949804d0 cart_to_sphe_7 (16,15) = 1.96875d0 cart_to_sphe_7 (29,15) = -0.64725984928774934788d0 + + cart_to_sphe_norm_7 = (/ 1.0d0, 3.6055512754639896d0, 3.605551275463989d0, & + 6.904105059069327d0, 11.958260743101398d0, 6.904105059069326d0, & + 9.26282894152753d0, 20.712315177207984d0, 20.71231517720798d0, & + 9.26282894152753d0, 9.26282894152753d0, 24.507141816213494d0, & + 31.63858403911275d0, 24.507141816213494d0, 9.262828941527529d0, & + 6.904105059069327d0, 20.712315177207984d0, 31.63858403911275d0, & + 31.63858403911275d0, 20.71231517720798d0, 6.904105059069327d0, & + 3.6055512754639896d0, 11.958260743101398d0, 20.71231517720798d0, & + 24.507141816213494d0, 20.71231517720798d0, 11.958260743101398d0, & + 3.6055512754639896d0, 1.0d0, 3.605551275463989d0, 6.904105059069326d0, & + 9.26282894152753d0, 9.262828941527529d0, 6.904105059069327d0, & + 3.6055512754639896d0, 1.d00 /) + END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_8, (45) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=8 @@ -506,10 +565,28 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ] cart_to_sphe_8 ( 7,17) = -2.9348392204684739765d0 cart_to_sphe_8 (16,17) = 2.9348392204684739765d0 cart_to_sphe_8 (29,17) = -1.2945196985754986958d0 + + cart_to_sphe_norm_8 = (/ 1.0d0, 3.872983346207417d0, 3.872983346207417d0, & + 8.062257748298551d0, 13.964240043768942d0, 8.06225774829855d0, & + 11.958260743101398d0, 26.739483914241877d0, 26.739483914241877d0, & + 11.958260743101398d0, 13.55939315961975d0, 35.874782229304195d0, & + 46.31414470763765d0, 35.874782229304195d0, 13.55939315961975d0, & + 11.958260743101398d0, 35.874782229304195d0, 54.79963503528103d0, & + 54.79963503528103d0, 35.874782229304195d0, 11.958260743101398d0, & + 8.062257748298551d0, 26.739483914241877d0, 46.31414470763765d0, & + 54.79963503528103d0, 46.314144707637645d0, 26.739483914241877d0, & + 8.06225774829855d0, 3.872983346207417d0, 13.964240043768942d0, & + 26.739483914241877d0, 35.874782229304195d0, 35.874782229304195d0, & + 26.739483914241877d0, 13.96424004376894d0, 3.8729833462074166d0, 1.0d0, & + 3.872983346207417d0, 8.06225774829855d0, 11.958260743101398d0, & + 13.55939315961975d0, 11.958260743101398d0, 8.06225774829855d0, & + 3.8729833462074166d0, 1.d0 /) + END_PROVIDER -BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ] + BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ] +&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_9, (55) ] implicit none BEGIN_DOC ! Spherical -> Cartesian Transformation matrix for l=9 @@ -703,5 +780,28 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ] cart_to_sphe_9 (16,19) = 4.1179360680974030877d0 cart_to_sphe_9 (29,19) = -2.3781845426185916576d0 cart_to_sphe_9 (46,19) = 0.60904939217552380708d0 + + cart_to_sphe_norm_9 = (/ 1.0d0, 4.1231056256176615d0, 4.1231056256176615d0, & + 9.219544457292889d0, 15.968719422671313d0, 9.219544457292889d0, & + 14.86606874731851d0, 33.24154027718933d0, 33.24154027718933d0, & + 14.866068747318508d0, 18.635603405463275d0, 49.30517214248421d0, & + 63.652703529910404d0, 49.30517214248421d0, 18.635603405463275d0, & + 18.635603405463275d0, 55.90681021638982d0, 85.39906322671229d0, & + 85.39906322671229d0, 55.90681021638983d0, 18.635603405463275d0, & + 14.86606874731851d0, 49.30517214248421d0, 85.39906322671229d0, & + 101.04553429023969d0, 85.3990632267123d0, 49.30517214248421d0, & + 14.866068747318508d0, 9.219544457292889d0, 33.24154027718933d0, & + 63.652703529910404d0, 85.39906322671229d0, 85.3990632267123d0, & + 63.65270352991039d0, 33.24154027718933d0, 9.219544457292887d0, & + 4.1231056256176615d0, 15.968719422671313d0, 33.24154027718933d0, & + 49.30517214248421d0, 55.90681021638983d0, 49.30517214248421d0, & + 33.24154027718933d0, 15.968719422671313d0, 4.1231056256176615d0, 1.0d0, & + 4.1231056256176615d0, 9.219544457292889d0, 14.866068747318508d0, & + 18.635603405463275d0, 18.635603405463275d0, 14.866068747318508d0, & + 9.219544457292887d0, 4.1231056256176615d0, 1.d0 /) + END_PROVIDER + + + diff --git a/src/ao_one_e_ints/ao_one_e_ints.irp.f b/src/ao_one_e_ints/ao_one_e_ints.irp.f index 2c6b8e7e..8b0edfbc 100644 --- a/src/ao_one_e_ints/ao_one_e_ints.irp.f +++ b/src/ao_one_e_ints/ao_one_e_ints.irp.f @@ -45,3 +45,19 @@ BEGIN_PROVIDER [ double precision, ao_one_e_integrals_imag,(ao_num,ao_num)] END_PROVIDER + BEGIN_PROVIDER [ double precision, ao_sphe_one_e_integrals,(ao_sphe_num,ao_sphe_num)] +&BEGIN_PROVIDER [ double precision, ao_sphe_one_e_integrals_diag,(ao_sphe_num)] + implicit none + integer :: i,j,n,l + BEGIN_DOC + ! One-electron Hamiltonian in the spherical |AO| basis. + END_DOC + + ao_sphe_one_e_integrals = ao_sphe_integrals_n_e + ao_sphe_kinetic_integrals + + do j = 1, ao_num + ao_sphe_one_e_integrals_diag(j) = ao_sphe_one_e_integrals(j,j) + enddo + +END_PROVIDER + diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f index 747d9183..4e4d33f0 100644 --- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f +++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f @@ -1,9 +1,12 @@ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)] +&BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_normalization, (ao_num)] &BEGIN_PROVIDER [ integer, ao_sphe_num ] implicit none BEGIN_DOC ! Coefficients to go from cartesian to spherical coordinates in the current ! basis set +! +! S_cart^-1 END_DOC integer :: i integer, external :: ao_power_index @@ -11,6 +14,7 @@ integer :: prev prev = 0 ao_cart_to_sphe_coef(:,:) = 0.d0 + ao_cart_to_sphe_normalization(:) = 1.d0 ! Assume order provided by ao_power_index i = 1 ao_sphe_num = 0 @@ -19,6 +23,7 @@ case (0) ao_sphe_num += 1 ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0 + ao_cart_to_sphe_normalization(i) = 1.d0 i += 1 BEGIN_TEMPLATE case ($SHELL) @@ -28,6 +33,9 @@ ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k) enddo enddo + do j=1,size(cart_to_sphe_$SHELL,1) + ao_cart_to_sphe_normalization(i+j-1) = cart_to_sphe_norm_$SHELL(j) + enddo i += size(cart_to_sphe_$SHELL,1) ao_sphe_num += size(cart_to_sphe_$SHELL,2) endif @@ -47,28 +55,7 @@ end select enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ] - implicit none - BEGIN_DOC - ! |AO| overlap matrix in the spherical basis set - END_DOC - double precision, allocatable :: tmp(:,:) - allocate (tmp(ao_sphe_num,ao_num)) - - call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & - ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), & - S_inv,size(ao_overlap,1), 0.d0, & - tmp, size(tmp,1)) - - call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & - tmp, size(tmp,1), & - ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), 0.d0, & - ao_sphe_overlap,size(ao_sphe_overlap,1)) - - deallocate(tmp) - +print *, ao_cart_to_sphe_normalization(:) END_PROVIDER BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ] @@ -77,9 +64,26 @@ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ] ! Inverse of :c:data:`ao_cart_to_sphe_coef` END_DOC - call get_pseudo_inverse(ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1),& - ao_num,ao_sphe_num, & - ao_cart_to_sphe_inv, size(ao_cart_to_sphe_inv,1), lin_dep_cutoff) + ! Normalize + integer :: m,k + double precision, allocatable :: S(:,:), R(:,:), Rinv(:,:), Sinv(:,:) + + k = size(ao_cart_to_sphe_coef,1) + m = size(ao_cart_to_sphe_coef,2) + + allocate(S(k,k), R(k,m), Rinv(m,k), Sinv(k,k)) + + R(:,:) = ao_cart_to_sphe_coef(:,:) + + call dgemm('N','T', m, m, k, 1.d0, R, k, R, k, 0.d0, S, m) + call get_pseudo_inverse(S, k, k, m, Sinv, k, 1.d-20) + call dgemm('T','T', m, m, k, 1.d0, R, k, Sinv, k, 0.d0, Rinv, m) + + integer :: i + do i=1,ao_num + ao_cart_to_sphe_inv(:,i) = Rinv(:,i) !/ ao_cart_to_sphe_normalization(i) + enddo + END_PROVIDER diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f index e5341f6a..cc676c4b 100644 --- a/src/ao_one_e_ints/ao_overlap.irp.f +++ b/src/ao_one_e_ints/ao_overlap.irp.f @@ -308,3 +308,26 @@ BEGIN_PROVIDER [ double precision, S_half, (ao_num,ao_num) ] END_PROVIDER + +BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ] + implicit none + BEGIN_DOC + ! |AO| overlap matrix in the spherical basis set + END_DOC + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), & + ao_overlap,size(ao_overlap,1), 0.d0, & + tmp, size(tmp,1)) + + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, & + ao_sphe_overlap,size(ao_sphe_overlap,1)) + + deallocate(tmp) + +END_PROVIDER + diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f index 49eb56ad..f450721d 100644 --- a/src/ao_one_e_ints/kin_ao_ints.irp.f +++ b/src/ao_one_e_ints/kin_ao_ints.irp.f @@ -190,3 +190,25 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integrals_imag, (ao_num,ao_num)] endif END_PROVIDER + +BEGIN_PROVIDER [ double precision, ao_sphe_kinetic_integrals, (ao_sphe_num,ao_sphe_num) ] + implicit none + BEGIN_DOC + ! |AO| kinetic inntegrals matrix in the spherical basis set + END_DOC + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), & + ao_kinetic_integrals,size(ao_kinetic_integrals,1), 0.d0, & + tmp, size(tmp,1)) + + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, & + ao_sphe_kinetic_integrals,size(ao_sphe_kinetic_integrals,1)) + + deallocate(tmp) + +END_PROVIDER diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 776b5ec0..c1544a5d 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -609,3 +609,25 @@ double precision function V_r(n,alpha) end + +BEGIN_PROVIDER [ double precision, ao_sphe_integrals_n_e, (ao_sphe_num,ao_sphe_num) ] + implicit none + BEGIN_DOC + ! |AO| VneVne inntegrals matrix in the spherical basis set + END_DOC + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), & + ao_integrals_n_e,size(ao_integrals_n_e,1), 0.d0, & + tmp, size(tmp,1)) + + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, & + ao_sphe_integrals_n_e,size(ao_sphe_integrals_n_e,1)) + + deallocate(tmp) + +END_PROVIDER diff --git a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f index e75ca056..851f26d8 100644 --- a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f @@ -296,3 +296,67 @@ END_PROVIDER enddo END_PROVIDER +BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals_local, (ao_sphe_num,ao_sphe_num) ] + implicit none + BEGIN_DOC + ! |AO| pseudo_integrals_local matrix in the spherical basis set + END_DOC + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), & + ao_pseudo_integrals_local,size(ao_pseudo_integrals_local,1), 0.d0, & + tmp, size(tmp,1)) + + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, & + ao_sphe_pseudo_integrals_local,size(ao_sphe_pseudo_integrals_local,1)) + + deallocate(tmp) + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals_non_local, (ao_sphe_num,ao_sphe_num) ] + implicit none + BEGIN_DOC + ! |AO| pseudo_integrals_non_local matrix in the spherical basis set + END_DOC + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), & + ao_pseudo_integrals_non_local,size(ao_pseudo_integrals_non_local,1), 0.d0, & + tmp, size(tmp,1)) + + call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, & + tmp, size(tmp,1), & + ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, & + ao_sphe_pseudo_integrals_non_local,size(ao_sphe_pseudo_integrals_non_local,1)) + + deallocate(tmp) + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals, (ao_sphe_num,ao_sphe_num)] + implicit none + BEGIN_DOC + ! Pseudo-potential integrals in the |AO| basis set. + END_DOC + + ao_sphe_pseudo_integrals = 0.d0 + if (do_pseudo) then + if (pseudo_klocmax > 0) then + ao_sphe_pseudo_integrals += ao_sphe_pseudo_integrals_local + endif + if (pseudo_kmax > 0) then + ao_sphe_pseudo_integrals += ao_sphe_pseudo_integrals_non_local + endif + endif + +END_PROVIDER + diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f index a8b5441d..1eecca6c 100644 --- a/src/mo_basis/mos.irp.f +++ b/src/mo_basis/mos.irp.f @@ -347,11 +347,19 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA) end -BEGIN_PROVIDER [ double precision, mo_coef_spherical] +BEGIN_PROVIDER [ double precision, mo_sphe_coef, (ao_sphe_num, mo_num) ] implicit none BEGIN_DOC ! MO coefficients in the basis of spherical harmonics AOs. END_DOC - + double precision, allocatable :: tmp(:,:) + allocate (tmp(ao_sphe_num,ao_num)) + + call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, & + ao_cart_to_sphe_coef,ao_num, & + mo_coef,size(mo_coef,1), 0.d0, & + mo_sphe_coef, size(mo_sphe_coef,1)) + + deallocate (tmp) END_PROVIDER diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 88828520..22dbea00 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -22,6 +22,13 @@ doc: If True, export MO coefficients interface: ezfio, ocaml, provider default: True +[export_cartesian] +type: logical +doc: If False, export everything in the spherical AO basis +interface: ezfio, ocaml, provider +default: True + + [export_ao_one_e_ints] type: logical doc: If True, export one-electron integrals in AO basis diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index cf1327b6..53b21dc9 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -77,6 +77,7 @@ subroutine export_trexio(update,full_path) rc = trexio_read_metadata_code_num(f(k), code_num) if (rc == TREXIO_ATTR_MISSING) then i = 1 + code_num = 0 code(:) = "" else rc = trexio_read_metadata_code(f(k), code, 64) @@ -97,6 +98,7 @@ subroutine export_trexio(update,full_path) rc = trexio_read_metadata_author_num(f(k), author_num) if (rc == TREXIO_ATTR_MISSING) then i = 1 + author_num = 0 author(:) = "" else rc = trexio_read_metadata_author(f(k), author, 64) @@ -305,35 +307,46 @@ subroutine export_trexio(update,full_path) print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + if (export_cartesian) then + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (ezfio_convention >= 20250211) then + rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor) + else + + allocate(factor(ao_num)) + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + rc = trexio_write_ao_normalization(f(1), factor) + deallocate(factor) + endif + + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) - if (ezfio_convention >= 20250211) then - rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor) else - integer :: pow0(3), powA(3), nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + rc = trexio_write_ao_cartesian(f(1), 0) + call trexio_assert(rc, TREXIO_SUCCESS) - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + rc = trexio_write_ao_num(f(1), ao_sphe_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f(1), ao_sphe_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_normalization(f(1), ao_sphe_coef_normalization_factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(ao_num)) - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - rc = trexio_write_ao_normalization(f(1), factor) - deallocate(factor) endif - call trexio_assert(rc, TREXIO_SUCCESS) endif @@ -341,23 +354,45 @@ subroutine export_trexio(update,full_path) ! ------------------ if (export_ao_one_e_ints) then - print *, 'AO one-e integrals' - rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) - call trexio_assert(rc, TREXIO_SUCCESS) + double precision, allocatable :: tmp_ao(:,:) + if (export_cartesian) then + print *, 'AO one-e integrals (cartesian)' - rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) - call trexio_assert(rc, TREXIO_SUCCESS) - - rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) - call trexio_assert(rc, TREXIO_SUCCESS) - - if (do_pseudo) then - rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) call trexio_assert(rc, TREXIO_SUCCESS) - endif - rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) + else + print *, 'AO one-e integrals (spherical)' + + rc = trexio_write_ao_1e_int_overlap(f(1),ao_sphe_overlap) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_sphe_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_sphe_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f(1), ao_sphe_pseudo_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_sphe_one_e_integrals) + endif call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -465,8 +500,13 @@ subroutine export_trexio(update,full_path) call trexio_assert(rc, TREXIO_SUCCESS) enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + if (export_cartesian) then + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + else + rc = trexio_write_mo_coefficient(f(1), mo_sphe_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + endif if ( (trim(mo_label) == 'Canonical').and. & (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 4e7ca87d..629a998b 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1392,15 +1392,6 @@ subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff) call dgemm('T', 'T', n, m, n_svd, 1.d0, Vt, size(Vt,1), U, size(U,1), 0.d0, C, size(C,1)) -! C = 0.d0 -! do i=1,m -! do j=1,n -! do k=1,n_svd -! C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j) -! enddo -! enddo -! enddo - deallocate(U,D,Vt,work,A_tmp) end From b5543cb37a8ab47716d1946f59322c0a6f2e7a1d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Mar 2025 22:34:49 +0100 Subject: [PATCH 59/59] Introduced ao_sphe_num --- external/irpf90 | 2 +- src/ao_basis/aos.irp.f | 13 +++++++++++++ src/ao_one_e_ints/ao_ortho_canonical.irp.f | 17 +++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/external/irpf90 b/external/irpf90 index 4ab1b175..43160c60 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 +Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f index d9381015..02eedf53 100644 --- a/src/ao_basis/aos.irp.f +++ b/src/ao_basis/aos.irp.f @@ -22,6 +22,19 @@ BEGIN_PROVIDER [ integer, ao_shell, (ao_num) ] enddo END_PROVIDER +BEGIN_PROVIDER [ integer, ao_sphe_num ] + implicit none + BEGIN_DOC + ! Number of spherical AOs + END_DOC + integer :: n, i + ao_sphe_num=0 + do i=1,shell_num + n = shell_ang_mom(i) + ao_sphe_num += 2*n+1 + enddo +END_PROVIDER + BEGIN_PROVIDER [ integer, ao_sphe_shell, (ao_sphe_num) ] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f index 4e4d33f0..b19be1e2 100644 --- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f +++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f @@ -1,6 +1,5 @@ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)] &BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_normalization, (ao_num)] -&BEGIN_PROVIDER [ integer, ao_sphe_num ] implicit none BEGIN_DOC ! Coefficients to go from cartesian to spherical coordinates in the current @@ -11,18 +10,18 @@ integer :: i integer, external :: ao_power_index integer :: ibegin,j,k - integer :: prev + integer :: prev, ao_sphe_count prev = 0 ao_cart_to_sphe_coef(:,:) = 0.d0 ao_cart_to_sphe_normalization(:) = 1.d0 ! Assume order provided by ao_power_index i = 1 - ao_sphe_num = 0 + ao_sphe_count = 0 do while (i <= ao_num) select case ( ao_l(i) ) case (0) - ao_sphe_num += 1 - ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0 + ao_sphe_count += 1 + ao_cart_to_sphe_coef(i,ao_sphe_count) = 1.d0 ao_cart_to_sphe_normalization(i) = 1.d0 i += 1 BEGIN_TEMPLATE @@ -30,14 +29,14 @@ if (ao_power(i,1) == $SHELL) then do k=1,size(cart_to_sphe_$SHELL,2) do j=1,size(cart_to_sphe_$SHELL,1) - ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k) + ao_cart_to_sphe_coef(i+j-1,ao_sphe_count+k) = cart_to_sphe_$SHELL(j,k) enddo enddo do j=1,size(cart_to_sphe_$SHELL,1) ao_cart_to_sphe_normalization(i+j-1) = cart_to_sphe_norm_$SHELL(j) enddo i += size(cart_to_sphe_$SHELL,1) - ao_sphe_num += size(cart_to_sphe_$SHELL,2) + ao_sphe_count += size(cart_to_sphe_$SHELL,2) endif SUBST [ SHELL ] 1;; @@ -55,7 +54,9 @@ end select enddo -print *, ao_cart_to_sphe_normalization(:) + if (ao_sphe_count /= ao_sphe_num) then + call qp_bug(irp_here, ao_sphe_count, "ao_sphe_count /= ao_sphe_num") + endif END_PROVIDER BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ]