From 1c5db564b2ff7467c6b604e5cf396148e077aff8 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 10 Mar 2023 17:34:57 +0100 Subject: [PATCH 01/87] cisd conversion Ha eV, (Q) if n_elec >= 4 --- src/cisd/cisd.irp.f | 59 ++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f index fca3b10e..5f167686 100644 --- a/src/cisd/cisd.irp.f +++ b/src/cisd/cisd.irp.f @@ -69,7 +69,9 @@ subroutine run do i = 1,N_states k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1) delta_E = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int) - cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2) + if (elec_alpha_num + elec_beta_num >= 4) then + cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2) + endif enddo print *, 'N_det = ', N_det print*,'' @@ -78,26 +80,43 @@ subroutine run do i = 1,N_states print *, i, CI_energy(i) enddo - print*,'' - print*,'******************************' - print *, 'CISD+Q Energies' - do i = 1,N_states - print *, i, cisdq(i) - enddo + if (elec_alpha_num + elec_beta_num >= 4) then + print*,'' + print*,'******************************' + print *, 'CISD+Q Energies' + do i = 1,N_states + print *, i, cisdq(i) + enddo + endif if (N_states > 1) then - print*,'' - print*,'******************************' - print*,'Excitation energies (au) (CISD+Q)' - do i = 2, N_states - print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1) - enddo - print*,'' - print*,'******************************' - print*,'Excitation energies (eV) (CISD+Q)' - do i = 2, N_states - print*, i ,(CI_energy(i) - CI_energy(1))/0.0367502d0, & - (cisdq(i) - cisdq(1)) / 0.0367502d0 - enddo + if (elec_alpha_num + elec_beta_num >= 4) then + print*,'' + print*,'******************************' + print*,'Excitation energies (au) (CISD+Q)' + do i = 2, N_states + print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1) + enddo + print*,'' + 
print*,'******************************' + print*,'Excitation energies (eV) (CISD+Q)' + do i = 2, N_states + print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, & + (cisdq(i) - cisdq(1)) * ha_to_ev + enddo + else + print*,'' + print*,'******************************' + print*,'Excitation energies (au) (CISD)' + do i = 2, N_states + print*, i ,CI_energy(i) - CI_energy(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (CISD)' + do i = 2, N_states + print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev + enddo + endif endif end From f5dc20a29ffe1e1d79971ba47c279ca12190aa84 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 10 Mar 2023 17:46:30 +0100 Subject: [PATCH 02/87] tests cisd w frozen core --- src/cisd/30.cisd.bats | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cisd/30.cisd.bats b/src/cisd/30.cisd.bats index 69b862b0..42d0dc5e 100644 --- a/src/cisd/30.cisd.bats +++ b/src/cisd/30.cisd.bats @@ -10,6 +10,8 @@ function run() { qp set determinants n_states 2 qp set davidson threshold_davidson 1.e-12 qp set davidson n_states_diag 24 + qp run cis + qp set_frozen_core qp run cisd energy1="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 1)" energy2="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 2)" From 457af473235f832842ad13fb546b2492953ee9eb Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 10 Mar 2023 20:15:29 +0100 Subject: [PATCH 03/87] add one body transition density matrix --- src/determinants/tr_density_matrix.irp.f | 313 +++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 src/determinants/tr_density_matrix.irp.f diff --git a/src/determinants/tr_density_matrix.irp.f b/src/determinants/tr_density_matrix.irp.f new file mode 100644 index 00000000..fa0d4239 --- /dev/null +++ b/src/determinants/tr_density_matrix.irp.f @@ -0,0 +1,313 @@ +BEGIN_PROVIDER [double precision, tr_one_e_dm_mo, (mo_num, mo_num, N_states, N_states)] + + implicit none + + BEGIN_DOC + ! 
One body transition density matrix for all pairs of states n and m, < Psi^n | a_i^\dagger a_a | Psi^m > + END_DOC + + integer :: j,k,l,m,k_a,k_b,n + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck, cl, ckl + double precision :: phase + integer :: h1,h2,p1,p2,s1,s2, degree + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + integer :: exc(0:2,2),n_occ(2) + double precision, allocatable :: tmp_a(:,:,:,:), tmp_b(:,:,:,:) + integer :: krow, kcol, lrow, lcol + + PROVIDE psi_det + + tr_one_e_dm_mo = 0d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& + !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& + !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & + !$OMP elec_beta_num,tr_one_e_dm_mo,N_det,& + !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& + !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,& + !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) + allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) ) + tmp_a = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_a=1,N_det + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! 
------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n) + do l=1,elec_alpha_num + j = occ(l,1) + tmp_a(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_a == N_det) cycle + l = k_a+1 + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas + do while ( lcol == kcol ) + tmp_det2(:) = psi_det_alpha_unique(:, lrow) + call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase + tmp_a(h1,p1,m,n) += ckl + ckl = psi_bilinear_matrix_values(k_a,n)*psi_bilinear_matrix_values(l,m) * phase + tmp_a(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tr_one_e_dm_mo(:,:,:,:) = tr_one_e_dm_mo(:,:,:,:) + tmp_a(:,:,:,:) + !$OMP END CRITICAL + deallocate(tmp_a) + !$OMP BARRIER + + tmp_b = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_b=1,N_det + krow = psi_bilinear_matrix_transp_rows(k_b) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_transp_columns(k_b) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! 
------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n) + do l=1,elec_beta_num + j = occ(l,2) + tmp_b(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix alpha determinant, loop over betas + do while ( lrow == krow ) + tmp_det2(:) = psi_det_beta_unique(:, lcol) + call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase + tmp_b(h1,p1,m,n) += ckl + ckl = psi_bilinear_matrix_transp_values(k_b,n)*psi_bilinear_matrix_transp_values(l,m) * phase + tmp_b(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + tr_one_e_dm_mo(:,:,:,:) = tr_one_e_dm_mo(:,:,:,:) + tmp_b(:,:,:,:) + !$OMP END CRITICAL + + deallocate(tmp_b) + !$OMP END PARALLEL + +END_PROVIDER + BEGIN_PROVIDER [ double precision, tr_one_e_dm_mo_alpha, (mo_num,mo_num,N_states,N_states) ] +&BEGIN_PROVIDER [ double precision, tr_one_e_dm_mo_beta, (mo_num,mo_num,N_states,N_states) ] + implicit none + BEGIN_DOC + ! 
$\alpha$ and $\beta$ one-body transition density matrices for all pairs of states + END_DOC + + integer :: j,k,l,m,n,k_a,k_b + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck, cl, ckl + double precision :: phase + integer :: h1,h2,p1,p2,s1,s2, degree + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + integer :: exc(0:2,2),n_occ(2) + double precision, allocatable :: tmp_a(:,:,:,:), tmp_b(:,:,:,:) + integer :: krow, kcol, lrow, lcol + + PROVIDE psi_det + + tr_one_e_dm_mo_alpha = 0.d0 + tr_one_e_dm_mo_beta = 0.d0 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,k_a,k_b,l,m,n,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& + !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& + !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & + !$OMP elec_beta_num,tr_one_e_dm_mo_alpha,tr_one_e_dm_mo_beta,N_det,& + !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& + !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,& + !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) + allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) ) + tmp_a = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_a=1,N_det + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! 
------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n) + do l=1,elec_alpha_num + j = occ(l,1) + tmp_a(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_a == N_det) cycle + l = k_a+1 + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas; (p1,h1) takes the swapped state pair (n,m), as in the total transition density matrix + do while ( lcol == kcol ) + tmp_det2(:) = psi_det_alpha_unique(:, lrow) + call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase + tmp_a(h1,p1,m,n) += ckl + tmp_a(p1,h1,m,n) += psi_bilinear_matrix_values(k_a,n)*psi_bilinear_matrix_values(l,m) * phase + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tr_one_e_dm_mo_alpha(:,:,:,:) = tr_one_e_dm_mo_alpha(:,:,:,:) + tmp_a(:,:,:,:) + !$OMP END CRITICAL + deallocate(tmp_a) + + tmp_b = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_b=1,N_det + krow = psi_bilinear_matrix_transp_rows(k_b) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_transp_columns(k_b) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! 
------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n) + do l=1,elec_beta_num + j = occ(l,2) + tmp_b(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix alpha determinant, loop over betas; (p1,h1) takes the swapped state pair (n,m), as in the total transition density matrix + do while ( lrow == krow ) + tmp_det2(:) = psi_det_beta_unique(:, lcol) + call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase + tmp_b(h1,p1,m,n) += ckl + tmp_b(p1,h1,m,n) += psi_bilinear_matrix_transp_values(k_b,n)*psi_bilinear_matrix_transp_values(l,m) * phase + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + tr_one_e_dm_mo_beta(:,:,:,:) = tr_one_e_dm_mo_beta(:,:,:,:) + tmp_b(:,:,:,:) + !$OMP END CRITICAL + + deallocate(tmp_b) + !$OMP END PARALLEL + +END_PROVIDER + From 6b3487aa0af5f3b445e929237355e45a8f80920a Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 10 Mar 2023 20:19:17 +0100 Subject: [PATCH 04/87] typo --- src/determinants/tr_density_matrix.irp.f | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/determinants/tr_density_matrix.irp.f b/src/determinants/tr_density_matrix.irp.f index fa0d4239..1e94edcb 100644 --- a/src/determinants/tr_density_matrix.irp.f +++ b/src/determinants/tr_density_matrix.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, tr_one_e_dm_mo, (mo_num, mo_num, N_states, N_states)] +BEGIN_PROVIDER [double precision, one_e_tr_dm_mo, (mo_num, mo_num, N_states, N_states)] 
implicit none @@ -18,13 +18,13 @@ BEGIN_PROVIDER [double precision, tr_one_e_dm_mo, (mo_num, mo_num, N_states, N_s PROVIDE psi_det - tr_one_e_dm_mo = 0d0 + one_e_tr_dm_mo = 0d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & - !$OMP elec_beta_num,tr_one_e_dm_mo,N_det,& + !$OMP elec_beta_num,one_e_tr_dm_mo,N_det,& !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& @@ -88,7 +88,7 @@ BEGIN_PROVIDER [double precision, tr_one_e_dm_mo, (mo_num, mo_num, N_states, N_s !$OMP END DO NOWAIT !$OMP CRITICAL - tr_one_e_dm_mo(:,:,:,:) = tr_one_e_dm_mo(:,:,:,:) + tmp_a(:,:,:,:) + one_e_tr_dm_mo(:,:,:,:) = one_e_tr_dm_mo(:,:,:,:) + tmp_a(:,:,:,:) !$OMP END CRITICAL deallocate(tmp_a) !$OMP BARRIER @@ -149,15 +149,15 @@ BEGIN_PROVIDER [double precision, tr_one_e_dm_mo, (mo_num, mo_num, N_states, N_s enddo !$OMP END DO NOWAIT !$OMP CRITICAL - tr_one_e_dm_mo(:,:,:,:) = tr_one_e_dm_mo(:,:,:,:) + tmp_b(:,:,:,:) + one_e_tr_dm_mo(:,:,:,:) = one_e_tr_dm_mo(:,:,:,:) + tmp_b(:,:,:,:) !$OMP END CRITICAL deallocate(tmp_b) !$OMP END PARALLEL END_PROVIDER - BEGIN_PROVIDER [ double precision, tr_one_e_dm_mo_alpha, (mo_num,mo_num,N_states,N_states) ] -&BEGIN_PROVIDER [ double precision, tr_one_e_dm_mo_beta, (mo_num,mo_num,N_states,N_states) ] + BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_alpha, (mo_num,mo_num,N_states,N_states) ] +&BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_beta, (mo_num,mo_num,N_states,N_states) ] implicit none BEGIN_DOC ! 
$\alpha$ and $\beta$ one-body transition density matrices for all pairs of states @@ -175,13 +175,13 @@ END_PROVIDER PROVIDE psi_det - tr_one_e_dm_mo_alpha = 0.d0 - tr_one_e_dm_mo_beta = 0.d0 + one_e_tr_dm_mo_alpha = 0.d0 + one_e_tr_dm_mo_beta = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,k_a,k_b,l,m,n,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & - !$OMP elec_beta_num,tr_one_e_dm_mo_alpha,tr_one_e_dm_mo_beta,N_det,& + !$OMP elec_beta_num,one_e_tr_dm_mo_alpha,one_e_tr_dm_mo_beta,N_det,& !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& @@ -244,7 +244,7 @@ END_PROVIDER !$OMP END DO NOWAIT !$OMP CRITICAL - tr_one_e_dm_mo_alpha(:,:,:,:) = tr_one_e_dm_mo_alpha(:,:,:,:) + tmp_a(:,:,:,:) + one_e_tr_dm_mo_alpha(:,:,:,:) = one_e_tr_dm_mo_alpha(:,:,:,:) + tmp_a(:,:,:,:) !$OMP END CRITICAL deallocate(tmp_a) @@ -303,7 +303,7 @@ END_PROVIDER enddo !$OMP END DO NOWAIT !$OMP CRITICAL - tr_one_e_dm_mo_beta(:,:,:,:) = tr_one_e_dm_mo_beta(:,:,:,:) + tmp_b(:,:,:,:) + one_e_tr_dm_mo_beta(:,:,:,:) = one_e_tr_dm_mo_beta(:,:,:,:) + tmp_b(:,:,:,:) !$OMP END CRITICAL deallocate(tmp_b) From 8f8001fd09f0d06b888e54106252d9a72c3f356a Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 11 Mar 2023 22:12:48 +0100 Subject: [PATCH 05/87] add some conversions factors --- src/utils/units.irp.f | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/utils/units.irp.f b/src/utils/units.irp.f index 1850b28b..51dcec82 100644 --- a/src/utils/units.irp.f +++ b/src/utils/units.irp.f @@ -1,22 +1,32 @@ BEGIN_PROVIDER [double precision, ha_to_ev] +&BEGIN_PROVIDER [double precision, au_to_D] +&BEGIN_PROVIDER [double precision, 
planck_cte] +&BEGIN_PROVIDER [double precision, light_speed] +&BEGIN_PROVIDER [double precision, Ha_to_J] +&BEGIN_PROVIDER [double precision, Ha_to_nm] implicit none + BEGIN_DOC - ! Converstion from Hartree to eV + ! Conversion factors between different units END_DOC - ha_to_ev = 27.211396641308d0 - -END_PROVIDER - -BEGIN_PROVIDER [double precision, au_to_D] - - implicit none - BEGIN_DOC - ! Converstion from au to Debye - END_DOC + ! Hartree to eV + Ha_to_eV = 27.211396641308d0 + ! au to Debye au_to_D = 2.5415802529d0 -END_PROVIDER + ! Planck's constant in SI units (J.s) + planck_cte = 6.62606957d-34 + ! Light speed in SI units (m/s) + light_speed = 2.99792458d8 + + ! Hartree to Joule + Ha_to_J = 4.35974434d-18 + + ! Hartree to nm: wavelength h*c/E of a photon of energy 1 Hartree (1d9 converts m to nm) + Ha_to_nm = 1d9 * (planck_cte * light_speed) / Ha_to_J + +END_PROVIDER From b16a6c7d5336a5bb383475d04ff95c9affed5c8f Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 11 Mar 2023 22:31:57 +0100 Subject: [PATCH 06/87] add molecular properties --- src/mol_properties/EZFIO.cfg | 23 +++ src/mol_properties/NEED | 2 + src/mol_properties/README.md | 17 ++ src/mol_properties/ci_energy_no_diag.irp.f | 13 ++ src/mol_properties/mo_deriv_1.irp.f | 30 +++ src/mol_properties/multi_s_deriv_1.irp.f | 78 +++++++ .../multi_s_dipole_moment.irp.f | 93 +++++++++ src/mol_properties/print_mol_properties.irp.f | 24 +++ src/mol_properties/print_properties.irp.f | 194 ++++++++++++++++++ src/mol_properties/properties.irp.f | 14 ++ 10 files changed, 488 insertions(+) create mode 100644 src/mol_properties/EZFIO.cfg create mode 100644 src/mol_properties/NEED create mode 100644 src/mol_properties/README.md create mode 100644 src/mol_properties/ci_energy_no_diag.irp.f create mode 100644 src/mol_properties/mo_deriv_1.irp.f create mode 100644 src/mol_properties/multi_s_deriv_1.irp.f create mode 100644 src/mol_properties/multi_s_dipole_moment.irp.f create mode 100644 src/mol_properties/print_mol_properties.irp.f create mode 100644 src/mol_properties/print_properties.irp.f create mode 
100644 src/mol_properties/properties.irp.f diff --git a/src/mol_properties/EZFIO.cfg b/src/mol_properties/EZFIO.cfg new file mode 100644 index 00000000..35a095fb --- /dev/null +++ b/src/mol_properties/EZFIO.cfg @@ -0,0 +1,23 @@ +[print_all_transitions] +type: logical +doc: If true, print the transition between all the states +interface: ezfio,provider,ocaml +default: false + +[calc_dipole_moment] +type: logical +doc: If true, the electric dipole moment will be computed +interface: ezfio,provider,ocaml +default: false + +[calc_tr_dipole_moment] +type: logical +doc: If true and N_states > 1, the transition electric dipole moment will be computed +interface: ezfio,provider,ocaml +default: false + +[calc_osc_str] +type: logical +doc: If true and N_states > 1, the oscillator strength will be computed +interface: ezfio,provider,ocaml +default: false diff --git a/src/mol_properties/NEED b/src/mol_properties/NEED new file mode 100644 index 00000000..8d89a452 --- /dev/null +++ b/src/mol_properties/NEED @@ -0,0 +1,2 @@ +determinants +davidson_undressed diff --git a/src/mol_properties/README.md b/src/mol_properties/README.md new file mode 100644 index 00000000..583e79ea --- /dev/null +++ b/src/mol_properties/README.md @@ -0,0 +1,17 @@ +# Molecular properties + +Available quantities: +- Electric dipole moment +- Electric transition dipole moment +- Oscillator strength + +They are not computed by default. 
To compute them: +``` +qp set mol_properties calc_dipole_moment true +qp set mol_properties calc_tr_dipole_moment true +qp set mol_properties calc_osc_str true +``` +If you are interested in transitions between two excited states: +``` +qp set mol_properties print_all_transitions true +``` diff --git a/src/mol_properties/ci_energy_no_diag.irp.f b/src/mol_properties/ci_energy_no_diag.irp.f new file mode 100644 index 00000000..a4407d3b --- /dev/null +++ b/src/mol_properties/ci_energy_no_diag.irp.f @@ -0,0 +1,13 @@ +BEGIN_PROVIDER [double precision, ci_energy_no_diag, (N_states) ] + + implicit none + + BEGIN_DOC + ! CI energy from density matrices and integrals + ! Avoid the rediagonalization for ci_energy + END_DOC + + ci_energy_no_diag = psi_energy + nuclear_repulsion + +END_PROVIDER + diff --git a/src/mol_properties/mo_deriv_1.irp.f b/src/mol_properties/mo_deriv_1.irp.f new file mode 100644 index 00000000..cfe6f789 --- /dev/null +++ b/src/mol_properties/mo_deriv_1.irp.f @@ -0,0 +1,30 @@ + BEGIN_PROVIDER [double precision, mo_deriv_1_x , (mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, mo_deriv_1_y , (mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, mo_deriv_1_z , (mo_num,mo_num)] + BEGIN_DOC + ! array of the integrals of MO_i * d/dx MO_j + ! array of the integrals of MO_i * d/dy MO_j + ! 
array of the integrals of MO_i * d/dz MO_j + END_DOC + implicit none + + call ao_to_mo( & + ao_deriv_1_x, & + size(ao_deriv_1_x,1), & + mo_deriv_1_x, & + size(mo_deriv_1_x,1) & + ) + call ao_to_mo( & + ao_deriv_1_y, & + size(ao_deriv_1_y,1), & + mo_deriv_1_y, & + size(mo_deriv_1_y,1) & + ) + call ao_to_mo( & + ao_deriv_1_z, & + size(ao_deriv_1_z,1), & + mo_deriv_1_z, & + size(mo_deriv_1_z,1) & + ) + +END_PROVIDER diff --git a/src/mol_properties/multi_s_deriv_1.irp.f b/src/mol_properties/multi_s_deriv_1.irp.f new file mode 100644 index 00000000..b30130b7 --- /dev/null +++ b/src/mol_properties/multi_s_deriv_1.irp.f @@ -0,0 +1,78 @@ + BEGIN_PROVIDER [double precision, multi_s_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_deriv_1, (N_states, N_states)] + + implicit none + + BEGIN_DOC + ! Providers for : + ! <\Psi_m|v_x|\Psi_n> + ! <\Psi_m|v_y|\Psi_n> + ! <\Psi_m|v_z|\Psi_n> + ! ||v|| = sqrt(v_x^2 + v_y^2 + v_z^2) + ! v_x = d/dx + ! Cf. multi_s_dipole_moment for the equations + END_DOC + + integer :: istate,jstate ! States + integer :: i,j ! general spatial MOs + double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z + + multi_s_x_deriv_1 = 0.d0 + multi_s_y_deriv_1 = 0.d0 + multi_s_z_deriv_1 = 0.d0 + + do jstate = 1, N_states + do istate = 1, N_states + + do i = 1, mo_num + ! Diag part + multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_x(i,i) + multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_y(i,i) + multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_z(i,i) + + do j = 1, mo_num + if (i == j) then + cycle + endif + ! 
Extra diag part + multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_x(j,i) + multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_y(j,i) + multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_z(j,i) + enddo + enddo + + enddo + enddo + + ! Nuclei part (NOTE(review): same nuclear term Z_A R_A as in multi_s_dipole_moment, added here to the diagonal d/dx elements -- confirm it is intended) + nuclei_part_x = 0.d0 + nuclei_part_y = 0.d0 + nuclei_part_z = 0.d0 + + do i = 1,nucl_num + nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) + nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) + nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) + enddo + + ! Only if istate = jstate, otherwise 0 by the orthogonality of the states + do istate = 1, N_states + multi_s_x_deriv_1(istate,istate) += nuclei_part_x + multi_s_y_deriv_1(istate,istate) += nuclei_part_y + multi_s_z_deriv_1(istate,istate) += nuclei_part_z + enddo + + ! ||v|| = sqrt(v_x^2 + v_y^2 + v_z^2) + do jstate = 1, N_states + do istate = 1, N_states + multi_s_deriv_1(istate,jstate) = & + dsqrt(multi_s_x_deriv_1(istate,jstate)**2 & + + multi_s_y_deriv_1(istate,jstate)**2 & + + multi_s_z_deriv_1(istate,jstate)**2) + enddo + enddo + +END_PROVIDER + diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f new file mode 100644 index 00000000..d5e62799 --- /dev/null +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -0,0 +1,93 @@ +! Providers for the dipole moments along x,y,z and the total dipole +! moments. + +! The dipole moment along the x axis is: +! \begin{align*} +! \mu_x = < \Psi_m | - \sum_i x_i + \sum_A Z_A R_A | \Psi_n > +! \end{align*} +! where $i$ is used for the electrons and $A$ for the nuclei. +! $Z_A$ is the charge of the nucleus $A$ and $R_A$ its position in +! space. + +! And it can be computed using the (transition, if n /= m) density +! matrix as an expectation value +! \begin{align*} +! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} < \phi_p | x | \phi_p > +! 
+ \sum_{pq, p \neq q} \gamma_{pq}^{nm} < \phi_p | x | \phi_q > + < \Psi_m | \sum_A Z_A R_A | \Psi_n > +! \end{align*} + + + +BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment, (N_states, N_states)] + + implicit none + + BEGIN_DOC + ! Providers for : + ! <\Psi_m|\mu_x|\Psi_n> + ! <\Psi_m|\mu_y|\Psi_n> + ! <\Psi_m|\mu_z|\Psi_n> + ! ||\mu|| = \sqrt{\mu_x^2 + \mu_y^2 + \mu_z^2} + ! + ! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} \bra{\phi_p} x \ket{\phi_p} + ! + \sum_{pq, p \neq q} \gamma_{pq}^{nm} \bra{\phi_p} x \ket{\phi_q} + ! \Psi: wf + ! n,m indexes for the states + ! p,q: general spatial MOs + ! gamma^{nm}: density matrix \bra{\Psi^n} a^{\dagger}_a a_i \ket{\Psi^m} + END_DOC + + integer :: istate,jstate ! States + integer :: i,j ! general spatial MOs + double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z + + multi_s_x_dipole_moment = 0.d0 + multi_s_y_dipole_moment = 0.d0 + multi_s_z_dipole_moment = 0.d0 + + do jstate = 1, N_states + do istate = 1, N_states + + do i = 1, mo_num + do j = 1, mo_num + multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) + multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) + multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) + enddo + enddo + + enddo + enddo + + ! Nuclei part + nuclei_part_x = 0.d0 + nuclei_part_y = 0.d0 + nuclei_part_z = 0.d0 + + do i = 1,nucl_num + nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) + nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) + nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) + enddo + + ! 
Only if istate = jstate, otherwise 0 by the orthogonality of the states + do istate = 1, N_states + multi_s_x_dipole_moment(istate,istate) += nuclei_part_x + multi_s_y_dipole_moment(istate,istate) += nuclei_part_y + multi_s_z_dipole_moment(istate,istate) += nuclei_part_z + enddo + + ! ||\mu|| = \sqrt{\mu_x^2 + \mu_y^2 + \mu_z^2} + do jstate = 1, N_states + do istate = 1, N_states + multi_s_dipole_moment(istate,jstate) = & + dsqrt(multi_s_x_dipole_moment(istate,jstate)**2 & + + multi_s_y_dipole_moment(istate,jstate)**2 & + + multi_s_z_dipole_moment(istate,jstate)**2) + enddo + enddo + +END_PROVIDER diff --git a/src/mol_properties/print_mol_properties.irp.f b/src/mol_properties/print_mol_properties.irp.f new file mode 100644 index 00000000..3753a3dd --- /dev/null +++ b/src/mol_properties/print_mol_properties.irp.f @@ -0,0 +1,24 @@ +subroutine print_mol_properties() + + implicit none + + BEGIN_DOC + ! Run the property calculations + END_DOC + + ! Electric dipole moment + if (calc_dipole_moment) then + call print_dipole_moment + endif + + ! Transition electric dipole moment + if (calc_tr_dipole_moment .and. N_states > 1) then + call print_transition_dipole_moment + endif + + ! Oscillator strength + if (calc_osc_str .and. N_states > 1) then + call print_oscillator_strength + endif + +end diff --git a/src/mol_properties/print_properties.irp.f b/src/mol_properties/print_properties.irp.f new file mode 100644 index 00000000..4c0a9f38 --- /dev/null +++ b/src/mol_properties/print_properties.irp.f @@ -0,0 +1,194 @@ +! Dipole moments + +! Provided +! | N_states | integer | Number of states | +! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis | +! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis | +! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis | +! 
| multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | + + +subroutine print_dipole_moment + + implicit none + + BEGIN_DOC + ! To print the dipole moment ||<\Psi_i|µ|\Psi_i>|| and its x,y,z components + END_DOC + + integer :: istate + double precision, allocatable :: d(:), d_x(:), d_y(:), d_z(:) + + allocate(d(N_states),d_x(N_states),d_y(N_states),d_z(N_states)) + + do istate = 1, N_states + d_x(istate) = multi_s_x_dipole_moment(istate,istate) + d_y(istate) = multi_s_y_dipole_moment(istate,istate) + d_z(istate) = multi_s_z_dipole_moment(istate,istate) + d(istate) = multi_s_dipole_moment(istate,istate) + enddo + + ! Atomic units + print*,'' + print*,'# Dipoles:' + print*,'==============================================' + print*,' Dipole moments (au)' + print*,' State X Y Z ||µ||' + + do istate = 1, N_states + write(*,'(I5,4(F12.6))') (istate-1), d_x(istate), d_y(istate), d_z(istate), d(istate) + enddo + + ! Debye + print*,'' + print*,' Dipole moments (D)' + print*,' State X Y Z ||µ||' + + do istate = 1, N_states + write(*,'(I5,4(F12.6))') (istate-1), d_x(istate)*au_to_D, d_y(istate)*au_to_D, d_z(istate)*au_to_D, d(istate)*au_to_D + enddo + + print*,'==============================================' + print*,'' + + deallocate(d,d_x,d_y,d_z) + + end + +! Transition dipole moments + +! Provided +! | N_states | integer | Number of states | +! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis | +! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis | +! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis | +! | multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | + + +subroutine print_transition_dipole_moment + + implicit none + + BEGIN_DOC + ! 
To print the transition dipole moment ||<\Psi_i|µ|\Psi_j>|| and its components along x, y and z + END_DOC + + integer :: istate,jstate, n_states_print + double precision :: f, d, d_x, d_y, d_z, dip_str + + if (N_states == 1 .or. N_det == 1) then + return + endif + + print*,'' + print*,'# Transition dipoles:' + print*,'==============================================' + print*,' Transition dipole moments (au)' + write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + + if (print_all_transitions) then + n_states_print = N_states + else + n_states_print = 1 + endif + + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d_x = multi_s_x_dipole_moment(istate,jstate) + d_y = multi_s_y_dipole_moment(istate,jstate) + d_z = multi_s_z_dipole_moment(istate,jstate) + dip_str = d_x**2 + d_y**2 + d_z**2 + d = multi_s_dipole_moment(istate,jstate) + f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + enddo + enddo + + print*,'' + print*,' Transition dipole moments (D)' + write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d_x = multi_s_x_dipole_moment(istate,jstate) * au_to_D + d_y = multi_s_y_dipole_moment(istate,jstate) * au_to_D + d_z = multi_s_z_dipole_moment(istate,jstate) * au_to_D + d = multi_s_dipole_moment(istate,jstate) + dip_str = d_x**2 + d_y**2 + d_z**2 + f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + d = multi_s_dipole_moment(istate,jstate) * au_to_D + write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + enddo + enddo + print*,'==============================================' + print*,'' + +end + +! Oscillator strengths + +! Provided +! | N_states | integer | Number of states | +! 
| multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | +! | multi_s_deriv1_moment(N_states,N_states) | double precision | Total (transition) ... | +! | ci_energy_no_diag(N_states) | double precision | CI energy of each state | + +! Internal +! | f_l | double precision | Oscillator strength in length gauge | +! | f_v | double precision | Oscillator strength in velocity gauge | +! | f_m | double precision | Oscillator strength in mixed gauge | +! | n_states_print | integer | Number of printed states | + + +subroutine print_oscillator_strength + + implicit none + + BEGIN_DOC + ! https://doi.org/10.1016/j.cplett.2004.03.126 + ! Oscillator strength in: + ! - length gauge, f^l_{ij} = 2/3 (E_i - E_j) <\Psi_i|r|\Psi_j> <\Psi_j|r|\Psi_i> + ! - velocity gauge, f^v_{ij} = 2/3 (E_i - E_j)^(-1) <\Psi_i|v|\Psi_j> <\Psi_j|v|\Psi_i> + ! - mixed gauge, f^m_{ij} = -2i/3 <\Psi_i|r|\Psi_j> <\Psi_j|v|\Psi_i> + END_DOC + + integer :: istate,jstate,k, n_states_print + double precision :: f_l,f_v,f_m,d,v + + if (N_states == 1 .or. N_det == 1) then + return + endif + + print*,'' + print*,'# Oscillator strength:' + print*,'==============================================' + + if (print_all_transitions) then + n_states_print = N_states + else + n_states_print = 1 + endif + + write(*,'(A103)') ' Oscillator strength in length gauge (f_l), velocity gauge (f_v) and mixed length-velocity gauge (f_m)' + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d = multi_s_dipole_moment(istate,jstate) + v = multi_s_deriv_1(istate,jstate) + ! Length gauge + f_l = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + ! Velocity gauge + f_v = 2d0/3d0 * v * v * 1d0/dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + ! 
Mixed gauge + f_m = 2d0/3d0 * d * v + + write(*,'(A19,I3,A9,F10.6,A5,F7.1,A10,F9.6,A6,F9.6,A6,F9.6,A8,F7.3)') ' # Transition n.', (istate-1), ': Excit.=', dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*ha_to_ev, & + ' eV ( ',dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*Ha_to_nm,' nm), f_l=',f_l, ', f_v=', f_v, ', f_m=', f_m, ', =', s2_values(istate) + !write(*,'(I4,I4,A4,I3,A6,F6.1,A6,F6.1)') (istate-1), (jstate-1), ' ->', (istate-1), ', %T1=', percent_exc(2,istate), ', %T2=',percent_exc(3,istate) + + enddo + enddo + + print*,'==============================================' + print*,'' + +end diff --git a/src/mol_properties/properties.irp.f b/src/mol_properties/properties.irp.f new file mode 100644 index 00000000..c781c723 --- /dev/null +++ b/src/mol_properties/properties.irp.f @@ -0,0 +1,14 @@ +program mol_properties + + implicit none + + BEGIN_DOC + ! Run the propertie calculations + END_DOC + + read_wf = .True. + touch read_wf + + call print_mol_properties() + +end From 86974ea2d4c5337b88a284b2a6d4729582a8b63c Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 11 Mar 2023 22:36:20 +0100 Subject: [PATCH 07/87] molecular properties in cipsi --- src/cipsi/NEED | 1 + src/cipsi/cipsi.irp.f | 2 ++ src/cipsi/stochastic_cipsi.irp.f | 2 ++ 3 files changed, 5 insertions(+) diff --git a/src/cipsi/NEED b/src/cipsi/NEED index 85d01f79..5bd742bc 100644 --- a/src/cipsi/NEED +++ b/src/cipsi/NEED @@ -3,3 +3,4 @@ zmq mpi iterations csf +mol_properties diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 6e715531..5225c6df 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -108,6 +108,7 @@ subroutine run_cipsi call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() + call print_mol_properties() N_iter += 1 if (qp_stop()) exit @@ -156,6 +157,7 @@ subroutine run_cipsi pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) call 
save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() + call print_mol_properties() endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 781fcda6..35e80eb8 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -98,6 +98,7 @@ subroutine run_stochastic_cipsi call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() + call print_mol_properties() N_iter += 1 if (qp_stop()) exit @@ -136,6 +137,7 @@ subroutine run_stochastic_cipsi pt2_data , pt2_data_err, N_det, N_configuration, N_states, psi_s2) call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() + call print_mol_properties() endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) From 0b728d62e7dbd618e6c1d032c8d3cedb3d900806 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 11 Mar 2023 23:29:02 +0100 Subject: [PATCH 08/87] update doc --- src/mol_properties/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mol_properties/README.md b/src/mol_properties/README.md index 583e79ea..637b76d7 100644 --- a/src/mol_properties/README.md +++ b/src/mol_properties/README.md @@ -15,3 +15,11 @@ If you are interested in transitions between two excited states: ``` qp set mol_properties print_all_transitions true ``` +They can be obtained by running +``` +qp run properties +``` +or at each step of a cipsi calculation with +``` +qp run fci +``` From 46d0a7388b10f0ecd61ae0e3fb9b2637eeff4df1 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 11 Mar 2023 23:40:52 +0100 Subject: [PATCH 09/87] clean --- src/mol_properties/multi_s_deriv_1.irp.f | 9 --------- src/mol_properties/properties.irp.f | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mol_properties/multi_s_deriv_1.irp.f 
b/src/mol_properties/multi_s_deriv_1.irp.f index b30130b7..84bfecc9 100644 --- a/src/mol_properties/multi_s_deriv_1.irp.f +++ b/src/mol_properties/multi_s_deriv_1.irp.f @@ -27,16 +27,7 @@ do istate = 1, N_states do i = 1, mo_num - ! Diag part - multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_x(i,i) - multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_y(i,i) - multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(i,i,istate,jstate) * mo_deriv_1_z(i,i) - do j = 1, mo_num - if (i == j) then - cycle - endif - ! Extra diag part multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_x(j,i) multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_y(j,i) multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_z(j,i) diff --git a/src/mol_properties/properties.irp.f b/src/mol_properties/properties.irp.f index c781c723..7ea6f9c3 100644 --- a/src/mol_properties/properties.irp.f +++ b/src/mol_properties/properties.irp.f @@ -3,7 +3,7 @@ program mol_properties implicit none BEGIN_DOC - ! Run the propertie calculations + ! Calculation of the properties END_DOC read_wf = .True. 
From 0682ee18ab4519362ff95ce3467643edbed5dacc Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 13 Mar 2023 09:38:35 +0100 Subject: [PATCH 10/87] utils cc --- src/utils_cc/EZFIO.cfg | 77 ++ src/utils_cc/NEED | 4 + src/utils_cc/README.md | 34 + src/utils_cc/diis.irp.f | 529 ++++++++++ src/utils_cc/energy.irp.f | 13 + src/utils_cc/guess_t.irp.f | 213 ++++ src/utils_cc/mo_integrals_cc.irp.f | 1256 ++++++++++++++++++++++++ src/utils_cc/occupancy.irp.f | 317 ++++++ src/utils_cc/org/TANGLE_org_mode.sh | 7 + src/utils_cc/org/diis.org | 574 +++++++++++ src/utils_cc/org/energy.org | 15 + src/utils_cc/org/guess_t.org | 222 +++++ src/utils_cc/org/mo_integrals_cc.org | 1305 +++++++++++++++++++++++++ src/utils_cc/org/occupancy.org | 330 +++++++ src/utils_cc/org/phase.org | 178 ++++ src/utils_cc/org/print_wf_qp_edit.org | 33 + src/utils_cc/org/update_t.org | 76 ++ src/utils_cc/phase.irp.f | 135 +++ src/utils_cc/print_wf_qp_edit.irp.f | 29 + src/utils_cc/update_t.irp.f | 73 ++ 20 files changed, 5420 insertions(+) create mode 100644 src/utils_cc/EZFIO.cfg create mode 100644 src/utils_cc/NEED create mode 100644 src/utils_cc/README.md create mode 100644 src/utils_cc/diis.irp.f create mode 100644 src/utils_cc/energy.irp.f create mode 100644 src/utils_cc/guess_t.irp.f create mode 100644 src/utils_cc/mo_integrals_cc.irp.f create mode 100644 src/utils_cc/occupancy.irp.f create mode 100755 src/utils_cc/org/TANGLE_org_mode.sh create mode 100644 src/utils_cc/org/diis.org create mode 100644 src/utils_cc/org/energy.org create mode 100644 src/utils_cc/org/guess_t.org create mode 100644 src/utils_cc/org/mo_integrals_cc.org create mode 100644 src/utils_cc/org/occupancy.org create mode 100644 src/utils_cc/org/phase.org create mode 100644 src/utils_cc/org/print_wf_qp_edit.org create mode 100644 src/utils_cc/org/update_t.org create mode 100644 src/utils_cc/phase.irp.f create mode 100644 src/utils_cc/print_wf_qp_edit.irp.f create mode 100644 src/utils_cc/update_t.irp.f diff --git 
a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg new file mode 100644 index 00000000..71ee87e3 --- /dev/null +++ b/src/utils_cc/EZFIO.cfg @@ -0,0 +1,77 @@ +[cc_thresh_conv] +type: double precision +doc: Threshold for the convergence of the residual equations. +interface: ezfio,ocaml,provider +default: 1e-6 + +[cc_max_iter] +type: integer +doc: Maximum number of iterations. +interface: ezfio,ocaml,provider +default: 100 + +[cc_diis_depth] +type: integer +doc: Maximum depth of the DIIS, i.e., maximum number of iterations that the DIIS keeps in memory. Warning, we allocate matrices with the diis depth at the beginning without update. If you don't have enough memory it should crash in memory. +interface: ezfio,ocaml,provider +default: 8 + +[cc_level_shift] +type: double precision +doc: Level shift for the CC +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_level_shift_guess] +type: double precision +doc: Level shift for the guess of the CC amplitudes +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_update_method] +type: character*(32) +doc: Method used to update the CC amplitudes. none -> normal, diis -> with diis. +interface: ezfio,ocaml,provider +default: diis + +[cc_guess_t1] +type: character*(32) +doc: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[cc_guess_t2] +type: character*(32) +doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[cc_write_t1] +type: logical +doc: If true, it will write on disk the T1 amplitudes at the end of the calculation. +interface: ezfio,ocaml,provider +default: False + +[cc_write_t2] +type: logical +doc: If true, it will write on disk the T2 amplitudes at the end of the calculation. +interface: ezfio,ocaml,provider +default: False + +[cc_par_t] +type: logical +doc: If true, the CCSD(T) will be computed. 
+interface: ezfio,ocaml,provider +default: False + +[cc_dev] +type: logical +doc: Only for dev purposes. +interface: ezfio,ocaml,provider +default: False + +[cc_ref] +type: integer +doc: Index of the reference determinant in psi_det for CC calculation. +interface: ezfio,ocaml,provider +default: 1 diff --git a/src/utils_cc/NEED b/src/utils_cc/NEED new file mode 100644 index 00000000..bd5a151f --- /dev/null +++ b/src/utils_cc/NEED @@ -0,0 +1,4 @@ +hartree_fock +two_body_rdm +bitmask +determinants diff --git a/src/utils_cc/README.md b/src/utils_cc/README.md new file mode 100644 index 00000000..87cde388 --- /dev/null +++ b/src/utils_cc/README.md @@ -0,0 +1,34 @@ +# Utils for CC + +Utils for the CC modules. + +## Contents +- Providers related to reference occupancy +- Integrals related to the reference +- Diis for CC (but can be used for something else if you provide your own error vector) +- Guess for CC amplitudes +- Routines to update the CC amplitudes +- Phase between to arbitrary determinants +- print of the qp edit wf + +## Keywords +- cc_thresh_conv: Threshold for the convergence of the residual equations. Default: 1e-6. +- cc_max_iter: Maximum number of iterations. Default: 100. +- cc_diis_depth: Diis depth. Default: 8. +- cc_level_shift: Level shift for the CC. Default: 0.0. +- cc_level_shift_guess: Level shift for the MP guess of the amplitudes. Default: 0.0. +- cc_update_method: Method used to update the CC amplitudes. none -> normal, diis -> with diis. Default: diis. +- cc_guess_t1: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP. +- cc_guess_t2: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP. +- cc_write_t1: If true, it will write on disk the T1 amplitudes at the end of the calculation. Default: False. +- cc_write_t2: If true, it will write on disk the T2 amplitudes at the end of the calculation. Default: False. 
+- cc_par_t: If true, the CCSD(T) will be computed. +- cc_ref: Index of the reference determinant in psi_det for CC calculation. Default: 1. + +## Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh and +mv *.irp.f ../. +``` diff --git a/src/utils_cc/diis.irp.f b/src/utils_cc/diis.irp.f new file mode 100644 index 00000000..fe771373 --- /dev/null +++ b/src/utils_cc/diis.irp.f @@ -0,0 +1,529 @@ +! Code + +subroutine diis_cc(all_err,all_t,sze,m,iter,t) + + implicit none + + BEGIN_DOC + ! DIIS. Take the error vectors and the amplitudes of the previous + ! iterations to compute the new amplitudes + END_DOC + + ! {err_i}_{i=1}^{m_it} -> B -> c + ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1} + + integer, intent(in) :: m,iter,sze + double precision, intent(in) :: all_err(sze,m) + double precision, intent(in) :: all_t(sze,m) + + double precision, intent(out) :: t(sze) + + double precision, allocatable :: B(:,:), c(:), zero(:) + integer :: m_iter + integer :: i,j,k + integer :: info + integer, allocatable :: ipiv(:) + double precision :: accu + + m_iter = min(m,iter) + !print*,'m_iter',m_iter + allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1)) + allocate(ipiv(m+1)) + + ! B(i,j) = < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us + B = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(B,m,m_iter,sze,all_err) & + !$OMP PRIVATE(i,j,k,accu) & + !$OMP DEFAULT(NONE) + do j = 1, m_iter + do i = 1, m_iter + accu = 0d0 + !$OMP DO + do k = 1, sze + ! the errors of the ith iteration are in all_err(:,m+1-i) + accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j) + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + B(i,j) = B(i,j) + accu + !$OMP END CRITICAL + enddo + enddo + !$OMP END PARALLEL + + do i = 1, m_iter + B(i,m_iter+1) = -1 + enddo + do j = 1, m_iter + B(m_iter+1,j) = -1 + enddo + ! 
Debug + !print*,'B' + !do i = 1, m_iter+1 + ! write(*,'(100(F10.6))') B(i,:) + !enddo + + ! (0 0 .... 0 -1) + zero = 0d0 + zero(m_iter+1) = -1d0 + + ! Solve B.c = zero + call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info) + if (info /= 0) then + print*,'DIIS error in dgesv:', info + call abort + endif + ! c corresponds to the m_iter first solutions + c = zero(1:m_iter) + ! Debug + !print*,'c',c + !print*,'all_t' + !do i = 1, m + ! write(*,'(100(F10.6))') all_t(:,i) + !enddo + !print*,'all_err' + !do i = 1, m + ! write(*,'(100(F10.6))') all_err(:,i) + !enddo + + ! update T + !$OMP PARALLEL & + !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) & + !$OMP PRIVATE(i,j,accu) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, sze + t(i) = 0d0 + enddo + !$OMP END DO + do i = 1, m_iter + !$OMP DO + do j = 1, sze + t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i)) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + !print*,'new t',t + + deallocate(ipiv,B,c,zero) + +end + +! Update all err + +subroutine update_all_err(err,all_err,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the err vectors of the previous iterations to add the new one + ! The last err vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: err(sze) + double precision, intent(inout) :: all_err(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_err,err,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_err(j,i) = all_err(j,i+1) + enddo + !$OMP END DO + enddo + + ! Debug + !print*,'shift err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + + ! New + !$OMP DO + do i = 1, sze + all_err(i,m) = err(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + +end + +! 
Update all t + +subroutine update_all_t(t,all_t,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the t vectors of the previous iterations to add the new one + ! The last t vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: t(sze) + double precision, intent(inout) :: all_t(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_t,t,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_t(j,i) = all_t(j,i+1) + enddo + !$OMP END DO + enddo + + ! New + !$OMP DO + do i = 1, sze + all_t(i,m) = t(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated t' + !do i = 1, m + ! print*,i, all_t(:,i) + !enddo + +end + +! Err1 + +subroutine compute_err1(nO,nV,f_o,f_v,r1,err1) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t1 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO,nV) + + double precision, intent(out) :: err1(nO,nV) + + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO + do a = 1, nV + do i = 1, nO + err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Err2 + +subroutine compute_err2(nO,nV,f_o,f_v,r2,err2) + + implicit none + + BEGIN_DOC + ! 
Compute the error vector for the t2 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV) + + double precision, intent(out) :: err2(nO,nO,nV,nV) + + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Update t + +subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + if (cc_update_method == 'diis') then + + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! DIIS T1, it is not always good since the t1 can be small + ! That's why there is a call to update the t1 in the standard way + ! T1 error tensor + !call compute_err1(nO,nV,f_o,f_v,r1,err1) + ! Transfo errors and parameters in vectors + !tmp_err1 = reshape(err1,(/nO*nV/)) + !tmp_t1 = reshape(t1 ,(/nO*nV/)) + ! 
Add the error and parameter vectors with those of the previous iterations
+    !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
+    !call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1)
+    !t1 = reshape(tmp_t1 ,(/nO,nV/))
+    ! T1 is updated in the standard (non-DIIS) way, see the note above
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+
+    ! DIIS T2
+    ! T2 error tensor
+    call compute_err2(nO,nV,f_o,f_v,r2,err2)
+    ! Transfo errors and parameters in vectors
+    tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
+    tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
+    ! Add the error and parameter vectors with those of the previous iterations
+    call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2)
+    t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/))
+
+    deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2)
+
+  ! Standard update as T = T - Delta
+  elseif (cc_update_method == 'none') then
+
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+    call update_t2(nO,nV,f_o,f_v,r2,t2)
+
+  else
+    ! Neither 'diis' nor 'none': stop here instead of returning with
+    ! amplitudes that were never updated, as guess_t1/guess_t2 do for
+    ! an unknown keyword.
+    print*,'Unknown cc_update_method: '//trim(cc_update_method)
+    call abort
+  endif
+
+end
+
+! 
Update t v2 + +subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: all_t(:,:), all_err(:,:), tmp_t(:) + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + integer :: i,j + + ! Allocate + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(tmp_t(nO*nV+nO*nO*nV*nV)) + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! Compute the errors and reshape them as vector + call compute_err1(nO,nV,f_o,f_v,r1,err1) + call compute_err2(nO,nV,f_o,f_v,r2,err2) + tmp_err1 = reshape(err1,(/nO*nV/)) + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t1 = reshape(t1 ,(/nO*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! 
Gather the different parameters and errors + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,& + !$OMP all_t,all_t1,all_t2) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_err(i,j) = all_err1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_err(i+nO*nV,j) = all_err2(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_t(i,j) = all_t1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_t(i+nO*nV,j) = all_t2(i,j) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp_t1(i) = tmp_t(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp_t2(i) = tmp_t(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Reshape as tensors + t1 = reshape(tmp_t1 ,(/nO,nV/)) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + ! Deallocate + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err) + +end + +! Update t v3 + +subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV) + double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: tmp(:) + + integer :: i,j + + ! Allocate + allocate(tmp(nO*nV+nO*nO*nV*nV)) + + ! 
Compute the errors
+  call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV))
+  call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV))
+
+  ! Update the errors and parameters for the diis
+  call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Pack t1 followed by t2 into the single vector tmp
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    tmp(i) = t1(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    tmp(i+nO*nV) = t2(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Diis
+  call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp)
+
+  ! Split the resulting vector
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    t1(i) = tmp(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    t2(i) = tmp(i+nO*nV)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Deallocate
+  deallocate(tmp)
+
+end
diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f
new file mode 100644
index 00000000..33e0cbae
--- /dev/null
+++ b/src/utils_cc/energy.irp.f
@@ -0,0 +1,13 @@
+subroutine det_energy(det,energy)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Energy of a single determinant: <det|H|det> obtained from i_H_j,
+  ! plus the nuclear repulsion.
+  END_DOC
+
+  ! Declared with the (N_int,2) shape used for determinants elsewhere in
+  ! this module (see gen_f_space); it was a scalar although it is passed
+  ! to i_H_j together with N_int.
+  ! NOTE(review): confirm against the i_H_j dummy-argument declaration.
+  integer(bit_kind), intent(in) :: det(N_int,2)
+
+  double precision, intent(out) :: energy
+
+  call i_H_j(det,det,N_int,energy)
+
+  energy = energy + nuclear_repulsion
+
+end
diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f
new file mode 100644
index 00000000..42acdf78
--- /dev/null
+++ b/src/utils_cc/guess_t.irp.f
@@ -0,0 +1,213 @@
+! T1
+
+subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Initial guess of the T1 amplitudes for CC, selected by cc_guess_t1
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV)
+
+  ! out
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! 
internal
+  integer :: i,a
+
+  if (trim(cc_guess_t1) == 'none') then
+    t1 = 0d0
+  else if (trim(cc_guess_t1) == 'MP') then
+    do a = 1, nV
+      do i = 1, nO
+        t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess)
+      enddo
+    enddo
+  else if (trim(cc_guess_t1) == 'read') then
+    call read_t1(nO,nV,t1)
+  else
+    print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1)
+    call abort
+  endif
+
+end
+
+! T2
+
+subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Initial guess of the T2 amplitudes for CC, selected by cc_guess_t2:
+  ! 'none' -> 0
+  ! 'MP'   -> v_oovv divided by the orbital-energy denominator,
+  !           shifted by cc_level_shift_guess
+  ! 'read' -> read with read_t2
+  ! Any other value aborts.
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV)
+
+  ! out
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer :: i,j,a,b
+
+  if (trim(cc_guess_t2) == 'none') then
+    t2 = 0d0
+  else if (trim(cc_guess_t2) == 'MP') then
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+            t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess)
+          enddo
+        enddo
+      enddo
+    enddo
+  else if (trim(cc_guess_t2) == 'read') then
+    call read_t2(nO,nV,t2)
+  else
+    ! The message used to name cc_guess_t1 although cc_guess_t2 is tested
+    print*, 'Unknown cc_guess_t2 type: '//trim(cc_guess_t2)
+    call abort
+  endif
+
+end
+
+! T1
+
+subroutine write_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: t1(nO, nV)
+
+  ! internal
+  integer :: i,a
+
+  if (cc_write_t1) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+    do a = 1, nV
+      do i = 1, nO
+        write(11,'(F20.12)') t1(i,a)
+      enddo
+    enddo
+    close(11)
+  endif
+
+end
+
+! T2
+
+subroutine write_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+
+  ! 
internal
+  integer :: i,j,a,b
+
+  if (cc_write_t2) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+            write(11,'(F20.12)') t2(i,j,a,b)
+          enddo
+        enddo
+      enddo
+    enddo
+    close(11)
+  endif
+
+end
+
+! T1
+
+subroutine read_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! internal
+  integer :: i,a
+  logical :: ok
+
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok)
+  if (.not. ok) then
+    print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1'
+    print*, 'Do a first calculation with cc_write_t1 = True'
+    print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read'
+    call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+  do a = 1, nV
+    do i = 1, nO
+      read(11,'(F20.12)') t1(i,a)
+    enddo
+  enddo
+  close(11)
+
+end
+
+! T2
+
+subroutine read_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T2 amplitudes for CC from <ezfio_filename>/cc_utils/T2,
+  ! one value per record, in the loop order used by write_t2
+  ! (i fastest, then j, a, b). Aborts if the file does not exist.
+  END_DOC
+
+  ! in
+  integer, intent(in) :: nO, nV
+  ! out
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer :: i,j,a,b
+  logical :: ok
+
+  ! Check the T2 file, i.e. the one opened below. The check used to
+  ! test the T1 file, so a missing T2 file was not caught here.
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T2', exist=ok)
+  if (.not. ok) then
+    print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T2'
+    print*, 'Do a first calculation with cc_write_t2 = True'
+    print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read'
+    call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          read(11,'(F20.12)') t2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  close(11)
+
+end
diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
new file mode 100644
index 00000000..9e244d82
--- /dev/null
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -0,0 +1,1256 @@
+! 
F + +subroutine gen_f_space(det,n1,n2,list1,list2,f) + + implicit none + + integer, intent(in) :: n1,n2 + integer, intent(in) :: list1(n1),list2(n2) + integer(bit_kind), intent(in) :: det(N_int,2) + double precision, intent(out) :: f(n1,n2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i1,i2,idx1,idx2 + + allocate(tmp_F(mo_num,mo_num)) + + call get_fock_matrix_spin(det,1,tmp_F) + + !$OMP PARALLEL & + !$OMP SHARED(tmp_F,f,n1,n2,list1,list2) & + !$OMP PRIVATE(idx1,idx2,i1,i2)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do i2 = 1, n2 + do i1 = 1, n1 + idx2 = list2(i2) + idx1 = list1(i1) + f(i1,i2) = tmp_F(idx1,idx2) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_F) + +end + +! V + +subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) + + implicit none + + integer, intent(in) :: n1,n2,n3,n4 + integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) + double precision, intent(out) :: v(n1,n2,n3,n4) + + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + idx4 = list4(i4) + idx3 = list3(i3) + idx2 = list2(i2) + idx1 = list1(i1) + v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! 
full + +BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] + + implicit none + + integer :: i,j,k,l + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & + !$OMP PRIVATE(i,j,k,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! oooo + +BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + +END_PROVIDER + +! vooo + +BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + +END_PROVIDER + +! ovoo + +BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + +END_PROVIDER + +! oovo + +BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + +END_PROVIDER + +! ooov + +BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + +END_PROVIDER + +! 
vvoo + +BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + +END_PROVIDER + +! vovo + +BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + +END_PROVIDER + +! voov + +BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + +END_PROVIDER + +! ovvo + +BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + +END_PROVIDER + +! ovov + +BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + +END_PROVIDER + +! oovv + +BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + +END_PROVIDER + +! vvvo + +BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_vvvo) + +END_PROVIDER + +! vvov + +BEGIN_PROVIDER [double precision, cc_space_v_vvov, (cc_nVa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_vvov) + +END_PROVIDER + +! 
vovv + +BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_vovv) + +END_PROVIDER + +! ovvv + +BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_ovvv) + +END_PROVIDER + +! vvvv + +BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_vvvv) + +END_PROVIDER + +! ppqq + +BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] + + implicit none + + BEGIN_DOC + ! integrals for general MOs (excepted core and deleted ones) + END_DOC + + integer :: p,q + double precision, allocatable :: tmp_v(:,:,:,:) + + allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) + + call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) + + do q = 1, cc_n_mo + do p = 1, cc_n_mo + cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) + enddo + enddo + + deallocate(tmp_v) + +END_PROVIDER + +! aaii + +BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i) + enddo + enddo + + FREE cc_space_v_vvoo + +END_PROVIDER + +! iiaa + +BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a) + enddo + enddo + + FREE cc_space_v_oovv + +END_PROVIDER + +! 
iijj + +BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! i,j: occupied MO + END_DOC + + integer :: i,j + + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j) + enddo + enddo + + FREE cc_space_v_oooo + +END_PROVIDER + +! aabb + +BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a,b: virtual MO + END_DOC + + integer :: a,b + + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b) + enddo + enddo + + FREE cc_space_v_vvvv + +END_PROVIDER + +! iaia + +BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a) + enddo + enddo + + FREE cc_space_v_ovov + +END_PROVIDER + +! iaai + +BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i) + enddo + enddo + + FREE cc_space_v_ovvo + +END_PROVIDER + +! aiia + +BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a) + enddo + enddo + + FREE cc_space_v_voov + +END_PROVIDER + +! 
oovv + +BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, cc_nVa + do a = 1, cc_nVa + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER + +! vvoo + +BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do j = 1, cc_nOa + do i = 1, cc_nOa + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER + +! F_oo + +BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo) + +END_PROVIDER + +! F_ov + +BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov) + +END_PROVIDER + +! 
F_vo + +BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo) + +END_PROVIDER + +! F_vv + +BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv) + +END_PROVIDER + +! F_o + +BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)] + + implicit none + + integer :: i + + do i = 1, cc_nOa + cc_space_f_o(i) = cc_space_f_oo(i,i) + enddo + +END_PROVIDER + +! F_v + +BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)] + + implicit none + + integer :: i + + do i = 1, cc_nVa + cc_space_f_v(i) = cc_space_f_vv(i,i) + enddo + +END_PROVIDER + +! Shift + +subroutine shift_idx_spin(s,n_S,shift) + + implicit none + + BEGIN_DOC + ! Shift for the partitionning alpha/beta of the spin orbitals + ! n_S(1): number of spin alpha in the correspondong list + ! n_S(2): number of spin beta in the correspondong list + END_DOC + + integer, intent(in) :: s, n_S(2) + integer, intent(out) :: shift + + if (s == 1) then + shift = 0 + else + shift = n_S(1) + endif + +end + +! F + +subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) + + implicit none + + BEGIN_DOC + ! Compute the Fock matrix corresponding to two lists of spin orbitals. + ! Ex: occ/occ, occ/vir,... + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2) + integer, intent(in) :: dim1, dim2 + + double precision, intent(out) :: f(dim1, dim2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i,j, idx_i,idx_j,i_shift,j_shift + integer :: tmp_i,tmp_j + integer :: si,sj,s + + allocate(tmp_F(mo_num,mo_num)) + + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + s = si + sj + + if (s == 2 .or. 
s == 4) then + call get_fock_matrix_spin(det,sj,tmp_F) + else + do j = 1, mo_num + do i = 1, mo_num + tmp_F(i,j) = 0d0 + enddo + enddo + endif + + do tmp_j = 1, n2_S(sj) + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + f(idx_i,idx_j) = tmp_F(i,j) + enddo + enddo + + enddo + enddo + + deallocate(tmp_F) + +end + +! Get F + +subroutine get_fock_matrix_spin(det,s,f) + + implicit none + + BEGIN_DOC + ! Fock matrix alpha or beta of an arbitrary det + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: s + + double precision, intent(out) :: f(mo_num,mo_num) + + integer :: p,q,i,s1,s2 + integer(bit_kind) :: res(N_int,2) + logical :: ok + double precision :: mo_two_e_integral + + if (s == 1) then + s1 = 1 + s2 = 2 + else + s1 = 2 + s2 = 1 + endif + + !$OMP PARALLEL & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP PRIVATE(p,q,ok,i,res)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do q = 1, mo_num + do p = 1, mo_num + f(p,q) = mo_one_e_integrals(p,q) + do i = 1, mo_num + call apply_hole(det, s1, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + endif + enddo + do i = 1, mo_num + call apply_hole(det, s2, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + endif + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! V + +subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3, dim4 + double precision, intent(out) :: v(dim1,dim2,dim3,dim4) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. 
sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx + +subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_l(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_l <= n4_S(1)) then + sl = 1 + else + sl = 2 + endif + call shift_idx_spin(sl,n4_S,l_shift) + tmp_l = idx_l - l_shift + l = list4(tmp_l,sl) + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx_ij_l + +subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_k(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_k <= n3_S(1)) then + sk = 1 + else + sk = 2 + endif + call shift_idx_spin(sk,n3_S,k_shift) + tmp_k = idx_k - k_shift + k = list3(tmp_k,sk) + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx_i_kl + +subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_j(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_j <= n2_S(1)) then + sj = 1 + else + sj = 2 + endif + call shift_idx_spin(sj,n2_S,j_shift) + tmp_j = idx_j - j_shift + j = list2(tmp_j,sj) + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end diff --git a/src/utils_cc/occupancy.irp.f b/src/utils_cc/occupancy.irp.f new file mode 100644 index 00000000..76e6fb3d --- /dev/null +++ b/src/utils_cc/occupancy.irp.f @@ -0,0 +1,317 @@ +! N spin orb + +subroutine extract_n_spin(det,n) + + implicit none + + BEGIN_DOC + ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals + ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb) + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: n(4) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si + logical :: ok, is_core, is_del + + ! Init + n = 0 + + ! Loop over the spin + do si = 1, 2 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + n(si) = n(si) + 1 + else + ! hole + n(si+2) = n(si+2) + 1 + endif + enddo + enddo + + !print*,n(1),n(2),n(3),n(4) + +end + +! Spin + +subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! 
Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals + ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb) + END_DOC + + integer, intent(in) :: nO_m, nV_m + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO_m,2), list_vir(nV_m,2) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha/beta + + ! occ alpha -> list_occ(:,1) + ! occ beta -> list_occ(:,2) + ! vir alpha -> list_vir(:,1) + ! vir beta -> list_vir(:,2) + + ! Loop over the spin + do si = 1, 2 + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o,si) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v,si) = i + idx_v = idx_v + 1 + endif + enddo + enddo + +end + +! Space + +subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied and virtual alpha spin orbitals + END_DOC + + integer, intent(in) :: nO, nV + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO), list_vir(nV) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + if (elec_alpha_num /= elec_beta_num) then + print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort' + call abort + endif + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha + + ! occ alpha -> list_occ(:,1) + ! vir alpha -> list_vir(:,1) + + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, 1, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! 
particle
+      list_occ(idx_o) = i
+      idx_o = idx_o + 1
+    else
+      ! hole
+      list_vir(idx_v) = i
+      idx_v = idx_v + 1
+    endif
+  enddo
+
+end
+
+! is_core
+
+function is_core(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a core orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical :: is_core
+
+  integer :: j
+
+  ! Init
+  is_core = .False.
+
+  ! Search
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_core = .True.
+      exit
+    endif
+  enddo
+
+end
+
+! is_del
+
+function is_del(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a deleted orbital, i.e. if i appears in list_del
+  END_DOC
+
+  integer, intent(in) :: i
+  logical :: is_del
+
+  integer :: j
+
+  ! Init
+  is_del = .False.
+
+  ! Search the deleted-orbital list (scanning list_core here would make is_del a duplicate of is_core)
+  do j = 1, dim_list_del_orb
+    if (list_del(j) == i) then
+      is_del = .True.
+      exit
+    endif
+  enddo
+
+end
+
+! N orb
+
+BEGIN_PROVIDER [integer, cc_nO_m]
+&BEGIN_PROVIDER [integer, cc_nOa]
+&BEGIN_PROVIDER [integer, cc_nOb]
+&BEGIN_PROVIDER [integer, cc_nOab]
+&BEGIN_PROVIDER [integer, cc_nV_m]
+&BEGIN_PROVIDER [integer, cc_nVa]
+&BEGIN_PROVIDER [integer, cc_nVb]
+&BEGIN_PROVIDER [integer, cc_nVab]
+&BEGIN_PROVIDER [integer, cc_n_mo]
+&BEGIN_PROVIDER [integer, cc_nO_S, (2)]
+&BEGIN_PROVIDER [integer, cc_nV_S, (2)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det
+  ! a: alpha, b: beta
+  ! nO_m: max(a,b) occupied
+  ! nOa: nb a occupied
+  ! nOb: nb b occupied
+  ! nOab: nb a+b occupied
+  ! nV_m: max(a,b) virtual
+  ! nVa: nb a virtual
+  ! nVb: nb b virtual
+  ! nVab: nb a+b virtual
+  END_DOC
+
+  integer :: n_spin(4)
+
+  ! 
Extract number of occ/vir alpha/beta spin orbitals
+  call extract_n_spin(psi_det(1,1,cc_ref),n_spin)
+
+  cc_nOa = n_spin(1)
+  cc_nOb = n_spin(2)
+  cc_nOab = cc_nOa + cc_nOb !n_spin(1) + n_spin(2)
+  cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2))
+  cc_nVa = n_spin(3)
+  cc_nVb = n_spin(4)
+  cc_nVab = cc_nVa + cc_nVb !n_spin(3) + n_spin(4)
+  cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4))
+  cc_n_mo = cc_nOa + cc_nVa !n_spin(1) + n_spin(3): occupied + virtual alpha = every MO that is neither core nor deleted
+  cc_nO_S = (/cc_nOa,cc_nOb/)
+  cc_nV_S = (/cc_nVa,cc_nVb/)
+
+END_PROVIDER
+
+! General
+
+BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of general orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i,j
+  logical :: is_core, is_del
+
+  j = 1
+  do i = 1, mo_num
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+    cc_list_gen(j) = i
+    j = j+1
+  enddo
+
+END_PROVIDER
+
+! Space
+
+BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)]
+&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of occupied and virtual spatial orbitals without core and deleted ones
+  END_DOC
+
+  call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir)
+
+END_PROVIDER
+
+! Spin
+
+BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)]
+&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! 
#!/bin/sh

# Tangle every org-mode file of the current directory with emacs in batch mode.
#
# The previous version stored the literal string 'ls *.org' (single quotes, no
# command substitution) in a variable, so the loop also ran emacs on a file
# named "ls". Iterating directly over the glob avoids that and keeps file
# names containing spaces intact.
for element in *.org
do
  # When no .org file exists the pattern is left unexpanded: skip it.
  [ -e "$element" ] || continue
  emacs --batch "$element" -f org-babel-tangle
done
{t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1} + + integer, intent(in) :: m,iter,sze + double precision, intent(in) :: all_err(sze,m) + double precision, intent(in) :: all_t(sze,m) + + double precision, intent(out) :: t(sze) + + double precision, allocatable :: B(:,:), c(:), zero(:) + integer :: m_iter + integer :: i,j,k + integer :: info + integer, allocatable :: ipiv(:) + double precision :: accu + + m_iter = min(m,iter) + !print*,'m_iter',m_iter + allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1)) + allocate(ipiv(m+1)) + + ! B(i,j) = < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us + B = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(B,m,m_iter,sze,all_err) & + !$OMP PRIVATE(i,j,k,accu) & + !$OMP DEFAULT(NONE) + do j = 1, m_iter + do i = 1, m_iter + accu = 0d0 + !$OMP DO + do k = 1, sze + ! the errors of the ith iteration are in all_err(:,m+1-i) + accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j) + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + B(i,j) = B(i,j) + accu + !$OMP END CRITICAL + enddo + enddo + !$OMP END PARALLEL + + do i = 1, m_iter + B(i,m_iter+1) = -1 + enddo + do j = 1, m_iter + B(m_iter+1,j) = -1 + enddo + ! Debug + !print*,'B' + !do i = 1, m_iter+1 + ! write(*,'(100(F10.6))') B(i,:) + !enddo + + ! (0 0 .... 0 -1) + zero = 0d0 + zero(m_iter+1) = -1d0 + + ! Solve B.c = zero + call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info) + if (info /= 0) then + print*,'DIIS error in dgesv:', info + call abort + endif + ! c corresponds to the m_iter first solutions + c = zero(1:m_iter) + ! Debug + !print*,'c',c + !print*,'all_t' + !do i = 1, m + ! write(*,'(100(F10.6))') all_t(:,i) + !enddo + !print*,'all_err' + !do i = 1, m + ! write(*,'(100(F10.6))') all_err(:,i) + !enddo + + ! 
subroutine update_all_err(err,all_err,sze,m,iter)

  implicit none

  BEGIN_DOC
  ! Add a new error vector to the history used by the DIIS.
  ! Columns 2..m of all_err are shifted by one column toward the first one
  ! (the content of column 1 is overwritten) and err is stored in column m.
  ! iter is not used by this routine; it is kept in the argument list so
  ! that existing callers do not have to change.
  END_DOC

  integer, intent(in)             :: m, iter, sze
  double precision, intent(in)    :: err(sze)
  double precision, intent(inout) :: all_err(sze,m)
  integer                         :: i,j

  !$OMP PARALLEL &
  !$OMP SHARED(m,all_err,err,sze) &
  !$OMP PRIVATE(i,j) &
  !$OMP DEFAULT(NONE)

  ! Shift. The implicit barrier at each END DO guarantees that column i is
  ! completely written before column i+1 is overwritten in the next pass.
  do i = 1, m-1
    !$OMP DO
    do j = 1, sze
      all_err(j,i) = all_err(j,i+1)
    enddo
    !$OMP END DO
  enddo

  ! New vector in the last column
  !$OMP DO
  do i = 1, sze
    all_err(i,m) = err(i)
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

end
subroutine compute_err1(nO,nV,f_o,f_v,r1,err1)

  implicit none

  BEGIN_DOC
  ! Error vector associated with the t1 amplitudes:
  ! err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
  END_DOC

  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: f_o(nO), f_v(nV), r1(nO,nV)

  double precision, intent(out) :: err1(nO,nV)

  integer          :: occ, vir
  double precision :: denom

  !$OMP PARALLEL DO &
  !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) &
  !$OMP PRIVATE(occ,vir,denom) &
  !$OMP DEFAULT(NONE)
  do vir = 1, nV
    do occ = 1, nO
      ! Shifted orbital-energy denominator
      denom = f_o(occ) - f_v(vir) - cc_level_shift
      err1(occ,vir) = - r1(occ,vir) / denom
    enddo
  enddo
  !$OMP END PARALLEL DO

end
subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)

  implicit none

  BEGIN_DOC
  ! Update the CCSD amplitudes t1 and t2 from the residuals r1 and r2.
  ! cc_update_method == 'diis': t1 gets the standard update (update_t1) and
  !   t2 is extrapolated by DIIS using the histories all_err2 / all_t2.
  ! cc_update_method == 'none': standard update of t1 and t2.
  ! Any other value stops the program.
  ! all_err1 and all_t1 are left untouched as long as the DIIS on t1 below
  ! stays commented out.
  END_DOC

  integer, intent(in)             :: nO,nV,nb_iter
  double precision, intent(in)    :: f_o(nO), f_v(nV)
  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)

  double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV)
  double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth)
  double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth)

  double precision, allocatable   :: err1(:,:), err2(:,:,:,:)
  double precision, allocatable   :: tmp_err1(:), tmp_err2(:)
  double precision, allocatable   :: tmp_t1(:), tmp_t2(:)

  if (cc_update_method == 'diis') then

    allocate(err1(nO,nV), err2(nO,nO,nV,nV))
    allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV))
    allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV))

    ! DIIS T1, it is not always good since the t1 can be small
    ! That's why there is a call to update the t1 in the standard way
    ! T1 error tensor
    !call compute_err1(nO,nV,f_o,f_v,r1,err1)
    ! Transfo errors and parameters in vectors
    !tmp_err1 = reshape(err1,(/nO*nV/))
    !tmp_t1   = reshape(t1  ,(/nO*nV/))
    ! Add the error and parameter vectors with those of the previous iterations
    !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
    !call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
    ! Diis and reshape T as a tensor
    !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1)
    !t1 = reshape(tmp_t1  ,(/nO,nV/))
    call update_t1(nO,nV,f_o,f_v,r1,t1)

    ! DIIS T2
    ! T2 error tensor
    call compute_err2(nO,nV,f_o,f_v,r2,err2)
    ! Transfo errors and parameters in vectors
    tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
    tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
    ! Add the error and parameter vectors with those of the previous iterations
    call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
    call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
    ! Diis and reshape T as a tensor
    call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2)
    t2 = reshape(tmp_t2  ,(/nO,nO,nV,nV/))

    deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2)

  ! Standard update as T = T - Delta
  elseif (cc_update_method == 'none') then

    call update_t1(nO,nV,f_o,f_v,r1,t1)
    call update_t2(nO,nV,f_o,f_v,r2,t2)

  else
    ! Returning here would leave t1 and t2 unchanged while the caller keeps
    ! iterating; stop instead, as the guess_t1 / guess_t2 routines do for an
    ! unknown option.
    print*,'Unknown cc_update_method: '//trim(cc_update_method)
    call abort
  endif

end
Update the errors and parameters for the diis + call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Gather the different parameters and errors + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,& + !$OMP all_t,all_t1,all_t2) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_err(i,j) = all_err1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_err(i+nO*nV,j) = all_err2(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_t(i,j) = all_t1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_t(i+nO*nV,j) = all_t2(i,j) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp_t1(i) = tmp_t(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp_t2(i) = tmp_t(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Reshape as tensors + t1 = reshape(tmp_t1 ,(/nO,nV/)) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + ! 
Deallocate + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err) + +end + #+end_src + + +** Update t v3 +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV) + double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: tmp(:) + + integer :: i,j + + ! Allocate + allocate(tmp(nO*nV+nO*nO*nV*nV)) + + ! Compute the errors + call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV)) + call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp(i) = t1(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp(i+nO*nV) = t2(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + t1(i) = tmp(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + t2(i) = tmp(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! 
subroutine det_energy(det,energy)

  implicit none

  BEGIN_DOC
  ! Total energy of the determinant det: diagonal element returned by
  ! i_H_j(det,det,...) plus nuclear_repulsion.
  END_DOC

  ! det was declared as a scalar although it is handed to i_H_j together
  ! with N_int, i.e. as a full determinant; declare it with the shape used
  ! for determinants in this file. Callers that pass an array or an array
  ! element such as psi_det(1,1,k) are unaffected.
  integer(bit_kind), intent(in) :: det(N_int,2)

  double precision, intent(out) :: energy

  call i_H_j(det,det,N_int,energy)

  energy = energy + nuclear_repulsion

end
subroutine write_t1(nO,nV,t1)

  implicit none

  BEGIN_DOC
  ! Dump the T1 amplitudes in <ezfio_filename>/cc_utils/T1, one value per
  ! line, first index running fastest. Nothing is done unless cc_write_t1
  ! is true.
  END_DOC

  ! in
  integer, intent(in)          :: nO, nV
  double precision, intent(in) :: t1(nO, nV)

  if (.not. cc_write_t1) return

  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
  ! Whole-array output: elements are taken in array element order and the
  ! single-item format starts a new record for each of them, i.e. the same
  ! file as an explicit loop over a (outer) and i (inner).
  write(11,'(F20.12)') t1
  close(11)

end
subroutine read_t2(nO,nV,t2)

  implicit none

  BEGIN_DOC
  ! Read the T2 amplitudes for CC from <ezfio_filename>/cc_utils/T2.
  ! The values are expected one per line, first index running fastest
  ! (same order as the nested loops below). Aborts if the file does not exist.
  END_DOC

  ! in
  integer, intent(in)           :: nO, nV
  double precision, intent(out) :: t2(nO, nO, nV, nV)

  ! internal
  integer :: i,j,a,b
  logical :: ok

  ! Check the file that is actually opened below (T2); the previous version
  ! tested and reported cc_utils/T1.
  inquire(file=trim(ezfio_filename)//'/cc_utils/T2', exist=ok)
  if (.not. ok) then
    print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T2'
    print*, 'Do a first calculation with cc_write_t2 = True'
    print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read'
    call abort
  endif
  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          read(11,'(F20.12)') t2(i,j,a,b)
        enddo
      enddo
    enddo
  enddo
  close(11)

end
subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)

  implicit none

  BEGIN_DOC
  ! Fill v with the two-electron integrals restricted to four orbital lists:
  ! v(p,q,r,s) = get_two_e_integral(list1(p),list2(q),list3(r),list4(s),mo_integrals_map)
  END_DOC

  integer, intent(in)           :: n1,n2,n3,n4
  integer, intent(in)           :: list1(n1),list2(n2),list3(n3),list4(n4)
  double precision, intent(out) :: v(n1,n2,n3,n4)

  integer          :: p,q,r,s
  double precision :: get_two_e_integral

  PROVIDE mo_two_e_integrals_in_map

  !$OMP PARALLEL DO collapse(3) &
  !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) &
  !$OMP PRIVATE(p,q,r,s) &
  !$OMP DEFAULT(NONE)
  do s = 1, n4
    do r = 1, n3
      do q = 1, n2
        do p = 1, n1
          v(p,q,r,s) = get_two_e_integral(list1(p),list2(q),list3(r),list4(s),mo_integrals_map)
        enddo
      enddo
    enddo
  enddo
  !$OMP END PARALLEL DO

end
BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)]

  implicit none

  BEGIN_DOC
  ! Two-electron integrals built by gen_v_space with the first index taken
  ! in cc_list_vir and the three others in cc_list_occ
  END_DOC

  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo)

END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)]

  implicit none

  BEGIN_DOC
  ! Two-electron integrals built by gen_v_space with the first and last
  ! indices taken in cc_list_vir and the two middle ones in cc_list_occ
  END_DOC

  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov)

END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]

  implicit none

  BEGIN_DOC
  ! integrals for general MOs (excepted core and deleted ones)
  ! cc_space_v_ppqq(p,q) = get_two_e_integral(P,P,Q,Q,mo_integrals_map)
  ! with P = cc_list_gen(p) and Q = cc_list_gen(q)
  END_DOC

  integer          :: p,q,idx_p,idx_q
  double precision :: get_two_e_integral

  PROVIDE mo_two_e_integrals_in_map

  ! Only the (p,p,q,q) elements are needed: fetch them directly instead of
  ! building a temporary of cc_n_mo**4 elements with gen_v_space.
  do q = 1, cc_n_mo
    idx_q = cc_list_gen(q)
    do p = 1, cc_n_mo
      idx_p = cc_list_gen(p)
      cc_space_v_ppqq(p,q) = get_two_e_integral(idx_p,idx_p,idx_q,idx_q,mo_integrals_map)
    enddo
  enddo

END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)]

  implicit none

  BEGIN_DOC
  ! integrals
  ! i,j: occupied MO
  ! cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j)
  END_DOC

  integer :: i,j

  ! Copy the (i,i,j,j) elements of the occupied-only integral block
  do j = 1, cc_nOa
    do i = 1, cc_nOa
      cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j)
    enddo
  enddo

  ! The four-index block is released once the needed elements are copied
  FREE cc_space_v_oooo

END_PROVIDER
BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)]

  implicit none

  BEGIN_DOC
  ! integrals
  ! a: virtual MO
  ! i: occupied MO
  ! cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a)
  END_DOC

  integer :: a,i

  ! Copy the (a,i,i,a) elements of the voov integral block
  do i = 1, cc_nOa
    do a = 1, cc_nVa
      cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a)
    enddo
  enddo

  ! The four-index block is released once the needed elements are copied
  FREE cc_space_v_voov

END_PROVIDER
j = 1, cc_nOa + do i = 1, cc_nOa + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER +#+end_src + +*** F +**** F_oo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo) + +END_PROVIDER +#+end_src + +**** F_ov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov) + +END_PROVIDER +#+end_src + +**** F_vo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo) + +END_PROVIDER +#+end_src + +**** F_vv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv) + +END_PROVIDER +#+end_src + +**** F_o +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)] + + implicit none + + integer :: i + + do i = 1, cc_nOa + cc_space_f_o(i) = cc_space_f_oo(i,i) + enddo + +END_PROVIDER +#+end_src + +**** F_v +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)] + + implicit none + + integer :: i + + do i = 1, cc_nVa + cc_space_f_v(i) = cc_space_f_vv(i,i) + enddo + +END_PROVIDER +#+end_src + +** Spin +*** Shift +#+begin_src f90 :comments org :tangle 
mo_integrals_cc.irp.f
+subroutine shift_idx_spin(s,n_S,shift)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift for the partitioning alpha/beta of the spin orbitals
+  ! n_S(1): number of spin alpha in the corresponding list
+  ! n_S(2): number of spin beta in the corresponding list
+  END_DOC
+
+  integer, intent(in) :: s, n_S(2)
+  integer, intent(out) :: shift
+
+  if (s == 1) then
+    shift = 0 ! first spin block starts at the beginning of the combined index range
+  else
+    shift = n_S(1) ! second spin block is stored right after the n_S(1) entries of the first
+  endif
+
+end
+#+end_src
+
+*** F
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Fock matrix corresponding to two lists of spin orbitals.
+  ! Ex: occ/occ, occ/vir,...
+  END_DOC
+
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in) :: n1,n2, n1_S(2), n2_S(2)
+  integer, intent(in) :: list1(n1,2), list2(n2,2)
+  integer, intent(in) :: dim1, dim2
+
+  double precision, intent(out) :: f(dim1, dim2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer :: i,j, idx_i,idx_j,i_shift,j_shift
+  integer :: tmp_i,tmp_j
+  integer :: si,sj,s
+
+  allocate(tmp_F(mo_num,mo_num))
+
+  do sj = 1, 2
+    call shift_idx_spin(sj,n2_S,j_shift)
+    do si = 1, 2
+      call shift_idx_spin(si,n1_S,i_shift)
+      s = si + sj ! 2 or 4 <=> si == sj (same-spin block), 3 <=> mixed-spin block
+
+      if (s == 2 .or. s == 4) then
+        call get_fock_matrix_spin(det,sj,tmp_F)
+      else
+        do j = 1, mo_num
+          do i = 1, mo_num
+            tmp_F(i,j) = 0d0 ! mixed-spin blocks of f are filled with zeros
+          enddo
+        enddo
+      endif
+
+      do tmp_j = 1, n2_S(sj)
+        j = list2(tmp_j,sj) ! MO index read from the list
+        idx_j = tmp_j + j_shift ! column position in f
+        do tmp_i = 1, n1_S(si)
+          i = list1(tmp_i,si)
+          idx_i = tmp_i + i_shift
+          f(idx_i,idx_j) = tmp_F(i,j)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  deallocate(tmp_F)
+
+end
+#+end_src
+
+*** Get F
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine get_fock_matrix_spin(det,s,f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! 
Fock matrix alpha or beta of an arbitrary det + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: s + + double precision, intent(out) :: f(mo_num,mo_num) + + integer :: p,q,i,s1,s2 + integer(bit_kind) :: res(N_int,2) + logical :: ok + double precision :: mo_two_e_integral + + if (s == 1) then + s1 = 1 + s2 = 2 + else + s1 = 2 + s2 = 1 + endif + + !$OMP PARALLEL & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP PRIVATE(p,q,ok,i,res)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do q = 1, mo_num + do p = 1, mo_num + f(p,q) = mo_one_e_integrals(p,q) + do i = 1, mo_num + call apply_hole(det, s1, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + endif + enddo + do i = 1, mo_num + call apply_hole(det, s2, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + endif + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** V +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3, dim4 + double precision, intent(out) :: v(dim1,dim2,dim3,dim4) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. 
sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_l(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_l <= n4_S(1)) then + sl = 1 + else + sl = 2 + endif + call shift_idx_spin(sl,n4_S,l_shift) + tmp_l = idx_l - l_shift + l = list4(tmp_l,sl) + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx_ij_l +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_k(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_k <= n3_S(1)) then + sk = 1 + else + sk = 2 + endif + call shift_idx_spin(sk,n3_S,k_shift) + tmp_k = idx_k - k_shift + k = list3(tmp_k,sk) + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx_i_kl +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... 
+ END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_j(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_j <= n2_S(1)) then + sj = 1 + else + sj = 2 + endif + call shift_idx_spin(sj,n2_S,j_shift) + tmp_j = idx_j - j_shift + j = list2(tmp_j,sj) + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! 
or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + diff --git a/src/utils_cc/org/occupancy.org b/src/utils_cc/org/occupancy.org new file mode 100644 index 00000000..9e7a251d --- /dev/null +++ b/src/utils_cc/org/occupancy.org @@ -0,0 +1,330 @@ +* N spin orb +#+begin_src f90 :comments org :tangle occupancy.irp.f +subroutine extract_n_spin(det,n) + + implicit none + + BEGIN_DOC + ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals + ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb) + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: n(4) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si + logical :: ok, is_core, is_del + + ! Init + n = 0 + + ! Loop over the spin + do si = 1, 2 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + n(si) = n(si) + 1 + else + ! 
hole
+        n(si+2) = n(si+2) + 1
+      endif
+    enddo
+  enddo
+
+  !print*,n(1),n(2),n(3),n(4)
+
+end
+#+end_src
+
+* List_orb
+** Spin
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the list of occupied alpha/beta, virtual alpha/beta spin orbitals
+  ! nO_m = size(list_occ,1) must be max(nOa,nOb) and nV_m = size(list_vir,1) must be max(nVa,nVb)
+  END_DOC
+
+  integer, intent(in) :: nO_m, nV_m
+  integer(bit_kind), intent(in) :: det(N_int,2)
+
+  integer, intent(out) :: list_occ(nO_m,2), list_vir(nV_m,2)
+
+  integer(bit_kind) :: res(N_int,2)
+  integer :: i, si, idx_o, idx_v, idx_i, idx_b
+  logical :: ok, is_core, is_del
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha/beta
+
+  ! occ alpha -> list_occ(:,1)
+  ! occ beta -> list_occ(:,2)
+  ! vir alpha -> list_vir(:,1)
+  ! vir beta -> list_vir(:,2)
+
+  ! Loop over the spin
+  do si = 1, 2
+    ! next free position in list_occ(:,si) / list_vir(:,si)
+    idx_o = 1
+    idx_v = 1
+    do i = 1, mo_num
+      call apply_hole(det, si, i, res, ok, N_int)
+
+      ! in core ?
+      if (is_core(i)) cycle
+      ! in del ?
+      if (is_del(i)) cycle
+
+      if (ok) then
+        ! ok is true: orbital i is stored as occupied for spin si
+        list_occ(idx_o,si) = i
+        idx_o = idx_o + 1
+      else
+        ! ok is false: orbital i is stored as virtual for spin si
+        list_vir(idx_v,si) = i
+        idx_v = idx_v + 1
+      endif
+    enddo
+  enddo
+
+end
+#+end_src
+
+** Space
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! 
Returns the list of occupied and virtual alpha spin orbitals
+  END_DOC
+
+  integer, intent(in) :: nO, nV
+  integer(bit_kind), intent(in) :: det(N_int,2)
+
+  integer, intent(out) :: list_occ(nO), list_vir(nV)
+
+  integer(bit_kind) :: res(N_int,2)
+  integer :: i, idx_o, idx_v
+  logical :: ok, is_core, is_del
+
+  if (elec_alpha_num /= elec_beta_num) then
+    print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort'
+    call abort
+  endif
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha
+
+  ! occ alpha -> list_occ(:)
+  ! vir alpha -> list_vir(:)
+
+  ! next free position in list_occ / list_vir
+  idx_o = 1
+  idx_v = 1
+  do i = 1, mo_num
+    call apply_hole(det, 1, i, res, ok, N_int)
+
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+
+    if (ok) then
+      ! ok is true: orbital i is stored as occupied
+      list_occ(idx_o) = i
+      idx_o = idx_o + 1
+    else
+      ! ok is false: orbital i is stored as virtual
+      list_vir(idx_v) = i
+      idx_v = idx_v + 1
+    endif
+  enddo
+
+end
+#+end_src
+
+** is_core
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+function is_core(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a core orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical :: is_core
+
+  integer :: j
+
+  ! Init
+  is_core = .False.
+
+  ! Linear search of i in the list of core orbitals
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_core = .True.
+      exit
+    endif
+  enddo
+
+end
+#+end_src
+
+** is_del
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+function is_del(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a deleted orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical :: is_del
+
+  integer :: j
+
+  ! Init
+  is_del = .False.
+
+  ! Linear search of i in the list of deleted orbitals (list_del, not list_core)
+  do j = 1, dim_list_del_orb
+    if (list_del(j) == i) then
+      is_del = .True.
+      exit
+    endif
+  enddo
+
+end
+#+end_src
+
+* Providers
+** N orb
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_nO_m]
+&BEGIN_PROVIDER [integer, cc_nOa]
+&BEGIN_PROVIDER [integer, cc_nOb]
+&BEGIN_PROVIDER [integer, cc_nOab]
+&BEGIN_PROVIDER [integer, cc_nV_m]
+&BEGIN_PROVIDER [integer, cc_nVa]
+&BEGIN_PROVIDER [integer, cc_nVb]
+&BEGIN_PROVIDER [integer, cc_nVab]
+&BEGIN_PROVIDER [integer, cc_n_mo]
+&BEGIN_PROVIDER [integer, cc_nO_S, (2)]
+&BEGIN_PROVIDER [integer, cc_nV_S, (2)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det
+  ! a: alpha, b: beta
+  ! nO_m: max(a,b) occupied
+  ! nOa: nb a occupied
+  ! nOb: nb b occupied
+  ! nOab: nb a+b occupied
+  ! nV_m: max(a,b) virtual
+  ! nVa: nb a virtual
+  ! nVb: nb b virtual
+  ! nVab: nb a+b virtual
+  END_DOC
+
+  integer :: n_spin(4)
+
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  call extract_n_spin(psi_det(1,1,cc_ref),n_spin)
+
+  cc_nOa = n_spin(1)
+  cc_nOb = n_spin(2)
+  cc_nOab = cc_nOa + cc_nOb !n_spin(1) + n_spin(2)
+  cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2))
+  cc_nVa = n_spin(3)
+  cc_nVb = n_spin(4)
+  cc_nVab = cc_nVa + cc_nVb !n_spin(3) + n_spin(4)
+  cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4))
+  cc_n_mo = cc_nOa + cc_nVa !n_spin(1) + n_spin(3): occupied + virtual alpha, the size of cc_list_gen
+  cc_nO_S = (/cc_nOa,cc_nOb/)
+  cc_nV_S = (/cc_nVa,cc_nVb/)
+
+END_PROVIDER
+#+end_src
+
+** List orb
+
+*** General
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of general orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i,j
+  logical :: is_core, is_del
+
+  j = 1
+  do i = 1, mo_num
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ? 
+ if (is_del(i)) cycle + cc_list_gen(j) = i + j = j+1 + enddo + +END_PROVIDER +#+end_src + +*** Space +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)] +&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spatial orbitals without core and deleted ones + END_DOC + + call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir) + +END_PROVIDER +#+end_src + +*** Spin +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] +&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spin orbitals without core and deleted ones + END_DOC + + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + +END_PROVIDER +#+end_src diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org new file mode 100644 index 00000000..5f67859c --- /dev/null +++ b/src/utils_cc/org/phase.org @@ -0,0 +1,178 @@ +#+begin_src f90 :comments org :notangle phase.irp.f +program run + implicit none + + integer :: n(2), degree1, degree2, exc(0:2,2,2) + integer, allocatable :: list_anni(:,:), list_crea(:,:) + double precision :: phase1, phase2 + integer :: h1,h2,p1,p2,s1,s2,i,j + + allocate(list_anni(N_int*bit_kind_size,2)) + allocate(list_crea(N_int*bit_kind_size,2)) + + do i = 1, N_det-1 + do j = i+1, N_det + !call print_det(psi_det(1,1,j),N_int) + call get_excitation(psi_det(1,1,i),psi_det(1,1,j),exc,degree1,phase1,N_int) + call decode_exc(exc,degree1,h1,p1,h2,p2,s1,s2) + !print*,'old',degree1,phase1 + !print*,'h1:',h1,'h2:',h2,'s1:',s1,'s2:',s2 + !print*,'p1:',p1,'p2:',p2 + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree1,N_int) + call get_excitation_general(psi_det(1,1,i),psi_det(1,1,j),degree2,n,list_anni,list_crea,phase2,N_int) + !print*,'new',degree2,phase2 + 
!print*,'ha:',list_anni(1:n(1),1),'hb',list_anni(1:n(2),2)
+      !print*,'pa:',list_crea(1:n(1),1),'pb',list_crea(1:n(2),2)
+      !print*,''
+      if (degree1 /= degree2) then
+        print*,'Error degree:',degree1,degree2
+        call abort
+      endif
+      if (degree1 <= 2 .and. phase1 /= phase2) then
+        print*,'Error phase',phase1,phase2
+        call abort
+      endif
+    enddo
+  enddo
+
+end
+#+end_src
+
+** phase
+#+begin_src f90 :comments org :tangle phase.irp.f
+subroutine get_phase_general(det1,det2,phase,degree,Nint)
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out) :: phase
+  integer, intent(out) :: degree
+  integer :: n(2) ! per-spin counts returned by get_excitation_general, discarded here
+  integer, allocatable :: list_anni(:,:), list_crea(:,:) ! work arrays, released automatically on return
+
+  allocate(list_anni(Nint*bit_kind_size,2)) ! sized with the Nint argument, as the callee declares its dummy
+  allocate(list_crea(Nint*bit_kind_size,2))
+
+  call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+end
+#+end_src
+
+** Get excitation general
+#+begin_src f90 :comments org :tangle phase.irp.f
+subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+
+  use bitmasks
+
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out) :: phase
+  integer, intent(out) :: list_crea(Nint*bit_kind_size,2)
+  integer, intent(out) :: list_anni(Nint*bit_kind_size,2)
+  integer, intent(out) :: degree, n(2)
+
+  integer, allocatable :: l1(:,:), l2(:,:)
+  integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:)
+  integer, allocatable :: pos_anni(:,:), pos_crea(:,:)
+
+  integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d
+
+  allocate(l1(Nint*bit_kind_size,2))
+  allocate(l2(Nint*bit_kind_size,2))
+  allocate(det_crea(Nint,2),det_anni(Nint,2))
+
+  ! 1 111010
+  ! 2 110101
+  !
+  !not 1-> 000101
+  ! 2 110101
+  !and 000101 -> crea
+  !
+  ! 1 111010
+  !not 2-> 001010
+  ! 
001010 -> anni + + do j = 1, 2 + do i = 1, Nint + det_crea(i,j) = iand(not(det1(i,j)),det2(i,j)) + enddo + enddo + + do j = 1, 2 + do i = 1, Nint + det_anni(i,j) = iand(det1(i,j),not(det2(i,j))) + enddo + enddo + + call bitstring_to_list_ab(det1,l1,n1,Nint) + call bitstring_to_list_ab(det2,l2,n2,Nint) + call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint) + call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint) + + do i = 1, 2 + if (n_crea(i) /= n_anni(i)) then + print*,'Well, it seems we have a problem here...' + call abort + endif + enddo + + !1 11110011001 1 2 3 4 7 8 11 + !pos 1 2 3 4 5 6 7 + !2 11100101011 1 2 3 6 8 10 11 + !anni 00010010000 4 7 + !pos 4 5 + !crea 00000100010 6 10 + !pos 4 6 + !4 -> 6 pos(4 -> 4) + !7 -> 10 pos(5 -> 6) + + n = n_anni + degree = n_anni(1) + n_anni(2) + + allocate(pos_anni(max(n(1),n(2)),2)) + allocate(pos_crea(max(n(1),n(2)),2)) + + ! Search pos anni + do j = 1, 2 + k = 1 + do i = 1, n1(j) + if (l1(i,j) /= list_anni(k,j)) cycle + pos_anni(k,j) = i + k = k + 1 + enddo + enddo + + ! Search pos crea + do j = 1, 2 + k = 1 + do i = 1, n2(j) + if (l2(i,j) /= list_crea(k,j)) cycle + pos_crea(k,j) = i + k = k + 1 + enddo + enddo + + ! Distance between the ith anni and the ith crea op + ! By doing so there is no crossing between the different pairs of anni/crea + ! and the phase is determined by the sum of the distances + ! -> (-1)^{sum of the distances} + d = 0 + do j = 1, 2 + do i = 1, n(j) + d = d + abs(pos_anni(i,j) - pos_crea(i,j)) + enddo + enddo + + phase = dble((-1)**d) + + ! 
Debug + !print*,l2(1:n2(1),1) + !print*,l2(1:n2(2),2) + !!call print_det(det1,Nint) + !!call print_det(det2,Nint) + !print*,phase + !print*,'' +end +#+end_src + diff --git a/src/utils_cc/org/print_wf_qp_edit.org b/src/utils_cc/org/print_wf_qp_edit.org new file mode 100644 index 00000000..0f19ac76 --- /dev/null +++ b/src/utils_cc/org/print_wf_qp_edit.org @@ -0,0 +1,33 @@ +#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f +program run + + implicit none + + read_wf = .true. + touch read_wf + + call print_wf_qp_edit() + +end +#+end_src + +#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f +subroutine print_wf_qp_edit() + + implicit none + + BEGIN_DOC + ! Print the psi_det wave function up to n_det_qp_edit + END_DOC + + integer :: i + + do i = 1, n_det_qp_edit + print*,i + write(*,'(100(1pE12.4))') psi_coef(i,:) + call print_det(psi_det(1,1,i),N_int) + print*,'' + enddo + +end +#+end_src diff --git a/src/utils_cc/org/update_t.org b/src/utils_cc/org/update_t.org new file mode 100644 index 00000000..c0207b22 --- /dev/null +++ b/src/utils_cc/org/update_t.org @@ -0,0 +1,76 @@ +* T1 +#+begin_src f90 :comments org :tangle update_t.irp.f +subroutine update_t1(nO,nV,f_o,f_v,r1,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO, nV) + + ! inout + double precision, intent(inout) :: t1(nO, nV) + + ! internal + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* T2 +#+begin_src f90 :comments org :tangle update_t.irp.f +subroutine update_t2(nO,nV,f_o,f_v,r2,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! 
in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV)
+
+  ! inout
+  double precision, intent(inout) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer :: i,j,a,b
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+#+end_src
+
diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f
new file mode 100644
index 00000000..01b41f49
--- /dev/null
+++ b/src/utils_cc/phase.irp.f
@@ -0,0 +1,135 @@
+! phase
+
+subroutine get_phase_general(det1,det2,phase,degree,Nint)
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out) :: phase
+  integer, intent(out) :: degree
+  integer :: n(2) ! per-spin counts returned by get_excitation_general, discarded here
+  integer, allocatable :: list_anni(:,:), list_crea(:,:) ! work arrays, released automatically on return
+
+  allocate(list_anni(Nint*bit_kind_size,2)) ! sized with the Nint argument, as the callee declares its dummy
+  allocate(list_crea(Nint*bit_kind_size,2))
+
+  call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+end
+
+! 
subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)

  use bitmasks

  implicit none

  ! Excitation of arbitrary degree between det1 and det2.
  !
  ! det1, det2 : determinants, Nint integers per spin
  ! degree     : total number of annihilated orbitals (both spins)
  ! n(2)       : number of annihilated (= created) orbitals for each spin
  ! list_anni  : orbitals occupied in det1 and not in det2, per spin
  ! list_crea  : orbitals occupied in det2 and not in det1, per spin
  ! phase      : (-1)^d, d being the sum over the pairs of the distances
  !              between the positions of the i-th annihilated orbital in
  !              det1 and of the i-th created orbital in det2
  ! Nint       : number of integers used to store one spin part

  integer, intent(in)           :: Nint
  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
  double precision, intent(out) :: phase
  integer, intent(out)          :: list_crea(Nint*bit_kind_size,2)
  integer, intent(out)          :: list_anni(Nint*bit_kind_size,2)
  integer, intent(out)          :: degree, n(2)

  integer, allocatable           :: l1(:,:), l2(:,:)
  integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:)
  integer, allocatable           :: pos_anni(:,:), pos_crea(:,:)

  integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d

  allocate(l1(Nint*bit_kind_size,2))
  allocate(l2(Nint*bit_kind_size,2))
  allocate(det_crea(Nint,2),det_anni(Nint,2))

  ! 1      111010
  ! 2      110101
  !
  ! not 1-> 000101
  ! 2       110101
  ! and     000101 -> crea
  !
  ! 1       111010
  ! not 2-> 001010
  ! and     001010 -> anni

  do j = 1, 2
    do i = 1, Nint
      det_crea(i,j) = iand(not(det1(i,j)),det2(i,j))
    enddo
  enddo

  do j = 1, 2
    do i = 1, Nint
      det_anni(i,j) = iand(det1(i,j),not(det2(i,j)))
    enddo
  enddo

  call bitstring_to_list_ab(det1,l1,n1,Nint)
  call bitstring_to_list_ab(det2,l2,n2,Nint)
  call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint)
  call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint)

  ! Both determinants must have the same number of electrons of each spin
  do i = 1, 2
    if (n_crea(i) /= n_anni(i)) then
      print*,'Well, it seems we have a problem here...'
      call abort
    endif
  enddo

  !1    11110011001   1 2 3 4 7 8 11
  !pos                1 2 3 4 5 6 7
  !2    11100101011   1 2 3 6 8 10 11
  !anni 00010010000   4 7
  !pos                4 5
  !crea 00000100010   6 10
  !pos                4 6
  !4 -> 6   pos(4 -> 4)
  !7 -> 10  pos(5 -> 6)

  n = n_anni
  degree = n_anni(1) + n_anni(2)

  allocate(pos_anni(max(n(1),n(2)),2))
  allocate(pos_crea(max(n(1),n(2)),2))

  ! Search pos anni
  do j = 1, 2
    k = 1
    do i = 1, n1(j)
      ! Stop once every annihilated orbital is located: list_anni(k,j) with
      ! k > n(j) is presumably not filled by bitstring_to_list_ab (TODO
      ! confirm), and a chance match would write outside pos_anni.
      if (k > n(j)) exit
      if (l1(i,j) /= list_anni(k,j)) cycle
      pos_anni(k,j) = i
      k = k + 1
    enddo
  enddo

  ! Search pos crea
  do j = 1, 2
    k = 1
    do i = 1, n2(j)
      ! Same guard as above for the created orbitals
      if (k > n(j)) exit
      if (l2(i,j) /= list_crea(k,j)) cycle
      pos_crea(k,j) = i
      k = k + 1
    enddo
  enddo

  ! Distance between the ith anni and the ith crea op
  ! By doing so there is no crossing between the different pairs of anni/crea
  ! and the phase is determined by the sum of the distances
  ! -> (-1)^{sum of the distances}
  d = 0
  do j = 1, 2
    do i = 1, n(j)
      d = d + abs(pos_anni(i,j) - pos_crea(i,j))
    enddo
  enddo

  phase = dble((-1)**d)

  deallocate(l1,l2,det_crea,det_anni,pos_anni,pos_crea)

end
subroutine update_t2(nO,nV,f_o,f_v,r2,t2)

  implicit none

  BEGIN_DOC
  ! Update the T2 amplitudes for CC:
  ! t2(i,j,a,b) <- t2(i,j,a,b)
  !              - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
  END_DOC

  ! Dimensions, diagonal Fock elements and residual (read only)
  integer, intent(in)             :: nO, nV
  double precision, intent(in)    :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV)

  ! Amplitudes, updated in place
  double precision, intent(inout) :: t2(nO, nO, nV, nV)

  ! Loop indices; the first occupied index is handled by array sections
  integer :: j,a,b

  !$OMP PARALLEL DO collapse(3) &
  !$OMP DEFAULT(NONE) &
  !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) &
  !$OMP PRIVATE(j,a,b)
  do b = 1, nV
    do a = 1, nV
      do j = 1, nO
        ! Contiguous leading index: same element-wise expression as the
        ! explicit innermost loop.
        t2(1:nO,j,a,b) = t2(1:nO,j,a,b) - r2(1:nO,j,a,b) / (f_o(1:nO) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
      enddo
    enddo
  enddo
  !$OMP END PARALLEL DO

end
-289.805036176618 } @test "HBO" { # 4.42968s 19.6099s qp set_file hbo.ezfio + qp set_frozen_core run -100.2019254455993 -99.79484127741013 } @test "HCO" { # 6.6077s 28.6801s qp set_file hco.ezfio + qp set_frozen_core run -113.39088802205114 -113.22204293108558 } @test "H2O" { # 7.0651s 30.6642s qp set_file h2o.ezfio + qp set_frozen_core run -76.22975602077072 -75.80609108747208 } @@ -52,6 +56,7 @@ function run() { @test "H2S" { # 7.42152s 32.5461s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio + qp set_frozen_core run -398.853701416768 -398.519020035337 } @@ -72,6 +77,7 @@ function run() { @test "OH" { # 18.2159s 1.28453m [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio + qp set_frozen_core run -75.6087472926588 -75.5370393736601 } @@ -85,6 +91,7 @@ function run() { @test "SiH3" { # 20.2202s 1.38648m [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio + qp set_frozen_core run -5.57096611856522 -5.30950347928823 } @@ -105,6 +112,7 @@ function run() { @test "H3COH" { # 24.7248s 1.85043m [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio + qp set_frozen_core run -115.204958752377 -114.755913828245 } @@ -119,6 +127,7 @@ function run() { @test "ClF" { # 30.3225s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio + qp set_frozen_core run -559.162476603880 -558.792395927088 } @@ -132,6 +141,7 @@ function run() { @test "ClO" { # 37.6949s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio + qp set_frozen_core run -534.5404021326773 -534.3818725793897 } @@ -152,6 +162,7 @@ function run() { @test "SO" { # 51.2476s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio + qp set_frozen_core run -26.0131812819785 -25.7053111980226 } From f0d9b3767803860a6edcd894aa0fe40eb23f6a03 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 13 Mar 2023 14:03:54 +0100 Subject: [PATCH 12/87] provider open shell --- src/utils_cc/occupancy.irp.f | 11 +++++++++++ src/utils_cc/org/occupancy.org | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/utils_cc/occupancy.irp.f b/src/utils_cc/occupancy.irp.f index 
76e6fb3d..c6139bb3 100644 --- a/src/utils_cc/occupancy.irp.f +++ b/src/utils_cc/occupancy.irp.f @@ -305,6 +305,7 @@ END_PROVIDER BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] &BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] +&BEGIN_PROVIDER [logical, cc_ref_is_open_shell] implicit none @@ -312,6 +313,16 @@ BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] ! List of occupied and virtual spin orbitals without core and deleted ones END_DOC + integer :: i + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + cc_ref_is_open_shell = .False. + do i = 1, cc_nO_m + if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then + cc_ref_is_open_shell = .True. + endif + enddo + + END_PROVIDER diff --git a/src/utils_cc/org/occupancy.org b/src/utils_cc/org/occupancy.org index 9e7a251d..246bbd5b 100644 --- a/src/utils_cc/org/occupancy.org +++ b/src/utils_cc/org/occupancy.org @@ -317,6 +317,7 @@ END_PROVIDER #+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] &BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] +&BEGIN_PROVIDER [logical, cc_ref_is_open_shell] implicit none @@ -324,7 +325,17 @@ END_PROVIDER ! List of occupied and virtual spin orbitals without core and deleted ones END_DOC + integer :: i + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + cc_ref_is_open_shell = .False. + do i = 1, cc_nO_m + if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then + cc_ref_is_open_shell = .True. 
# Run CCSD(T) in spin orbitals on an EZFIO directory and compare the energies.
#   $1 : EZFIO directory (e.g. h2o.ezfio)
#   $2 : expected E(CCSD)
#   $3 : expected E(T)
function run() {
  local thresh1=1e-6
  local thresh2=1e-6
  local file energy1 energy2

  test_exe scf || skip
  qp set_file "$1"
  qp edit --check
  #qp run scf
  qp set_frozen_core
  qp set utils_cc cc_par_t true
  qp set utils_cc cc_thresh_conv 1e-12

  # Strip only a trailing ".ezfio"; the former sed 's/.ezfio//g' treated the
  # dot as a wildcard and removed every occurrence in the name.
  file="${1%.ezfio}"
  qp run ccsd_spin_orb | tee "$file.ccsd.out"

  # Third field of the last matching line. 'print' is used instead of
  # 'printf $3', which interpreted the value as a format string.
  energy1="$(grep -F 'E(CCSD)' "$file.ccsd.out" | tail -n 1 | awk '{print $3}')"
  energy2="$(grep -F 'E(T)' "$file.ccsd.out" | tail -n 1 | awk '{print $3}')"
  #rm $file.ccsd.out

  # Quoted so that a missing energy is passed as an empty argument instead of
  # silently shifting the remaining arguments of eq.
  eq "$energy1" "$2" "$thresh1"
  eq "$energy2" "$3" "$thresh2"
}
sih2_3b1.ezfio -290.016780973072 -0.000497825874 +} + +@test "sih3" { +run sih3.ezfio -5.575343504534 -0.002094123268 +} + +@test "so" { +run so.ezfio -26.035945178665 -0.010594351274 +} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +#@test "clo" { +#run clo.ezfio -534.564874409333 -534.572458980757 +#} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +#@test "dhno" { +#run dhno.ezfio -130.816650109473 -130.828847440925 +#} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +#@test "f" { +#run f.ezfio -99.616644511120 -99.620269036428 +#} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +#@test "hco" { +#run hco.ezfio -113.405413962350 -113.413387417687 +#} +# +#@test "lif" { +#run lif.ezfio -107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +#@test "oh" { +#run oh.ezfio -75.614606131897 -75.618732794235 +#} +# +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +#} +# +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -5.577437627802 +#} +# 
# Run CCSD(T) in spatial orbitals on an EZFIO directory and compare the energies.
#   $1 : EZFIO directory (e.g. h2o.ezfio)
#   $2 : expected E(CCSD)
#   $3 : expected E(T)
function run() {
  local thresh1=1e-6
  local thresh2=1e-6
  local file energy1 energy2

  test_exe scf || skip
  qp set_file "$1"
  qp edit --check
  #qp run scf
  qp set_frozen_core
  qp set utils_cc cc_par_t true
  qp set utils_cc cc_thresh_conv 1e-12

  # Strip only a trailing ".ezfio"; the former sed 's/.ezfio//g' treated the
  # dot as a wildcard and removed every occurrence in the name.
  file="${1%.ezfio}"
  qp run ccsd_space_orb | tee "$file.ccsd.out"

  # Third field of the last matching line. 'print' is used instead of
  # 'printf $3', which interpreted the value as a format string.
  energy1="$(grep -F 'E(CCSD)' "$file.ccsd.out" | tail -n 1 | awk '{print $3}')"
  energy2="$(grep -F 'E(T)' "$file.ccsd.out" | tail -n 1 | awk '{print $3}')"
  #rm $file.ccsd.out

  # Quoted so that a missing energy is passed as an empty argument instead of
  # silently shifting the remaining arguments of eq.
  eq "$energy1" "$2" "$thresh1"
  eq "$energy2" "$3" "$thresh2"
}
-0.013418135043 +} + +#@test "hco" { +#run hco.ezfio -113.405413962350 -0.007973455337 +#} + +@test "lif" { +run lif.ezfio -107.270402903250 -0.007742969005 +} + +@test "n2" { +run n2.ezfio -109.355358930472 -0.018477744342 +} + +@test "n2h4" { +run n2h4.ezfio -111.556885923139 -0.009048077008 +} + +@test "nh3" { +run nh3.ezfio -56.465503060954 -0.007638273755 +} + +#@test "oh" { +#run oh.ezfio -75.614606132774 -0.004126661739 +#} + +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973072 -0.000497825874 +#} + +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -0.002094123268 +#} + +#@test "so" { +#run so.ezfio -26.035945178665 -0.010594351274 +#} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +##@test "clo" { +##run clo.ezfio -534.564874409333 -534.572458980757 +##} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +##@test "dhno" { +##run dhno.ezfio -130.816650109473 -130.828847440925 +##} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +##@test "f" { +##run f.ezfio -99.616644511120 -99.620269036428 +##} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +##@test "hco" { +##run hco.ezfio -113.405413962350 -113.413387417687 +##} +# +#@test "lif" { +#run lif.ezfio 
-107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +##@test "oh" { +##run oh.ezfio -75.614606131897 -75.618732794235 +##} +# +##@test "sih2_3b1" { +##run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +##} +# +##@test "sih3" { +##run sih3.ezfio -5.575343504534 -5.577437627802 +##} +# +##@test "so" { +##run so.ezfio -26.035945181998 -26.046539528491 +##} + diff --git a/src/ccsd/NEED b/src/ccsd/NEED new file mode 100644 index 00000000..e6e6bc59 --- /dev/null +++ b/src/ccsd/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_cc diff --git a/src/ccsd/README.md b/src/ccsd/README.md new file mode 100644 index 00000000..fa59e8a6 --- /dev/null +++ b/src/ccsd/README.md @@ -0,0 +1,31 @@ +# CCSD in spin orbitals and spatial orbitals + +CCSD and CCSD(T) in spin orbitals for open and closed shell systems. +CCSD and CCSD(T) in spatial orbitals for closed shell systems. + +## Calculations +The program will automatically choose the version in spin or spatial orbitals. +To run the general program: +``` +qp run ccsd +``` +Nevertheless, you can enforce the run in spin orbitals with +``` +qp run ccsd_spin_orb +``` + +## Settings +The settings can be changed with: +``` +qp set utils_cc cc_#param #val +``` +For more information on the settings, look at the module utils_cc and its documentation. + +## Org files +The org files are stored in the directory org in order to avoid overwriting user changes. +The org files can be modified; to export the changes to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/ccsd/ccsd.irp.f b/src/ccsd/ccsd.irp.f new file mode 100644 index 00000000..035f50b8 --- /dev/null +++ b/src/ccsd/ccsd.irp.f @@ -0,0 +1,18 @@ +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD program + END_DOC + + read_wf = .True. 
subroutine run_ccsd_space_orb

  implicit none

  BEGIN_DOC
  ! CCSD, and optionally CCSD(T), in spatial orbitals for a closed-shell
  ! reference determinant psi_det(:,:,cc_ref).
  !
  ! The amplitudes are iterated until the largest residual element is below
  ! cc_thresh_conv or the iteration counter exceeds cc_max_iter. The
  ! amplitudes are updated with DIIS (cc_update_method == 'diis') or with the
  ! plain update T = T - Delta (cc_update_method == 'none'); any other value
  ! aborts the run.
  END_DOC

  integer :: nb_iter
  double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, ta, tb
  double precision :: e_t
  logical :: not_converged

  double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:)
  double precision, allocatable :: t1(:,:), r1(:,:)
  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)

  double precision, allocatable :: all_err(:,:), all_t(:,:)
  integer, allocatable          :: list_occ(:), list_vir(:)
  integer(bit_kind)             :: det(N_int,2)
  integer                       :: nO, nV, nOa, nOb, nVa, nVb

  PROVIDE mo_two_e_integrals_in_map

  det = psi_det(:,:,cc_ref)
  print*,'Reference determinant:'
  call print_det(det,N_int)

  ! Number of occ/vir alpha/beta spin orbitals
  nOa = cc_nOa
  nOb = cc_nOb
  nVa = cc_nVa
  nVb = cc_nVb

  ! Validate the input before allocating anything.

  ! The reference must be a closed shell determinant
  if (cc_ref_is_open_shell) then
    print*, 'Only for closed shell reference determinants'
    print*, 'abort'
    call abort
  endif

  if (elec_alpha_num /= elec_beta_num) then
    print*, 'Only for closed shell systems'
    print*, 'elec_alpha_num=',elec_alpha_num
    print*, 'elec_beta_num =',elec_beta_num
    print*, 'abort'
    call abort
  endif

  ! An unknown update method would leave the amplitudes untouched at every
  ! iteration, so it is rejected here instead of looping until cc_max_iter.
  if (cc_update_method /= 'diis' .and. cc_update_method /= 'none') then
    print*,'Unknown cc_update_method: '//cc_update_method
    call abort
  endif

  ! Number of occ/vir spatial orb
  nO = nOa
  nV = nVa

  allocate(list_occ(nO),list_vir(nV))
  list_occ = cc_list_occ
  list_vir = cc_list_vir
  ! Debug
  !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
  !print*,'occ',list_occ
  !print*,'vir',list_vir

  allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
  allocate(tau(nO,nO,nV,nV))
  allocate(t1(nO,nV), r1(nO,nV))
  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))

  ! DIIS history: one column per stored iteration, T1 and T2 stacked
  if (cc_update_method == 'diis') then
    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
    all_err = 0d0
    all_t   = 0d0
  endif

  ! Init
  call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
  call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
  call update_tau_space(nO,nV,t1,t2,tau)
  !print*,'hf_energy', hf_energy
  call det_energy(det,uncorr_energy)
  print*,'Det energy', uncorr_energy
  call ccsd_energy_space(nO,nV,tau,t1,energy)
  print*,'Guess energy', uncorr_energy+energy, energy

  nb_iter = 0
  not_converged = .True.
  max_r1 = 0d0
  max_r2 = 0d0

  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |'
  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  call wall_time(ta)

  do while (not_converged)

    call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
    call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
    call compute_H_vo(nO,nV,t1,t2,H_vo)

    ! Residue
    call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
    call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
    max_r = max(max_r1,max_r2)

    ! Update
    if (cc_update_method == 'diis') then
      call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)

    ! Standard update as T = T - Delta
    elseif (cc_update_method == 'none') then
      call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
      call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
    else
      ! Unreachable after the check above; kept as a safety net
      print*,'Unknown cc_update_method: '//cc_update_method
      call abort
    endif

    call update_tau_space(nO,nV,t1,t2,tau)

    ! Energy
    call ccsd_energy_space(nO,nV,tau,t1,energy)
    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'

    nb_iter = nb_iter + 1
    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
      not_converged = .False.
    endif

  enddo
  write(*,'(A77)') ' -----------------------------------------------------------------------------'
  call wall_time(tb)
  print*,'Time: ',tb-ta, ' s'
  print*,''
  if (max_r < cc_thresh_conv) then
    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
  else
    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
  endif
  print*,''
  write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha'
  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
  write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r
  print*,''

  call write_t1(nO,nV,t1)
  call write_t2(nO,nV,t2)

  ! Deallocation of everything the (T) step does not need
  if (cc_update_method == 'diis') then
    deallocate(all_err,all_t)
  endif

  deallocate(H_vv,H_oo,H_vo,r1,r2,tau)

  ! CCSD(T): skipped when there are at most two electrons
  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then

    print*,'Computing (T) correction...'
    call wall_time(ta)
    call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
    call wall_time(tb)
    print*,'Time: ',tb-ta, ' s'

    print*,''
    write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha'
    write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha'
    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
    print*,''
  endif

  print*,'Reference determinant:'
  call print_det(det,N_int)

  deallocate(t1,t2,list_occ,list_vir)

end
subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)

  implicit none

  BEGIN_DOC
  ! Residual of the T1 amplitudes in spatial orbitals.
  !
  ! On exit r1 holds the residual with its sign flipped (for consistency with
  ! the spin-orbital code) and max_r1 the largest absolute value of its
  ! elements. Each contraction below is written as a BLAS call; the
  ! equivalent element-wise formula is given in the comment preceding it,
  ! with implicit summation over the indices absent from the left-hand side.
  END_DOC

  ! in
  integer, intent(in)           :: nO, nV
  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
  double precision, intent(in)  :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)

  ! out
  double precision, intent(out) :: r1(nO,nV), max_r1

  ! internal
  integer :: u,i,j,beta,a,b
  double precision, allocatable :: X_oo(:,:)
  double precision, allocatable :: X_voov(:,:,:,:)
  double precision, allocatable :: X_ovov(:,:,:,:)
  double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:)
  double precision, allocatable :: W_oovo(:,:,:,:)

  ! r1(u,beta) = cc_space_f_ov(u,beta)
  !$omp parallel &
  !$omp shared(nO,nV,r1,cc_space_f_ov) &
  !$omp private(u,beta) &
  !$omp default(none)
  !$omp do
  do beta = 1, nV
    do u = 1, nO
      r1(u,beta) = cc_space_f_ov(u,beta)
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a)
  ! Step 1: X_oo(u,i)  = -2d0 * t1(u,a) * cc_space_f_vo(a,i)     O(nO*nO*nV)
  ! Step 2: r1(u,beta) = r1(u,beta) + X_oo(u,i) * t1(i,beta)     O(nO*nO*nV)
  allocate(X_oo(nO,nO))
  call dgemm('N','N', nO, nO, nV,                          &
             -2d0, t1           , size(t1,1),              &
                   cc_space_f_vo, size(cc_space_f_vo,1),   &
              0d0, X_oo         , size(X_oo,1))

  ! X_oo is stored as (u,i), so it must NOT be transposed here: with
  ! op(A) = 'T' the contraction would run over the first index of X_oo,
  ! which is not the formula above.
  call dgemm('N','N', nO, nV, nO,         &
             1d0, X_oo, size(X_oo,1),     &
                  t1  , size(t1,1),       &
             1d0, r1  , size(r1,1))
  deallocate(X_oo)

  ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
  call dgemm('N','N', nO, nV, nV,         &
             1d0, t1  , size(t1,1),       &
                  H_vv, size(H_vv,1),     &
             1d0, r1  , size(r1,1))

  ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta)
  call dgemm('N','N', nO, nV, nO,         &
             -1d0, H_oo, size(H_oo,1),    &
                   t1  , size(t1,1),      &
              1d0, r1  , size(r1,1))

  ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X_voov(a,i,u,beta)
  ! X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)
  allocate(X_voov(nV, nO, nO, nV))

  !$omp parallel &
  !$omp shared(nO,nV,X_voov,t2,t1) &
  !$omp private(u,beta,i,a) &
  !$omp default(none)
  !$omp do collapse(3)
  do beta = 1, nV
    do u = 1, nO
      do i = 1, nO
        do a = 1, nV
          X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! Matrix-vector product over the compound index (a,i)
  call dgemv('T', nV*nO, nO*nV,                                 &
             1d0, X_voov, size(X_voov,1) * size(X_voov,2),      &
                  H_vo  , 1,                                    &
             1d0, r1    , 1)

  deallocate(X_voov)

  ! r1(u,beta) = r1(u,beta) + X_ovov(i,a,u,beta) * t1(i,a)
  ! X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)
  allocate(X_ovov(nO, nV, nO, nV))

  !$omp parallel &
  !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) &
  !$omp private(u,beta,i,a) &
  !$omp default(none)
  !$omp do collapse(3)
  do beta = 1, nV
    do u = 1, nO
      do a = 1, nV
        do i = 1, nO
          X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! Matrix-vector product over the compound index (i,a)
  call dgemv('T', nO*nV, nO*nV,                                 &
             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2),      &
                  t1    , 1,                                    &
             1d0, r1    , 1)

  deallocate(X_ovov)

  ! r1(u,beta) = r1(u,beta) + W_vvov(a,b,i,beta) * T_vvoo(a,b,i,u)
  ! W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)
  ! T_vvoo(a,b,i,u)    = tau(i,u,a,b)
  allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO))

  !$omp parallel &
  !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) &
  !$omp private(b,beta,i,a) &
  !$omp default(none)
  !$omp do collapse(3)
  do beta = 1, nV
    do i = 1, nO
      do b = 1, nV
        do a = 1, nV
          W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)
        enddo
      enddo
    enddo
  enddo
  !$omp end do nowait

  !$omp do collapse(3)
  do i = 1, nO
    do b = 1, nV
      do a = 1, nV
        do u = 1, nO
          T_vvoo(a,b,i,u) = tau(i,u,a,b)
        enddo
      enddo
    enddo
  enddo
  !$omp end do nowait
  !$omp end parallel

  ! Contraction over the compound index (a,b,i)
  call dgemm('T','N',nO,nV,nO*nV*nV,                                               &
             1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3),        &
                  W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3),        &
             1d0, r1    , size(r1,1))

  deallocate(W_vvov,T_vvoo)

  ! r1(u,beta) = r1(u,beta) - W_oovo(i,j,a,u) * tau(i,j,a,beta)
  ! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)
  allocate(W_oovo(nO,nO,nV,nO))

  !$omp parallel &
  !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) &
  !$omp private(u,a,i,j) &
  !$omp default(none)
  !$omp do collapse(3)
  do u = 1, nO
    do a = 1, nV
      do j = 1, nO
        do i = 1, nO
          W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! Contraction over the compound index (i,j,a)
  call dgemm('T','N', nO, nV, nO*nO*nV,                                            &
             -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3),       &
                   tau   , size(tau,1) * size(tau,2) * size(tau,3),                &
              1d0, r1    , size(r1,1))

  deallocate(W_oovo)

  ! Largest absolute element of the residual (0 when r1 is empty)
  max_r1 = 0d0
  do a = 1, nV
    do i = 1, nO
      if (dabs(r1(i,a)) > max_r1) then
        max_r1 = dabs(r1(i,a))
      endif
    enddo
  enddo

  ! Change the sign for consistency with the code in spin orbitals
  !$omp parallel &
  !$omp shared(nO,nV,r1) &
  !$omp private(a,i) &
  !$omp default(none)
  !$omp do
  do a = 1, nV
    do i = 1, nO
      r1(i,a) = -r1(i,a)
    enddo
  enddo
  !$omp end do
  !$omp end parallel

end
!H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end + +! H_vv + +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! 
H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end + +! H_vo + +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end + +! 
R2
+!
+! Build the doubles residual r2(u,v,beta,gam) from the amplitudes (t1, t2,
+! tau), the intermediates H_oo, H_vv and the cc_space_v_* arrays. The
+! intermediates g_occ, g_vir, A1, B1, J1 and K1 are computed here through
+! the compute_* routines. Terms marked "P" are the (u,beta) <-> (v,gam)
+! permuted partner of the term written before them.
+!
+!   in : nO, nV   array extents (presumably the numbers of occupied and
+!                 virtual orbitals -- confirm against the caller)
+!        t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+!        H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)   (H_vo is not referenced)
+!   out: r2(nO,nO,nV,nV)  residual; its sign is flipped before returning
+!        max_r2           largest absolute value found in r2
+
+subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
+
+  implicit none
+
+  ! in
+  integer, intent(in) :: nO, nV
+  double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
+
+  ! out
+  double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2
+
+  ! internal
+  double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:)
+  double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:)
+  integer :: u,v,i,j,beta,gam,a,b
+
+  allocate(g_occ(nO,nO), g_vir(nV,nV))
+  allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV))
+  allocate(A1(nO,nO,nO,nO))
+
+  call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+  call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+  call compute_A1(nO,nV,t1,t2,tau,A1)
+  call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, &
+       cc_space_v_vvvo,cc_space_v_vvoo,J1)
+  call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, &
+       cc_space_v_ovov,cc_space_v_vvov,K1)
+
+  ! Residual
+  ! r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam)
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,cc_space_v_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r2(u,v,beta,gam) += A1(u,v,i,j) * tau(i,j,beta,gam)
+  call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
+             1d0, A1, size(A1,1) * size(A1,2), &
+                  tau, size(tau,1) * size(tau,2), &
+             1d0, r2, size(r2,1) * size(r2,2))
+
+  ! r2(u,v,beta,gam) += B1(a,b,beta,gam) * tau(u,v,a,b)
+  ! B1 has nV**4 elements, so it only lives for the duration of this term.
+  allocate(B1(nV,nV,nV,nV))
+  call compute_B1(nO,nV,t1,t2,B1)
+  call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
+             1d0, tau, size(tau,1) * size(tau,2), &
+                  B1 , size(B1,1) * size(B1,2), &
+             1d0, r2,  size(r2,1) * size(r2,2))
+  deallocate(B1)
+
+  ! r2(u,v,beta,gam) += g_vir(a,beta) * t2(u,v,a,gam)
+  !                   + g_vir(a,gam)  * t2(v,u,a,beta)   ! P
+  ! Y_oovv(u,v,gam,beta) = sum_a t2(u,v,gam,a) * g_vir(a,beta); t2 is used
+  ! in place as the left operand, no reordered copy is needed.
+  double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
+  allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV))
+
+  call dgemm('N','N',nO*nO*nV,nV,nV, &
+             1d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                  g_vir , size(g_vir,1), &
+             0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r2(u,v,beta,gam) -= g_occ(u,i) * t2(i,v,beta,gam)
+  !                   + g_occ(v,i) * t2(i,u,gam,beta)   ! P
+  call dgemm('N','N',nO,nO*nV*nV,nO, &
+             1d0, g_occ , size(g_occ,1), &
+                  t2    , size(t2,1), &
+             0d0, X_oovv, size(X_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_oovv)
+
+  ! r2(u,v,beta,gam) += cc_space_v_ovvv(u,a,beta,gam) * t1(v,a)
+  !                   + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a)   ! P
+  double precision, allocatable :: X_vovv(:,:,:,:)
+  allocate(X_vovv(nV,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) &
+  !$omp private(u,a,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vovv, size(X_vovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r2(u,v,beta,gam) -= cc_space_v_ovov(u,a,i,gam)  * t1(i,beta) * t1(v,a)
+  !                   + cc_space_v_ovov(v,a,i,beta) * t1(i,gam)  * t1(u,a)   ! P
+  double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) &
+  !$omp private(u,a,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Y_vovv(a,u,gam,beta) = X_vovo(a,u,gam,i) * t1(i,beta)
+  call dgemm('N','N',nV*nO*nV,nV,nO, &
+             1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), &
+                  t1    , size(t1,1), &
+             0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3))
+
+  ! X_oovv(v,u,gam,beta) = t1(v,a) * Y_vovv(a,u,gam,beta)
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1, size(t1,1), &
+                  Y_vovv, size(Y_vovv,1), &
+             0d0, X_oovv, size(X_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vovo,Y_vovv)
+
+  ! r2(u,v,beta,gam) -= cc_space_v_oovo(u,v,beta,i) * t1(i,gam)
+  !                   + cc_space_v_oovo(v,u,gam,i)  * t1(i,beta)   ! P
+  call dgemm('N','N',nO*nO*nV,nV,nO, &
+             1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), &
+                  t1  , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r2(u,v,beta,gam) -= cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam)
+  !                   + cc_space_v_ovvo(v,a,gam,i)  * t1(u,a) * t1(i,beta)   ! P
+  double precision, allocatable :: Y_oovo(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) &
+  !$omp private(a,v,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do v = 1, nO
+        do a = 1, nV
+          X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Y_oovo(u,v,gam,i) = t1(u,a) * X_vovo(a,v,gam,i)
+  call dgemm('N','N',nO,nO*nV*nO,nV, &
+             1d0, t1, size(t1,1), &
+                  X_vovo, size(X_vovo,1), &
+             0d0, Y_oovo, size(Y_oovo,1))
+
+  ! X_oovv(u,v,gam,beta) = Y_oovo(u,v,gam,i) * t1(i,beta)
+  call dgemm('N','N',nO*nO*nV, nV, nO, &
+             1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), &
+                  t1    , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vovo,Y_oovo)
+
+  ! r2(u,v,beta,gam) += 0.5 * (2*J1(u,a,beta,i) - K1(u,a,i,beta)) * (2*t2(i,v,a,gam)  - t2(i,v,gam,a))
+  !                   + 0.5 * (2*J1(v,a,gam,i)  - K1(v,a,i,gam))  * (2*t2(i,u,a,beta) - t2(i,u,beta,a))   ! P
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) &
+  !$omp private(u,v,gam,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Z_ovov(u,beta,v,gam) = X_ovvo(u,beta,a,i) * Y_voov(a,i,v,gam)
+  call dgemm('N','N', nO*nV,nO*nV,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_voov, size(Y_voov,1) * size(Y_voov,2), &
+             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovvo,Y_voov)
+
+  ! r2(u,v,beta,gam) -= 0.5 * K1(u,a,i,beta) * t2(i,v,gam,a)
+  !                   + 0.5 * K1(v,a,i,gam)  * t2(i,u,beta,a)   ! P
+  double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:)
+  allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) &
+  !$omp private(u,v,a,i,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! Y_ovov(i,a,v,gam) = t2(i,v,gam,a); it is reused by the next term too.
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          Y_ovov(i,a,v,gam) = t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Z_ovov(u,beta,v,gam) = X_ovov(i,a,u,beta) * Y_ovov(i,a,v,gam)
+  call dgemm('T','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r2(u,v,beta,gam) -= K1(u,a,i,gam)  * t2(i,v,beta,a)
+  !                   + K1(v,a,i,beta) * t2(i,u,gam,a)   ! P
+  ! The right operand t2(i,v,beta,a) is exactly what Y_ovov already holds
+  ! from the previous term, so only X_ovov has to be rebuilt; Z_ovov is
+  ! written solely by the dgemm below (beta = 0).
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X_ovov) &
+  !$omp private(u,gam,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do a = 1, nV
+    do i = 1, nO
+      do gam = 1, nV
+        do u = 1, nO
+          X_ovov(u,gam,i,a) = K1(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Z_ovov(u,gam,v,beta) = X_ovov(u,gam,i,a) * Y_ovov(i,a,v,beta)
+  call dgemm('N','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovov,Y_ovov,Z_ovov)
+
+  ! Change the sign for consistency with the code in spin orbitals
+  !$omp parallel &
+  !$omp shared(nO,nV,r2) &
+  !$omp private(i,j,a,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          r2(i,j,a,b) = -r2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Largest absolute residual element.
+  max_r2 = 0d0
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          if (dabs(r2(i,j,a,b)) > max_r2) then
+            max_r2 = dabs(r2(i,j,a,b))
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(g_occ,g_vir,J1,K1,A1)
+  deallocate(X_oovv,Y_oovv)
+
+end
+
+!
A1
+!
+! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+!             + sum_a   cc_space_v_ovoo(u,a,i,j) * t1(v,a)
+!             + sum_a   cc_space_v_vooo(a,v,i,j) * t1(u,a)
+!             + sum_a,b cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+! t2 is accepted but not referenced in this routine.
+
+subroutine compute_A1(nO,nV,t1,t2,tau,A1)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(in) :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: A1(nO, nO, nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta
+
+  double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:)
+  allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO))
+
+  ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a)
+  ! X_vooo(a,u,i,j) puts the summed index a first so it can be the inner
+  ! dimension of the dgemm below.
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do u = 1, nO
+        do a = 1, nV
+          X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Y_oooo(v,u,i,j) = t1(v,a) * X_vooo(a,u,i,j)
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vooo, size(X_vooo,1), &
+             0d0, Y_oooo, size(Y_oooo,1))
+
+  ! The product comes out with (v,u) ordering, hence the index swap here.
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,Y_oooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vooo,Y_oooo)
+
+  ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a)
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1     , size(t1,1), &
+                  cc_space_v_vooo, size(cc_space_v_vooo,1), &
+             1d0, A1     , size(A1,1))
+
+  ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+  call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
+             1d0, tau     , size(tau,1) * size(tau,2), &
+                  cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
+             1d0, A1      , size(A1,1) * size(A1,2))
+
+end
+
+! B1
+!
+! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+!                  - sum_i cc_space_v_vvvo(a,b,beta,i) * t1(i,gam)
+!                  - sum_i cc_space_v_vvov(a,b,i,gam)  * t1(i,beta)
+! t2 is accepted but not referenced in this routine.
+
+subroutine compute_B1(nO,nV,t1,t2,B1)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: B1(nV, nV, nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:)
+  allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV))
+
+  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+  !$omp parallel &
+  !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  ! X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam): move i to the last
+  ! position so it can be contracted with t1(i,beta) by dgemm.
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam)
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
+                   t1    , size(t1,1), &
+              1d0, B1    , size(B1,1) * size(B1,2) * size(B1,3))
+
+
+  ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  ! Y_vvvv(a,b,gam,beta) = - X_vvvo(a,b,gam,i) * t1(i,beta)
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), &
+                   t1    , size(t1,1), &
+              0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3))
+
+  ! Y_vvvv carries (gam,beta) in its last two indices, hence the swap.
+  !$omp parallel &
+  !$omp shared(nV,B1,Y_vvvv) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vvvo,Y_vvvv)
+
+end
+
+! g_occ
+!
+! g_occ(u,i) = H_oo(u,i)
+!            + sum_a   cc_space_f_vo(a,i) * t1(u,a)
+!            + sum_a,j (2*cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+! t2 is accepted but not referenced in this routine.
+
+subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_occ(nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  ! g_occ(u,i) = t1(u,a) * cc_space_f_vo(a,i)   (beta = 0 initialises g_occ)
+  call dgemm('N','N',nO,nO,nV, &
+             1d0, t1, size(t1,1), &
+                  cc_space_f_vo, size(cc_space_f_vo,1), &
+             0d0, g_occ, size(g_occ,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) &
+  !$omp private(i,j,a,u) &
+  !$omp default(none)
+  ! g_occ(u,i) += H_oo(u,i)
+  !$omp do
+  do i = 1, nO
+    do u = 1, nO
+      g_occ(u,i) = g_occ(u,i) + H_oo(u,i)
+    enddo
+  enddo
+  !$omp end do
+
+  ! Only the i loop is distributed: each thread accumulates into its own
+  ! columns g_occ(:,i) while summing over j and a.
+  !$omp do collapse(1)
+  do i = 1, nO
+    do j = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+end
+
+! g_vir
+!
+! g_vir(a,beta) = H_vv(a,beta)
+!               - sum_i   cc_space_f_vo(a,i) * t1(i,beta)
+!               + sum_i,b (2*cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+! t2 is accepted but not referenced in this routine.
+
+subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_vir(nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  ! g_vir(a,beta) = - cc_space_f_vo(a,i) * t1(i,beta)   (beta = 0 initialises g_vir)
+  call dgemm('N','N',nV,nV,nO, &
+             -1d0, cc_space_f_vo , size(cc_space_f_vo,1), &
+                   t1   , size(t1,1), &
+              0d0, g_vir, size(g_vir,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) &
+  !$omp private(i,b,a,beta) &
+  !$omp default(none)
+  ! g_vir(a,beta) += H_vv(a,beta)
+  !$omp do
+  do beta = 1, nV
+    do a = 1, nV
+      g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta)
+    enddo
+  enddo
+  !$omp end do
+
+  ! Only the beta loop is distributed: each thread accumulates into its own
+  ! columns g_vir(:,beta) while summing over i and b.
+  !$omp do collapse(1)
+  do beta = 1, nV
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+end
+
+!
J1
+!
+! J1(u,a,beta,i) = v_ovvo(u,a,beta,i)
+!                - sum_j   v_ovoo(u,a,j,i) * t1(j,beta)
+!                + sum_b   v_vvvo(b,a,beta,i) * t1(u,b)
+!                - sum_j,b v_vvoo(a,b,i,j) * (0.5*t2(u,j,b,beta) + t1(u,b)*t1(j,beta))
+!                + sum_j,b 0.5 * (2*v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+! The integral blocks are passed as arguments; compute_r2_space calls this
+! routine with the cc_space_v_* arrays of the same names.
+
+subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO)
+  double precision, intent(out) :: J1(nO, nV, nV, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:)
+  allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV))
+
+  ! J1(u,a,beta,i) = v_ovvo(u,a,beta,i)
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) &
+  !$omp private(i,j,a,u,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = v_ovvo(u,a,beta,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i): swap the last two indices so that the
+  ! summed index j is the trailing one.
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Y_ovov(u,a,i,beta) = - X_ovoo(u,a,i,j) * t1(j,beta)
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+            -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), &
+                  t1    , size(t1,1), &
+             0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3))
+
+  ! J1(u,a,beta,i) += Y_ovov(u,a,i,beta)
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Y_ovov) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  deallocate(X_ovoo)
+
+  ! J1(u,a,beta,i) += v_vvvo(b,a,beta,i) * t1(u,b)
+  call dgemm('N','N',nO,nV*nV*nO,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvvo, size(v_vvvo,1), &
+             1d0, J1    , size(J1,1))
+
+  ! J1(u,a,beta,i) -= v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta))
+  ! Y_ovov is reused here as scratch for the amplitude factor.
+  double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:)
+  allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO))
+  !$omp parallel &
+  !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) &
+  !$omp private(i,beta,a,u,b,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do beta = 1, nV
+        do u = 1, nO
+          Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          X_voov(a,i,j,b) = v_vvoo(a,b,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Z_ovvo(u,beta,a,i) = - Y_ovov(u,beta,j,b) * X_voov(a,i,j,b)
+  call dgemm('N','T',nO*nV,nV*nO,nO*nV, &
+             -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+                   X_voov, size(X_voov,1) * size(X_voov,2), &
+             0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+  deallocate(X_voov)
+
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO))
+  ! J1(u,a,beta,i) += Z_ovvo(u,beta,a,i); the operands of the last term are
+  ! prepared in the same parallel region.
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! J1(u,a,beta,i) += 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do i = 1, nO
+        do a = 1, nV
+          Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,b,j) = t2(u,j,beta,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! Z_ovvo(u,beta,a,i) = X_ovvo(u,beta,b,j) * Y_vovo(a,i,b,j)
+  call dgemm('N','T',nO*nV,nV*nO,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), &
+             0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+
+  ! J1(u,a,beta,i) += Z_ovvo(u,beta,a,i)
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! NOTE(review): Y_vovo is not in this list; it is presumably released
+  ! automatically when the routine returns -- confirm.
+  deallocate(X_ovvo,Z_ovvo,Y_ovov)
+
+end
+
+! K1
+!
+! K1(u,a,i,beta) = v_ovov(u,a,i,beta)
+!                - sum_j   v_ovoo(u,a,i,j) * t1(j,beta)
+!                + sum_b   v_vvov(b,a,i,beta) * t1(u,b)
+!                - sum_j,b v_vvoo(b,a,i,j) * (0.5*t2(u,j,b,beta) + t1(u,b)*t1(j,beta))
+! The integral blocks are passed as arguments; compute_r2_space calls this
+! routine with the cc_space_v_* arrays of the same names.
+
+subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV)
+  double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: K1(nO, nV, nO, nV)
+
+  double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO))
+
+  ! K1(u,a,i,beta) = v_ovov(u,a,i,beta)
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = v_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! X(b,j,a,i) = - v_vvoo(b,a,i,j): the minus sign of the last term is
+  ! folded into X.
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do j = 1, nO
+        do b = 1, nV
+          X(b,j,a,i) = - v_vvoo(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! Y(u,beta,b,j) = 0.5*t2(u,j,b,beta) + t1(u,b)*t1(j,beta)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! K1(u,a,i,beta) -= v_ovoo(u,a,i,j) * t1(j,beta)
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+             -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), &
+                   t1    , size(t1,1), &
+              1d0, K1    , size(K1,1) * size(K1,2) * size(K1,3))
+
+  ! K1(u,a,i,beta) += t1(u,b) * v_vvov(b,a,i,beta)
+  call dgemm('N','N',nO,nV*nO*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvov, size(v_vvov,1), &
+             1d0, K1    , size(K1,1))
+
+  ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i)
+  call dgemm('N','N',nV*nO,nO*nV,nV*nO, &
+             1d0, Y, size(Y,1) * size(Y,2), &
+                  X, size(X,1) * size(X,2), &
+             0d0, Z, size(Z,1) * size(Z,2))
+
+  ! K1(u,a,i,beta) += Z(u,beta,a,i)
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,Z) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X,Y,Z)
+
+end
diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f
new file mode 100644
index 00000000..6f2de11c
--- /dev/null
+++ b/src/ccsd/ccsd_spin_orb.irp.f
@@ -0,0 +1,16 @@
+! Prog
+
+program ccsd
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD in spin orbitals
+  END_DOC
+
+  read_wf = .True.
+ touch read_wf + + call run_ccsd_spin_orb + +end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f new file mode 100644 index 00000000..23e2cef1 --- /dev/null +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -0,0 +1,2301 @@ +! Code + +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! 
Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! 
Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! 
VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! 
Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. 
+ endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' 
+ !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end + +! Energy + +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end + +! Tau + +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! 
Tau_t + +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! 
r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! 
end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end + +! R2 + +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: 
A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! 
end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! 
r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 
1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP 
PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! 
end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! 
!   end do
!end do
!allocate(X_vvoo(nV,nV,nO,nO))

  ! (tail of the preceding routine: statements reproduced unchanged)
  call dgemm('T','N', nV, nV*nO*nO, nO, &
             1d0, t1    , size(t1,1), &
                  v_ovoo, size(v_ovoo,1), &
             0d0, X_vvoo, size(X_vvoo,1))

  !$OMP PARALLEL &
  !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) &
  !$OMP PRIVATE(i,j,a,b,e,m) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do j=1,nO
        do i=1,nO
          r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j)
        end do
      end do
    end do
  end do
  !$OMP END DO

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do j=1,nO
        do i=1,nO
          r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  deallocate(X_vvoo)

end

! Use cF_oo
!
! Builds the intermediate cF_oo (via compute_cF_oo) and adds its
! contributions to the residuals:
!   r1(i,a)     = r1(i,a)     - sum_m t1(m,a) * cF_oo(m,i)
!   r2(i,j,a,b) = r2(i,j,a,b) - sum_m t2(i,m,a,b) * cF_oo(m,j)
!                             + sum_m t2(j,m,a,b) * cF_oo(m,i)
!
! nO, nV        : array extents (presumably numbers of occupied / virtual
!                 spin orbitals - confirm against the caller)
! t1, t2, tau_t : amplitudes, read only
! F_oo, F_ov    : blocks forwarded to compute_cF_oo; F_oo is (nO,nO) to
!                 match the dummy argument Foo(nO,nO) of that routine
! v_ooov,v_oovv : integral blocks forwarded to compute_cF_oo
! r1, r2        : residuals, updated in place

subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)

  implicit none

  integer,intent(in)              :: nO,nV
  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)
  ! F_oo was declared (nO,nV); the callee and the physics need (nO,nO)
  double precision, intent(in)    :: F_oo(nO,nO), F_ov(nO,nV)
  double precision, intent(in)    :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV)

  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)

  double precision, allocatable   :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
  integer                         :: i,j,m,a,b

  allocate(cF_oo(nO,nO))

  call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)

  ! r1(i,a) = r1(i,a) - sum_m cF_oo(m,i) * t1(m,a)
  call dgemm('T','N', nO, nV, nO, &
             -1d0, cF_oo, size(cF_oo,1), &
                   t1   , size(t1,1), &
              1d0, r1   , size(r1,1))

  allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV))

  ! Swap the two occupied indices of t2 so that the contracted index m
  ! becomes the leading one: X_oovv(m,i,a,b) = t2(i,m,a,b)
  !$OMP PARALLEL &
  !$OMP SHARED(t2,X_oovv,nO,nV) &
  !$OMP PRIVATE(i,m,a,b) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do i=1,nO
        do m=1,nO
          X_oovv(m,i,a,b) = t2(i,m,a,b)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  ! Y_oovv(j,i,a,b) = sum_m cF_oo(m,j) * t2(i,m,a,b)
  call dgemm('T','N', nO, nO*nV*nV, nO, &
             1d0, cF_oo , size(cF_oo,1), &
                  X_oovv, size(X_oovv,1), &
             0d0, Y_oovv, size(Y_oovv,1))

  ! Antisymmetrised accumulation into r2
  !$OMP PARALLEL &
  !$OMP SHARED(r2,Y_oovv,nO,nV) &
  !$OMP PRIVATE(i,j,a,b) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do j=1,nO
        do i=1,nO
          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  deallocate(cF_oo,X_oovv,Y_oovv)

end

! Use cF_ov
!
! Builds the intermediate cF_ov (via compute_cF_ov) and adds its
! contributions to the residuals:
!   r1(i,a)     += sum_{m,e} t2(i,m,a,e) * cF_ov(m,e)
!   r2(i,j,a,b) += -0.5 * sum_{m,e} t2(i,j,a,e) * t1(m,b) * cF_ov(m,e)
!                  +0.5 * sum_{m,e} t2(i,j,b,e) * t1(m,a) * cF_ov(m,e)
!                  -0.5 * sum_{m,e} t2(i,m,a,b) * t1(j,e) * cF_ov(m,e)
!                  +0.5 * sum_{m,e} t2(j,m,a,b) * t1(i,e) * cF_ov(m,e)
!
! t1, t2, F_ov, v_oovv are read only; r1 and r2 are updated in place.

subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)

  implicit none

  integer, intent(in)             :: nO,nV
  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
  double precision, intent(in)    :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV)

  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)

  double precision, allocatable   :: cF_ov(:,:), A_oo(:,:), A_vv(:,:)
  double precision, allocatable   :: X_oovv(:,:,:,:), B_oovv(:,:,:,:)
  integer                         :: i,j,a,b,e,m

  allocate(cF_ov(nO,nV))

  call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)

  ! r1(i,a) += sum_{m,e} t2(i,m,a,e) * cF_ov(m,e)
  !$OMP PARALLEL &
  !$OMP SHARED(r1,t2,cF_ov,nO,nV) &
  !$OMP PRIVATE(i,a,e,m) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(1)
  do a=1,nV
    do i=1,nO
      do e=1,nV
        do m=1,nO
          r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV))

  ! A_vv(b,e) = sum_m t1(m,b) * cF_ov(m,e)
  call dgemm('T','N', nV, nV, nO, &
             1d0, t1   , size(t1,1), &
                  cF_ov, size(cF_ov,1), &
             0d0, A_vv , size(A_vv,1))

  ! X_oovv(i,j,a,b) = 0.5 * sum_e t2(i,j,a,e) * A_vv(b,e)
  call dgemm('N','T', nO*nO*nV, nV, nV, &
             0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
                    A_vv  , size(A_vv,1), &
             0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))

  !$OMP PARALLEL &
  !$OMP SHARED(nO,nV,r2,X_oovv) &
  !$OMP PRIVATE(i,j,a,b) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do j=1,nO
        do i=1,nO
          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  deallocate(A_vv)

  ! X_oovv stays allocated: it is reused as the output of the next product
  allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))

  ! A_oo(j,m) = sum_e t1(j,e) * cF_ov(m,e)
  call dgemm('N','T', nO, nO, nV, &
             1d0, t1   , size(t1,1), &
                  cF_ov, size(cF_ov,1), &
             0d0, A_oo , size(A_oo,1))

  ! B_oovv(m,i,a,b) = t2(i,m,a,b)
  !$OMP PARALLEL &
  !$OMP SHARED(t2,B_oovv,nO,nV) &
  !$OMP PRIVATE(i,m,a,b) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b = 1, nV
    do a = 1, nV
      do i = 1, nO
        do m = 1, nO
          B_oovv(m,i,a,b) = t2(i,m,a,b)
        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  ! X_oovv(j,i,a,b) = 0.5 * sum_m A_oo(j,m) * t2(i,m,a,b)
  call dgemm('N','N', nO, nO*nV*nV, nO, &
             0.5d0, A_oo,   size(A_oo,1), &
                    B_oovv, size(B_oovv,1), &
             0d0  , X_oovv, size(X_oovv,1))

  !$OMP PARALLEL &
  !$OMP SHARED(r2,X_oovv,nO,nV) &
  !$OMP PRIVATE(i,j,a,b) &
  !$OMP DEFAULT(NONE)

  !$OMP DO collapse(3)
  do b=1,nV
    do a=1,nV
      do j=1,nO
        do i=1,nO
          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b)
        end do
      end do
    end do
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  deallocate(cF_ov,A_oo,B_oovv,X_oovv)

end
Use cF_vv + +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end + +! Use cW_vvvd + +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. 
f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end + +! cF_oo + +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! 
end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end + +! cF_ov + +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end + +! 
cF_vv + +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! 
end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + do f = 1, nV + call dgemm('T','N', nV, nV, nO*nO,& + -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), & + v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), & + 1d0 , cF_vv, size(cF_vv,1)) + enddo + +end + +! cW_oooo + +subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oooo(nO,nO,nO,nO) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cW_oooo(nO,nO,nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_oooo(:,:,:,:) + + ! oooo block + + !cW_oooo = v_oooo + + !do j=1,nO + ! do i=1,nO + ! do n=1,nO + ! do m=1,nO + + ! do e=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) + ! end do + + ! end do + ! end do + ! 
end do + !end do + allocate(X_oooo(nO,nO,nO,nO)) + + call dgemm('N','T', nO*nO*nO, nO, nV, & + 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & + t1 , size(t1,1), & + 0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3)) + !$OMP PARALLEL & + !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) & + !$OMP PRIVATE(i,j,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j=1,nO + do i=1,nO + do n=1,nO + do m=1,nO + cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oooo) + + !do m=1,nO + ! do n=1,nO + ! do i=1,nO + ! do j=1,nO + ! + ! do e=1,nV + ! do f=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end + +! 
cW_ovvo + +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! 
end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! 
do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end + +! 
cW_vvvv + +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! 
cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end + +! cW_vvvf + +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! 
end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f new file mode 100644 index 00000000..1f1db87e --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -0,0 +1,412 @@ +! 
Dumb way + +subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + integer :: i,j,k,a,b,c + + allocate(W(nO,nO,nO,nV,nV,nV)) + allocate(V(nO,nO,nO,nV,nV,nV)) + + call form_w(nO,nV,t2,W) + call form_v(nO,nV,t1,W,V) + + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + energy = energy / 3d0 + + deallocate(V,W) +end + +subroutine form_w(nO,nV,t2,W) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,l,a,b,c,d + + W = 0d0 + do c = 1, nV + print*,'W:',c,'/',nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + + do d = 1, nV + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (bd|ai) + ! phys + + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + enddo + + do l = 1, nO + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (ck|jl) + ! phys + - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! 
prev ac ik + - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + enddo + + enddo + enddo + enddo + enddo + enddo + enddo + +end + +subroutine form_v(nO,nV,t1,w,v) + +implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV) + double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,a,b,c + + V = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + + cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + + cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + + cc_space_v_vvoo(a,b,i,j) * t1(k,c) + enddo + enddo + enddo + enddo + enddo + enddo + +end + +! Main + +subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_ijk + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! 
Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP PRIVATE(a,b,c,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c)) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * 
(V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) & + * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + +! W_ijk + +subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) + + implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) + + integer :: l,a,b,c,d + + !W = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & + !$OMP PRIVATE(a,b,c,d,l) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = 0d0 + + do d = 1, nV + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (bd|ai) + ! phys + !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! 
prev ab ij + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + + do l = 1, nO + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (ck|jl) + ! phys + !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj + - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik + - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij + - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj + - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end + +! V_ijk + +subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v) + +implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO) + double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO) + + integer :: a,b,c + + !V = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! 
do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end diff --git a/src/ccsd/ccsd_t_spin_orb.irp.f b/src/ccsd/ccsd_t_spin_orb.irp.f new file mode 100644 index 00000000..3f79e4a0 --- /dev/null +++ b/src/ccsd/ccsd_t_spin_orb.irp.f @@ -0,0 +1,376 @@ +! v1 + +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! 
+ P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end + +! 
v2 + +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! 
Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! 
- P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! 
(T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end diff --git a/src/ccsd/org/ccsd_space_orb.org b/src/ccsd/org/ccsd_space_orb.org new file mode 100644 index 00000000..a848fd26 --- /dev/null +++ b/src/ccsd/org/ccsd_space_orb.org @@ -0,0 +1,2121 @@ +* ccsd with spatial orbitals + +Scuseria, Gustavo E.; Janssen, Curtis L.; Schaefer, Henry +F. (1988). An efficient reformulation of the closed-shell coupled +cluster single and double excitation (CCSD) equations. The Journal of +Chemical Physics, 89(12), 7382–. doi:10.1063/1.455269 + +* Code +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb.irp.f +program ccsd + + implicit none + + read_wf = .True. + touch read_wf + + call run_ccsd_space_orb + +end +#+end_src + +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine run_ccsd_space_orb + + implicit none + + integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d + integer :: u,v,gam,beta,tmp_gam,tmp_beta + integer :: nb_iter + double precision :: get_two_e_integral + double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb + logical :: not_converged + + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t1(:,:), r1(:,:) + double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + integer, allocatable :: list_occ(:), list_vir(:) + integer(bit_kind) :: det(N_int,2) + integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + + PROVIDE mo_two_e_integrals_in_map + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Check that the reference is a closed shell determinant + if (cc_ref_is_open_shell) then + call abort + endif + + ! 
Number of occ/vir spatial orb + nO = nOa + nV = nVa + + allocate(list_occ(nO),list_vir(nV)) + list_occ = cc_list_occ + list_vir = cc_list_vir + ! Debug + !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) + !print*,'occ',list_occ + !print*,'vir',list_vir + + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) + allocate(tau(nO,nO,nV,nV)) + allocate(t1(nO,nV), r1(nO,nV)) + allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + if (elec_alpha_num /= elec_beta_num) then + print*, 'Only for closed shell systems' + print*, 'elec_alpha_num=',elec_alpha_num + print*, 'elec_beta_num =',elec_beta_num + print*, 'abort' + call abort + endif + + ! Init + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) + call update_tau_space(nO,nV,t1,t2,tau) + !print*,'hf_energy', hf_energy + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + print*,'Guess energy', uncorr_energy+energy, energy + + nb_iter = 0 + not_converged = .True. + max_r1 = 0d0 + max_r2 = 0d0 + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(ta) + + do while (not_converged) + + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + ! Residue + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + max_r = max(max_r1,max_r2) + + ! 
Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call update_tau_space(nO,nV,t1,t2,tau) + + ! Energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocation + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + + deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + + ! CCSD(T) + double precision :: e_t + + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + + ! 
Dumb way + !call wall_time(ta) + !call ccsd_par_t_space(nO,nV,t1,t2,e_t) + !call wall_time(tb) + !print*,'Time: ',tb-ta, ' s' + + !print*,'' + !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + !print*,'' + + ! New + print*,'Computing (T) correction...' + call wall_time(ta) + call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + print*,'' + endif + + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(t1,t2) + +end +#+END_SRC + +* Energy +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine ccsd_energy_space(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! 
internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do i = 1, nO + do a = 1, nV + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end +#+END_SRC + +* T +** Tau +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine update_tau_space(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* Residual equations +** R1 +*** R1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! 
internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) + ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) + ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + !do beta = 1, nV + ! do u = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! <=> + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * & + ! 
(2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! <=> + ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! 
r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do u = 1, nO + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do j = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! enddo + ! enddo + ! enddo + ! 
enddo + !enddo + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + !$omp do collapse(3) + do u = 1, nO + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + if (dabs(r1(i,a)) > max_r1) then + max_r1 = dabs(r1(i,a)) + endif + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+end_src + +*** Intermediates +**** H_oo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + !H_oo = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! H_oo(u,i) = cc_space_f_oo(u,i) + + ! do j = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! 
H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end +#+END_SRC + +**** H_vv +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! 
H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end +#+END_SRC + +**** H_vo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! 
H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end +#+END_SRC + +** R2 +*** R2 +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1(nO,nV,t1,t2,tau,A1) + call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do j = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + A1(u,v,i,j) * tau(i,j,beta,gam) + ! 
enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do b = 1, nv + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + B1(a,b,beta,gam) * tau(u,v,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + allocate(B1(nV,nV,nV,nV)) + call compute_B1(nO,nV,t1,t2,B1) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1,1) * size(B1,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(B1) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + g_vir(a,beta) * t2(u,v,a,gam) & + ! + g_vir(a,gam) * t2(v,u,a,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! 
!        do i = 1, nO
  !          r2(u,v,beta,gam) = r2(u,v,beta,gam) &
  !               - g_occ(u,i) * t2(i,v,beta,gam) &
  !               - g_occ(v,i) * t2(i,u,gam,beta) ! P
  !        enddo
  !      enddo
  !    enddo
  !  enddo
  !enddo

  ! BLAS form of the reference loops above:
  !   X_oovv(u,v,beta,gam) = sum_i g_occ(u,i) * t2(i,v,beta,gam)
  ! t2 is viewed as an (nO) x (nO*nV*nV) matrix.
  call dgemm('N','N',nO,nO*nV*nV,nO, &
             1d0, g_occ , size(g_occ,1), &
                  t2    , size(t2,1), &
             0d0, X_oovv, size(X_oovv,1))

  ! Subtract the term and its permuted partner (u<->v, beta<->gam).
  !$omp parallel &
  !$omp shared(nO,nV,r2,X_oovv) &
  !$omp private(u,v,gam,beta) &
  !$omp default(none)
  !$omp do collapse(3)
  do gam = 1, nV
    do beta = 1, nV
      do v = 1, nO
        do u = 1, nO
          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  deallocate(X_oovv)

  !do gam = 1, nV
  !  do beta = 1, nV
  !    do v = 1, nO
  !      do u = 1, nO
  !        do a = 1, nV
  !          r2(u,v,beta,gam) = r2(u,v,beta,gam) &
  !               + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) &
  !               + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P
  !        enddo
  !      enddo
  !    enddo
  !  enddo
  !enddo

  double precision, allocatable :: X_vovv(:,:,:,:)
  allocate(X_vovv(nV,nO,nV,nV))

  ! Reorder the integrals so that the contracted index a is the leading
  ! dimension: X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam)
  !$omp parallel &
  !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) &
  !$omp private(u,a,gam,beta) &
  !$omp default(none)
  !$omp do collapse(3)
  do gam = 1, nV
    do beta = 1, nV
      do u = 1, nO
        do a = 1, nV
          X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! Y_oovv(v,u,beta,gam) = sum_a t1(v,a) * X_vovv(a,u,beta,gam)
  call dgemm('N','N',nO,nO*nV*nV,nV, &
             1d0, t1    , size(t1,1), &
                  X_vovv, size(X_vovv,1), &
             0d0, Y_oovv, size(Y_oovv,1))

  ! X_vovv (nV*nO*nV*nV elements) is not read again in this subroutine.
  ! Release it now instead of holding it until the subroutine returns, so it
  ! does not coexist with the work arrays allocated for the later terms.
  deallocate(X_vovv)

  ! Add the term and its permuted partner (u<->v, beta<->gam).
  !$omp parallel &
  !$omp shared(nO,nV,r2,Y_oovv) &
  !$omp private(u,v,gam,beta) &
  !$omp default(none)
  !$omp do collapse(3)
  do gam = 1, nV
    do beta = 1, nV
      do v = 1, nO
        do u = 1, nO
          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  ! Y_oovv is not used by any of the remaining contractions either.
  deallocate(Y_oovv)

  !do gam = 1, nV
  !  do beta = 1, nV
  !    do v = 1, nO
  !      do u = 1, nO
  !        do a = 1, nV
  !          do i = 1, nO
  !
r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovov(u,a,i,gam) * t1(i,beta) * t1(v,a) & + ! - cc_space_v_ovov(v,a,i,beta) * t1(i,gam) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) & + ! - cc_space_v_oovo(v,u,gam,i) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! 
enddo + !enddo + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) & + ! - cc_space_v_ovvo(v,a,gam,i) * t1(u,a) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do 
+ !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * & + ! (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) & + ! + 0.5d0 * (2d0 * J1(v,a,gam,i) - K1(v,a,i,gam)) * & + ! (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) & + ! - 0.5d0 * K1(v,a,i,gam) * t2(i,u,beta,a) !P + ! enddo + ! 
enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - K1(u,a,i,gam) * t2(i,v,beta,a) & + ! - K1(v,a,i,beta) * t2(i,u,gam,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! 
!        enddo
  !enddo

  ! Third K1 * t2 contribution to the doubles residual:
  !   Z_ovov(u,gam,v,beta) = sum_{i,a} K1(u,a,i,gam) * t2(i,v,beta,a)
  ! evaluated as one matrix product  Z = X * Y  with
  !   X_ovov(u,gam,i,a)  = K1(u,a,i,gam)     viewed as (nO*nV) x (nO*nV)
  !   Y_ovov(i,a,v,beta) = t2(i,v,beta,a)    viewed as (nO*nV) x (nO*nV)
  !
  ! The reshaped t2 is stored in Y_ovov, the array dgemm actually reads as
  ! its B operand. It was previously written into Z_ovov, which dgemm
  ! overwrites (beta = 0d0), so the product silently relied on Y_ovov still
  ! holding the same data from the preceding contraction.
  !$omp parallel &
  !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) &
  !$omp private(u,v,gam,beta,i,a) &
  !$omp default(none)
  !$omp do collapse(3)
  do a = 1, nV
    do i = 1, nO
      do gam = 1, nV
        do u = 1, nO
          X_ovov(u,gam,i,a) = K1(u,a,i,gam)
        enddo
      enddo
    enddo
  enddo
  !$omp end do nowait

  ! Independent of the loop above (different target array), hence the nowait.
  !$omp do collapse(3)
  do beta = 1, nV
    do v = 1, nO
      do a = 1, nV
        do i = 1, nO
          Y_ovov(i,a,v,beta) = t2(i,v,beta,a)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  call dgemm('N','N',nO*nV,nO*nV,nO*nV, &
             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))

  ! Subtract the term and its permuted partner (u<->v, beta<->gam).
  !$omp parallel &
  !$omp shared(nO,nV,r2,Z_ovov) &
  !$omp private(u,v,gam,beta) &
  !$omp default(none)
  !$omp do collapse(3)
  do gam = 1, nV
    do beta = 1, nV
      do v = 1, nO
        do u = 1, nO
          r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam)
        enddo
      enddo
    enddo
  enddo
  !$omp end do
  !$omp end parallel

  deallocate(X_ovov,Y_ovov,Z_ovov)

  !
Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + if (dabs(r2(i,j,a,b)) > max_r2) then + max_r2 = dabs(r2(i,j,a,b)) + endif + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end +#+end_src + +*** Intermediates +**** A1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_A1(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + !A1 = 0d0 + + !do j = 1, nO + ! do i = 1, nO + ! do v = 1, nO + ! do u = 1, nO + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + + ! do a = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) & + ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) + ! + ! do b = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! 
A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end +#+END_SRC + +**** B1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_B1(nO,nV,t1,t2,B1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !B1 = 0d0 + + !do gam = 1, nV + ! do beta = 1, nV + ! do b = 1, nV + ! do a = 1, nV + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + ! do i = 1, nO + ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & + ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + ! enddo + ! + ! enddo + ! enddo + ! 
enddo + !enddo + + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) + allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) + + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1 , size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + + ! 
B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end +#+END_SRC + +**** g_occ +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_occ = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! g_occ(u,i) = H_oo(u,i) + ! + ! do a = 1, nV + ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) + ! + ! do j = 1, nO + ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + ! enddo + ! + ! enddo + ! 
enddo + !enddo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** g_vir +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_vir = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! g_vir(a,beta) = H_vv(a,beta) + ! + ! do i = 1, nO + ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) + ! + ! do b = 1, nV + ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + ! enddo + ! + ! enddo + ! 
enddo + !enddo + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** J1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !J1 = 0d0 + + !do i = 1, nO + ! do beta = 1, nV + ! do a = 1, nV + ! do u = 1, nO + ! J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i) + + ! do j = 1, nO + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_ovoo(u,a,j,i) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! 
enddo + !enddo + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_ovoo) + + ! 
v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + enddo + !$omp 
end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end +#+END_SRC + +**** K1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !K1 = 0d0 + + !do beta = 1, nV + ! do i = 1, nO + ! do a = 1, nV + ! do u = 1, nO + ! K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta) + + ! do j = 1, nO + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_ovoo(u,a,i,j) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! 
enddo + !enddo + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end +#+END_SRC + diff --git a/src/ccsd/org/ccsd_spin_orb.org b/src/ccsd/org/ccsd_spin_orb.org new file mode 100644 index 00000000..8fb403c3 --- /dev/null +++ b/src/ccsd/org/ccsd_spin_orb.org @@ -0,0 +1,2352 @@ +* CCSD spin orb +** Ref +A direct product decomposition approach for symmetry exploitation in manybody +methods. I. Energy calculations +John F. Stanton, Jürgen Gauss, John D. Watts, and Rodney J. 
Bartlett +The Journal of Chemical Physics 94, 4334 (1991) +http://dx.doi.org/10.1063/1.460620A + +** Prog +#+begin_src f90 :comments org :tangle ccsd_spin_orb.irp.f +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + read_wf = .True. + touch read_wf + + call run_ccsd_spin_orb + +end +#+end_src + +** Code +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! 
Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! 
Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! 
O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! 
Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! cW intermediates, timed separately from the cF ones + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cWs:',tf-ti,'s' + endif + + ! Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + ! No amplitude update exists for this method: t1/t2 would stay unchanged and + ! the loop would only repeat the same residuals until cc_max_iter, so stop here + print*,'Unknown cc_update_method: '//cc_update_method + call abort + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! 
Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' 
+ !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end +#+end_src + +* Energy +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end +#+end_src + +* T +** Update +*** Tau +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo 
+ !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** Tau_t +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* R +** R1 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do 
n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! 
do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end +#+end_src + +** R2 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: 
A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! 
end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! 
r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 
1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP 
PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! 
end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! 
end do + !end do + !allocate(X_vvoo(nV,nV,nO,nO)) + + call dgemm('T','N', nV, nV*nO*nO, nO, & + 1d0, t1 , size(t1,1), & + v_ovoo, size(v_ovoo,1), & + 0d0, X_vvoo, size(X_vvoo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_vvoo) + +end +#+end_src + +* Use intermediates +** Use cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) + + implicit none + + ! Builds the cF_oo intermediate and accumulates into r1 and r2 the terms + ! that are contracted with it + + integer,intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) + ! F_oo is the occupied-occupied block: same (nO,nO) shape as the Foo dummy of compute_cF_oo + double precision, intent(in) :: F_oo(nO,nO), F_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + integer :: i,j,m,a,b + + allocate(cF_oo(nO,nO)) + + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! 
end do + !end do + + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_oo,X_oovv,Y_oovv) + +end +#+end_src + +** Use cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) + double precision, allocatable :: X_oovv(:,:,:,:), B_oovv(:,:,:,:) + integer :: i,j,a,b,e,m + + allocate(cF_ov(nO,nV)) + + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t2,cF_ov,nO,nV) & + !$OMP PRIVATE(i,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! 
r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,r2,X_oovv) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! 
end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_ov,A_oo,B_oovv,X_oovv) + +end +#+end_src + +** Use cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end +#+end_src + +** Use cW_vvvf +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), 
tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. 
f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end +#+end_src + +* Intermediates +** cF +*** cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! 
end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end +#+end_src + +*** cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! 
end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! 
end do
+  !end do
+  do f = 1, nV
+    call dgemm('T','N', nV, nV, nO*nO,&
+         -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), &
+                 v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), &
+         1d0   , cF_vv, size(cF_vv,1))
+  enddo
+
+end
+#+end_src
+
+** cW
+*** cW_oooo
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+
+  ! Build the occupied-occupied intermediate cW_oooo(m,n,i,j):
+  !
+  !   cW_oooo(m,n,i,j) = v_oooo(m,n,i,j)
+  !                    + sum_e [ t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) ]
+  !                    + 0.25 * sum_{e,f} tau(i,j,e,f)*v_oovv(m,n,e,f)
+  !
+  ! The t1 term is evaluated once as X_oooo(m,n,i,j) = sum_e v_ooov(m,n,i,e)*t1(j,e)
+  ! with a single dgemm and then antisymmetrised in (i,j); the tau term is a
+  ! second dgemm accumulated on top.
+  !
+  !   nO, nV  : extents of the occupied / virtual index ranges
+  !   t1      : (nO,nV) amplitudes
+  !   t2      : (nO,nO,nV,nV), not referenced in this routine
+  !   tau     : (nO,nO,nV,nV)
+  !   v_oooo, v_ooov, v_oovv : integral blocks with the shapes declared below
+  !   cW_oooo : (nO,nO,nO,nO) result, fully overwritten
+
+  implicit none
+
+  integer,intent(in)           :: nO,nV
+  double precision,intent(in)  :: t1(nO,nV)
+  double precision,intent(in)  :: t2(nO,nO,nV,nV)
+  double precision,intent(in)  :: tau(nO,nO,nV,nV)
+  double precision,intent(in)  :: v_oooo(nO,nO,nO,nO)
+  double precision,intent(in)  :: v_ooov(nO,nO,nO,nV)
+  double precision,intent(in)  :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out) :: cW_oooo(nO,nO,nO,nO)
+
+  integer                      :: i,j,m,n
+  integer                      :: a,b,e,f
+  double precision, allocatable :: X_oooo(:,:,:,:)
+
+  ! oooo block
+
+  !cW_oooo = v_oooo
+
+  !do j=1,nO
+  !  do i=1,nO
+  !    do n=1,nO
+  !      do m=1,nO
+
+  !        do e=1,nV
+  !          cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(X_oooo(nO,nO,nO,nO))
+
+  ! X_oooo viewed as an (nO*nO*nO) x nO matrix: its leading dimension is the
+  ! product of its first THREE extents (dims 1, 2 and 3).
+  call dgemm('N','T', nO*nO*nO, nO, nV, &
+       1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), &
+            t1    , size(t1,1), &
+       0d0, X_oooo, size(X_oooo,1) * size(X_oooo,2) * size(X_oooo,3))
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) &
+  !$OMP PRIVATE(i,j,m,n) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do i=1,nO
+      do n=1,nO
+        do m=1,nO
+          cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(X_oooo)
+
+  !do m=1,nO
+  !  do n=1,nO
+  !    do i=1,nO
+  !      do j=1,nO
+  !
+  !        do e=1,nV
+  !          do f=1,nV
+  !            cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !
end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end +#+end_src + +*** cW_ovvo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! 
end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! 
do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end +#+end_src + +*** cW_vvvv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + 
double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! 
end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end +#+end_src + +*** cW_vvvf +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! 
end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end +#+end_src + diff --git a/src/ccsd/org/ccsd_t_space_orb.org b/src/ccsd/org/ccsd_t_space_orb.org new file mode 100644 index 00000000..8709d7be --- /dev/null +++ b/src/ccsd/org/ccsd_t_space_orb.org @@ -0,0 +1,428 @@ +Ref: +Integral-Direct and Parallel Implementation of the CCSD(T) Method: +Algorithmic Developments and Large-Scale Applications +László Gyevi-Nagy, Mihály Kállay, and Péter R. Nagy +J. Chem. Theory Comput. 
2020, 16, 1, 366–384
+https://doi.org/10.1021/acs.jctc.9b00957
+
+* Dumb way
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine ccsd_par_t_space(nO,nV,t1,t2,energy)
+
+  ! Brute-force reference evaluation of the (T) energy.
+  ! The full six-index intermediates W and V are built for every
+  ! (i,j,k,a,b,c) by form_w and form_v and contracted in one sextuple loop;
+  ! the accumulated sum is divided by 3 before returning.
+  !
+  !   nO, nV : extents of the occupied / virtual index ranges
+  !   t1     : (nO,nV) amplitudes, forwarded to form_v
+  !   t2     : (nO,nO,nV,nV) amplitudes, forwarded to form_w
+  !   energy : result (intent out)
+  !
+  ! Memory: W and V each hold nO**3 * nV**3 double precision values.
+  ! NOTE(review): cc_space_f_o / cc_space_f_v are not declared in this
+  ! routine -- presumably IRPF90 providers; confirm against the ccsd module.
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  integer :: i,j,k,a,b,c
+
+  allocate(W(nO,nO,nO,nV,nV,nV))
+  allocate(V(nO,nO,nO,nV,nV,nV))
+
+  call form_w(nO,nV,t2,W)
+  call form_v(nO,nV,t1,W,V)
+
+  energy = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  energy = energy / 3d0
+
+  deallocate(V,W)
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_w(nO,nV,t2,W)
+
+  ! Fill the full six-index intermediate W(i,j,k,a,b,c).
+  ! For every element, six index-permuted products of cc_space_v_vvvo with
+  ! t2 are summed over the virtual index d, then six index-permuted products
+  ! of cc_space_v_vooo with t2 are subtracted over the occupied index l.
+  ! Prints one progress line per value of the outermost index c.
+  !
+  !   nO, nV : extents of the occupied / virtual index ranges
+  !   t2     : (nO,nO,nV,nV) amplitudes
+  !   W      : (nO,nO,nO,nV,nV,nV) result, zeroed then accumulated
+  !
+  ! NOTE(review): cc_space_v_vvvo / cc_space_v_vooo are not declared here --
+  ! presumably IRPF90 providers; confirm.
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV)
+
+  integer :: i,j,k,l,a,b,c,d
+
+  W = 0d0
+  do c = 1, nV
+    print*,'W:',c,'/',nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+
+              do d = 1, nV
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                  ! chem (bd|ai)
+                  ! phys
+                  + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+                  + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+                  + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+                  + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+                  + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+                  + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+              enddo
+
+              do l = 1, nO
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                  ! chem (ck|jl)
+                  ! phys
+                  - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+                  - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+                  - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+                  - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+                  - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+                  - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+              enddo
+
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_v(nO,nV,t1,w,v)
+
+! Fill the full six-index intermediate V(i,j,k,a,b,c) as W(i,j,k,a,b,c)
+! plus three products of cc_space_v_vvoo with t1, one for each pairing
+! (i,a), (j,b), (k,c) of the t1 indices.
+!
+!   nO, nV : extents of the occupied / virtual index ranges
+!   t1     : (nO,nV) amplitudes
+!   w      : (nO,nO,nO,nV,nV,nV) input, as produced by form_w
+!   v      : (nO,nO,nO,nV,nV,nV) result
+!
+! V is zeroed first, so the "V + W" in the assignment reduces to W.
+! NOTE(review): cc_space_v_vvoo is not declared here -- presumably an
+! IRPF90 provider; confirm.
+
+implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO, nV)
+  double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV)
+  double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV)
+
+  integer :: i,j,k,a,b,c
+
+  V = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+                + cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+                + cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+                + cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+#+END_SRC
+
+* Better way
+** Main
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
+
+  implicit none
+
+  integer, intent(in) :: nO,nV
+  double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in) :: t2(nO,nO,nV,nV)
+  double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:)
+  double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:)
+  double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:)
+  integer :: i,j,k,l,a,b,c,d
+  double precision :: e,ta,tb, delta, delta_ijk
+
+
!allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP 
PRIVATE(a,b,c,e,delta) &
+        !$OMP DEFAULT(NONE)
+        e = 0d0
+        !$OMP DO
+        do c = 1, nV
+          do b = 1, nV
+            do a = 1, nV
+              delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c))
+              !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c)
+              e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) &
+                    * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta
+            enddo
+          enddo
+        enddo
+        !$OMP END DO
+        !$OMP CRITICAL
+        energy = energy + e
+        !$OMP END CRITICAL
+        !$OMP END PARALLEL
+      enddo
+    enddo
+    call wall_time(tb)
+    write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s'
+  enddo
+
+  energy = energy / 3d0
+
+  deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo)
+  !deallocate(V,W)
+end
+#+END_SRC
+
+** W_ijk
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W)
+
+  ! Build the virtual block W(a,b,c) of the (T) intermediate for ONE fixed
+  ! occupied triple (i,j,k).
+  !
+  !   nO, nV  : extents of the occupied / virtual index ranges
+  !   i, j, k : the fixed occupied indices
+  !   T_vvoo, T_ovvo : reordered copies of t2, as built by ccsd_par_t_space_v2:
+  !                    T_vvoo(d,c,k,j) = t2(k,j,c,d), T_ovvo(l,a,b,i) = t2(i,l,a,b)
+  !   X_vvvo, X_ovoo : reordered integrals, as built by ccsd_par_t_space_v2:
+  !                    X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i), X_ovoo(l,c,j,k) = v_vooo(c,j,k,l)
+  !   W       : (nV,nV,nV) result, fully overwritten
+  !
+  ! In all four inputs the contracted index (d or l) is the first one, so the
+  ! innermost loops below read with unit stride.
+  !
+  ! Both contractions for one (a,b,c) element are done inside a single loop
+  ! nest. They used to be two separate worksharing loops, the first ending in
+  ! "END DO nowait" while the second accumulated into the same W(a,b,c).
+  ! That is only race-free if both loops give each (b,c) pair to the same
+  ! thread, which OpenMP guarantees only for an explicit schedule(static).
+  ! The fused loop has no such dependence, and the summation order per
+  ! element (all d terms, then all l terms) is unchanged.
+
+  implicit none
+
+  integer, intent(in) :: nO,nV,i,j,k
+  !double precision, intent(in) :: t2(nO,nO,nV,nV)
+  double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO)
+  double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO)
+
+  integer :: l,a,b,c,d
+
+  !W = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !    do k = 1, nO
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) &
+  !$OMP PRIVATE(a,b,c,d,l) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        W(a,b,c) = 0d0
+
+        ! Virtual contraction. Reference expression in the original layout:
+        !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+        ! chem (bd|ai)
+        ! phys
+        !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+        !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+        !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+        !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+        !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+        !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+        do d = 1, nV
+          W(a,b,c) = W(a,b,c) &
+            + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) &
+            + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj
+            + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik
+            + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij
+            + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj
+            + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik
+        enddo
+
+        ! Occupied contraction. Reference expression in the original layout:
+        !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+        ! chem (ck|jl)
+        ! phys
+        !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+        !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+        !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+        !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+        !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+        !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+        do l = 1, nO
+          W(a,b,c) = W(a,b,c) &
+            - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+            - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj
+            - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik
+            - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij
+            - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj
+            - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik
+        enddo
+
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !    enddo
+  !  enddo
+  !enddo
+
+end
+#+END_SRC
+
+** V_ijk
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v)
+
+implicit none
+
+  integer, intent(in) :: nO,nV,i,j,k
+  !double precision, intent(in) :: t1(nO,nV)
+  double precision, intent(in) :: T_vo(nV,nO)
+  double precision, intent(in) :: X_vvoo(nV,nV,nO,nO)
+  double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO)
+  double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO)
+
+  integer :: a,b,c
+
+  !V = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !
do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end +#+END_SRC diff --git a/src/ccsd/org/ccsd_t_spin_orb.org b/src/ccsd/org/ccsd_t_spin_orb.org new file mode 100644 index 00000000..c9a41abd --- /dev/null +++ b/src/ccsd/org/ccsd_t_spin_orb.org @@ -0,0 +1,385 @@ +* CCSD(T) spin orb + +Ref: +John D. Watts, Jürgen Gauss, and Rodney J. Bartlett +J. Chem. Phys. 98, 8718 (1993) +http://dx.doi.org/10.1063/1.464480 + +** v1 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! 
- P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! 
(T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end +#+end_src + +** v2 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! 
Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e,delta,delta_abc) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! 
+ P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! 
E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end +#+end_src From 859f8653de5b64fce3bbf367ea3d96f54facf2ea Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 14 Mar 2023 21:18:19 +0100 Subject: [PATCH 14/87] tc_scf added var_tc option --- src/tc_scf/tc_scf.irp.f | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 187750ff..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,7 +8,7 @@ program tc_scf implicit none - print *, 'starting ...' + print *, ' starting ...' my_grid_becke = .True. 
my_n_pt_r_grid = 30 @@ -27,17 +27,37 @@ program tc_scf !call orthonormalize_mos() PROVIDE tcscf_algorithm - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() + PROVIDE var_tc + + if(var_tc) then + + print *, ' VAR-TC' + + if(tcscf_algorithm == 'DIIS') then + print*, ' NOT implemented yet' + elseif(tcscf_algorithm == 'Simple') then + call rh_vartcscf_simple() + else + print *, ' not implemented yet', tcscf_algorithm + stop + endif + else - print *, ' not implemented yet', tcscf_algorithm - stop + + if(tcscf_algorithm == 'DIIS') then + call rh_tcscf_diis() + elseif(tcscf_algorithm == 'Simple') then + call rh_tcscf_simple() + else + print *, ' not implemented yet', tcscf_algorithm + stop + endif + + call minimize_tc_orb_angles() + call print_energy_and_mos() + endif - call minimize_tc_orb_angles() - call print_energy_and_mos() end From a284f6f9d8edc268c6656c235198988fe00e997f Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 14 Mar 2023 23:49:38 +0100 Subject: [PATCH 15/87] removed STUPID DAMN BUG in ordering of psi_selectors for TC --- external/qp2-dependencies | 2 +- src/cipsi_tc_bi_ortho/selection.irp.f | 14 ++++++++++++-- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 ++ src/fci_tc_bi/selectors.irp.f | 16 ++++++++-------- src/non_h_ints_mu/total_tc_int.irp.f | 2 ++ src/tc_bi_ortho/psi_det_tc_sorted.irp.f | 2 +- src/tc_bi_ortho/slater_tc_3e.irp.f | 1 + src/tc_bi_ortho/slater_tc_opt_double.irp.f | 1 + src/tc_bi_ortho/u0_h_u0.irp.f | 6 +++--- 9 files changed, 31 insertions(+), 15 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..ce14f57b 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 
13e6c510..633ca815 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -916,8 +916,18 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = mat_l(istate, p1, p2) alpha_h_psi = mat_r(istate, p1, p2) endif - coef(istate) = alpha_h_psi / delta_E - e_pert(istate) = coef(istate) * psi_h_alpha + val = 4.d0 * psi_h_alpha * alpha_h_psi + tmp = dsqrt(delta_E * delta_E + val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + e_pert(istate) = 0.5d0 * (tmp - delta_E) + if(dsqrt(dabs(tmp)).gt.1.d-4.and.dabs(alpha_h_psi).gt.1.d-4)then + coef(istate) = e_pert(istate) / alpha_h_psi + else + coef(istate) = alpha_h_psi / delta_E + endif + ! if(selection_tc == 1 )then ! if(e_pert(istate).lt.0.d0)then ! e_pert(istate) = 0.d0 diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index c1e4af0c..e67287d3 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -78,6 +78,8 @@ subroutine run_stochastic_cipsi (N_det < N_det_max) .and. & (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) & ) + print*,'maxval(abs(pt2_data % pt2(1:N_states)))',maxval(abs(pt2_data % pt2(1:N_states))) + print*,pt2_max write(*,'(A)') '--------------------------------------------------------------------------------' diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index af1176d2..3830927b 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -18,14 +18,14 @@ BEGIN_PROVIDER [ integer, N_det_selectors] double precision :: norm, norm_max call write_time(6) N_det_selectors = N_det - norm = 1.d0 - do i=1,N_det - norm = norm - psi_average_norm_contrib_tc(i) - if (norm - 1.d-10 < 1.d0 - threshold_selectors) then - N_det_selectors = i - exit - endif - enddo +! norm = 1.d0 +! do i=1,N_det +! norm = norm - psi_average_norm_contrib_tc(i) +! 
if (norm - 1.d-10 < 1.d0 - threshold_selectors) then +! N_det_selectors = i +! exit +! endif +! enddo N_det_selectors = max(N_det_selectors,N_det_generators) call write_int(6,N_det_selectors,'Number of selectors') END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index c1e010c7..2fd2719c 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -56,6 +56,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao do i = 1, ao_num do k = 1, ao_num ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo @@ -83,6 +84,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_nu do i = 1, ao_num do k = 1, ao_num ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist_test(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f index 212c8588..42617557 100644 --- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f +++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f @@ -39,7 +39,7 @@ END_PROVIDER psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) iorder(i) = i enddo - call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) +! 
call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) do i=1,N_det do j=1,N_int psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i)) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 9740ee2f..7b73d5f2 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -232,6 +232,7 @@ subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) other_spin(1) = 2 other_spin(2) = 1 + call get_excitation_degree(key_i, key_j, degree, Nint) hthree = 0.d0 diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index d094d76e..baca498c 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -94,6 +94,7 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) integer :: ipart, ihole double precision :: direct_int, exchange_int + nexc(1) = 0 nexc(2) = 0 !! Get all the holes and particles of key_i with respect to the ROHF determinant diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/u0_h_u0.irp.f index afbe15a7..e107ad88 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/u0_h_u0.irp.f @@ -93,9 +93,9 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) - provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e - provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell - provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb +! provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e +! provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell +! 
provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo From 4e35f9dbf622eb9d2ccfb04653f53c90874533ff Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 15 Mar 2023 11:55:03 +0100 Subject: [PATCH 16/87] does not work --- src/cipsi/selection.irp.f | 45 +-- src/cipsi_tc_bi_ortho/cipsi.irp.f | 2 +- src/cipsi_tc_bi_ortho/pt2.irp.f | 2 +- src/cipsi_tc_bi_ortho/selection.irp.f | 365 +++++++++---------- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 +- src/fci_tc_bi/diagonalize_ci.irp.f | 5 +- src/fci_tc_bi/generators.irp.f | 7 +- src/fci_tc_bi/selectors.irp.f | 9 +- src/tc_bi_ortho/psi_det_tc_sorted.irp.f | 29 +- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 3 + 10 files changed, 203 insertions(+), 266 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 62d7c52c..6f40a809 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -312,9 +312,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end do deallocate(indices) -! !$OMP CRITICAL -! print *, 'Step1: ', i_generator, preinteresting(0) -! !$OMP END CRITICAL allocate(banned(mo_num, mo_num,2), bannedOrb(mo_num, 2)) allocate (mat(N_states, mo_num, mo_num)) @@ -466,17 +463,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d fullinteresting(sze+1) = i end if end do - allocate (fullminilist (N_int, 2, fullinteresting(0)), & minilist (N_int, 2, interesting(0)) ) -! if(pert_2rdm)then -! allocate(coef_fullminilist_rev(N_states,fullinteresting(0))) -! do i=1,fullinteresting(0) -! do j = 1, N_states -! coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j) -! enddo -! enddo -! 
endif do i=1,fullinteresting(0) fullminilist(:,:,i) = psi_det_sorted(:,:,fullinteresting(i)) @@ -524,33 +512,19 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle -! !$OMP CRITICAL -! print *, 'Step3: ', i_generator, h1, interesting(0) -! !$OMP END CRITICAL call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) - -! if(.not.pert_2rdm)then - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) -! else -! call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) -! endif + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if enddo if(s1 /= s2) monoBdo = .false. enddo deallocate(fullminilist,minilist) -! if(pert_2rdm)then -! deallocate(coef_fullminilist_rev) -! endif enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine - - - subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types @@ -606,18 +580,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! to a determinant of the future. In that case, the determinant will be ! detected as already generated when generating in the future with a ! double excitation. -! -! if (.not.do_singles) then -! if ((h1 == p1) .or. (h2 == p2)) then -! cycle -! endif -! endif -! -! if (.not.do_doubles) then -! if ((h1 /= p1).and.(h2 /= p2)) then -! cycle -! endif -! endif ! 
----- if(bannedOrb(p2, s2)) cycle @@ -974,13 +936,10 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then -! call get_d2_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then -! call get_d1_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else -! call get_d0_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) end if else if(nt == 4) then @@ -1540,8 +1499,6 @@ subroutine past_d2(banned, p, sp) end if end - - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/cipsi_tc_bi_ortho/cipsi.irp.f b/src/cipsi_tc_bi_ortho/cipsi.irp.f index b1941068..fb907cb3 100644 --- a/src/cipsi_tc_bi_ortho/cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f @@ -64,7 +64,7 @@ subroutine run_cipsi endif if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) N_det = N_det_max soft_touch N_det psi_det psi_coef diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f index e7dca456..13b4dff4 100644 --- a/src/cipsi_tc_bi_ortho/pt2.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2.irp.f @@ -52,7 +52,7 @@ subroutine pt2_tc_bi_ortho ! 
call routine_save_right if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) N_det = N_det_max soft_touch N_det psi_det psi_coef diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 633ca815..393023f2 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -91,7 +91,6 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) end subroutine select_connected - double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) use bitmasks implicit none @@ -136,7 +135,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) end -subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock_diag_tmp, E0, pt2_data, buf, subset, csubset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -266,7 +265,6 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock enddo do k = 1, nmax - i = indices(k) mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i)) mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i)) @@ -304,10 +302,10 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock prefullinteresting(sze+1) = i endif endif - enddo deallocate(indices) + allocate( banned(mo_num, mo_num,2), bannedOrb(mo_num, 2) ) allocate( mat(N_states, mo_num, mo_num) ) allocate( mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num) ) @@ -463,17 +461,11 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock minilist (N_int, 2, interesting(0)) ) do i = 1, fullinteresting(0) - do k = 1, N_int - fullminilist(k,1,i) = 
psi_det_sorted_tc(k,1,fullinteresting(i)) - fullminilist(k,2,i) = psi_det_sorted_tc(k,2,fullinteresting(i)) - enddo + fullminilist(:,:,i) = psi_det_sorted_tc(:,:,fullinteresting(i)) enddo do i = 1, interesting(0) - do k = 1, N_int - minilist(k,1,i) = psi_det_sorted_tc(k,1,interesting(i)) - minilist(k,2,i) = psi_det_sorted_tc(k,2,interesting(i)) - enddo + minilist(:,:,i) = psi_det_sorted_tc(:,:,interesting(i)) enddo do s2 = s1, 2 @@ -516,196 +508,19 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock if(fullMatch) cycle call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, mat_l, mat_r) - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_l, mat_r) endif - enddo - if(s1 /= s2) monoBdo = .false. enddo - deallocate(fullminilist, minilist) - enddo enddo - deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) deallocate(mat_l, mat_r) - end subroutine select_singles_and_doubles - -! --- - -subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) - - use bitmasks - implicit none - - BEGIN_DOC - ! Identify the determinants in det which are in the internal space. These are - ! the determinants that can be produced by creating two particles on the mask. - END_DOC - - integer, intent(in) :: i_gen, N - integer, intent(in) :: interesting(0:N) - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) - logical, intent(inout) :: banned(mo_num, mo_num) - logical, intent(out) :: fullMatch - - integer :: i, j, na, nb, list(3) - integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) - - fullMatch = .false. - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - genl : do i=1, N - ! 
If det(i) can't be generated by the mask, cycle - do j=1, N_int - if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl - if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl - end do - - ! If det(i) < det(i_gen), it hs already been considered - if(interesting(i) < i_gen) then - fullMatch = .true. - return - end if - - ! Identify the particles - do j=1, N_int - myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) - myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) - end do - - call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int) - call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int) - banned(list(1), list(2)) = .true. - end do genl - -end subroutine spot_isinwf - -! --- - -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, mat_l, mat_r) - - BEGIN_DOC - ! Computes the contributions A(r,s) by - ! comparing the external determinant to all the internal determinants det(i). - ! an applying two particles (r,s) to the mask. 
- END_DOC - - use bitmasks - implicit none - - integer, intent(in) :: sp, i_gen, N_sel - integer, intent(in) :: interesting(0:N_sel) - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) - logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num) - - integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt - integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - integer(bit_kind) :: phasemask(N_int,2) - - - PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp - - - mat = 0d0 - mat_l = 0d0 - mat_r = 0d0 - - do i = 1, N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i = 1, N_sel - if(interesting(i) < 0) then - stop 'prefetch interesting(i) and det(i)' - endif - - mobMask(1,1) = iand(negMask(1,1), det(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), det(1,2,i)) - nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) - - if(nt > 4) cycle - - do j = 2, N_int - mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - enddo - - if(nt > 4) cycle - - if (interesting(i) == i_gen) then - if(sp == 3) then - do k = 1, mo_num - do j = 1, mo_num - banned(j,k,2) = banned(k,j,1) - enddo - enddo - else - do k = 1, mo_num - do l = k+1, mo_num - banned(l,k,1) = banned(k,l,1) - enddo - enddo - endif - endif - - if (interesting(i) >= i_gen) then - - call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - - perMask(1,1) = iand(mask(1,1), not(det(1,1,i))) - perMask(1,2) = iand(mask(1,2), not(det(1,2,i))) - do j=2,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - 
perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do -! call get_d3_h ( det(1,1,i), bannedOrb, banned, mat , mask, p, sp, psi_selectors_coef_transp_tc (1, interesting(i)) ) -! call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_r, mat_l, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) & -! , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) ) - - call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) - - call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) - if(nt == 4) then - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) - elseif(nt == 3) then - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) - else - call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) - endif - elseif(nt == 4) then - call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d2(banned, p, sp) - elseif(nt == 3) then - call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d1(bannedOrb, p) - endif - enddo - -end subroutine splash_pq - -! 
--- - subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_l, mat_r) - use bitmasks use selection_types implicit none @@ -740,7 +555,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs - do jstate = 1, N_states do istate = 1, N_states s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) @@ -780,11 +594,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! endif !endif - ! MANU: ERREUR dans les calculs puisque < I | H | J > = 0 - ! n'implique pas < I | H_TC | J > = 0 ?? - !val = maxval(abs(mat(1:N_states, p1, p2))) - !if( val == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) if(do_only_cas) then @@ -811,7 +620,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(excitation_max >= 0) then do_cycle = .True. if(excitation_ref == 1) then - call get_excitation_degree(HF_bitmask, det(1,1), degree, N_int) + call get_excitation_degree(HF_bitmask,det(1,1),degree,N_int) do_cycle = do_cycle .and. (degree > excitation_max) elseif(excitation_ref == 2) then do k = 1, N_dominant_dets_of_cfgs @@ -995,12 +804,118 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d enddo ! end do p2 enddo ! end do p1 - end subroutine fill_buffer_double - ! --- +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, mat_l, mat_r) + + BEGIN_DOC + ! Computes the contributions A(r,s) by + ! comparing the external determinant to all the internal determinants det(i). + ! an applying two particles (r,s) to the mask. 
+ END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: sp, i_gen, N_sel + integer, intent(in) :: interesting(0:N_sel) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num) + + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int,2) + PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + + mat = 0d0 + mat_l = 0d0 + mat_r = 0d0 + + do i = 1, N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i = 1, N_sel + if(interesting(i) < 0) then + stop 'prefetch interesting(i) and det(i)' + endif + + mobMask(1,1) = iand(negMask(1,1), det(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), det(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + + if(nt > 4) cycle + + do j = 2, N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + enddo + + if(nt > 4) cycle + + if (interesting(i) == i_gen) then + if(sp == 3) then + do k = 1, mo_num + do j = 1, mo_num + banned(j,k,2) = banned(k,j,1) + enddo + enddo + else + do k = 1, mo_num + do l = k+1, mo_num + banned(l,k,1) = banned(k,l,1) + enddo + enddo + endif + endif + + if (interesting(i) >= i_gen) then + + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + + perMask(1,1) = iand(mask(1,1), not(det(1,1,i))) + perMask(1,2) = iand(mask(1,2), not(det(1,2,i))) + do j=2,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + 
perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do +! call get_d3_h ( det(1,1,i), bannedOrb, banned, mat , mask, p, sp, psi_selectors_coef_transp_tc (1, interesting(i)) ) +! call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_r, mat_l, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) & +! , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) ) + + call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) + + call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) + if(nt == 4) then + call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + elseif(nt == 3) then + call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif + elseif(nt == 4) then + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + call past_d2(banned, p, sp) + elseif(nt == 3) then + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + call past_d1(bannedOrb, p) + endif + enddo + +end subroutine splash_pq +! --- subroutine past_d1(bannedOrb, p) use bitmasks @@ -1043,9 +958,61 @@ subroutine past_d2(banned, p, sp) end do end do end if - end subroutine past_d2 +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) + + use bitmasks + implicit none + + BEGIN_DOC + ! Identify the determinants in det which are in the internal space. These are + ! the determinants that can be produced by creating two particles on the mask. 
+ END_DOC + + integer, intent(in) :: i_gen, N + integer, intent(in) :: interesting(0:N) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + logical, intent(inout) :: banned(mo_num, mo_num) + logical, intent(out) :: fullMatch + + integer :: i, j, na, nb, list(3) + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + ! If det(i) can't be generated by the mask, cycle + do j=1, N_int + if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl + if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl + end do + + ! If det(i) < det(i_gen), it hs already been considered + if(interesting(i) < i_gen) then + fullMatch = .true. + return + end if + + ! Identify the particles + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + end do + +! call debug_det(myMask, N_int) + call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int) + call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int) + banned(list(1), list(2)) = .true. + end do genl + +end subroutine spot_isinwf + ! --- subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint) diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index e67287d3..64e7e6ba 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -54,7 +54,7 @@ subroutine run_stochastic_cipsi ! if (N_det > N_det_max) then -! psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) +! psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) ! psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) ! N_det = N_det_max ! 
soft_touch N_det psi_det psi_coef diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index 56c561ac..c8369e93 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -49,9 +49,8 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j)) enddo enddo - SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth psi_coef psi_l_coef_bi_ortho psi_r_coef_bi_ortho - - + SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef call save_tc_bi_ortho_wavefunction end diff --git a/src/fci_tc_bi/generators.irp.f b/src/fci_tc_bi/generators.irp.f index 55c0cbb9..bf972423 100644 --- a/src/fci_tc_bi/generators.irp.f +++ b/src/fci_tc_bi/generators.irp.f @@ -43,9 +43,14 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - psi_det_sorted_tc_gen = psi_det_sorted_tc + psi_det_sorted_tc_gen = psi_det_sorted_tc psi_coef_sorted_tc_gen = psi_coef_sorted_tc psi_det_sorted_tc_gen_order = psi_det_sorted_tc_order + integer :: i +! do i = 1,N_det +! print*,'i = ',i +! call debug_det(psi_det_sorted_tc(1,1,i),N_int) +! enddo END_PROVIDER diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 3830927b..94aa4b01 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -47,13 +47,20 @@ END_PROVIDER enddo do k=1,N_states do i=1,N_det_selectors - psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) + psi_selectors_coef(i,k) = dsqrt(dabs(psi_l_coef_sorted_bi_ortho(i,k) * psi_r_coef_sorted_bi_ortho(i,k))) psi_selectors_coef_tc(i,1,k) = psi_l_coef_sorted_bi_ortho(i,k) psi_selectors_coef_tc(i,2,k) = psi_r_coef_sorted_bi_ortho(i,k) +! 
psi_selectors_coef_tc(i,1,k) = psi_l_coef_bi_ortho(i,k) +! psi_selectors_coef_tc(i,2,k) = psi_r_coef_bi_ortho(i,k) ! psi_selectors_coef_tc(i,1,k) = 1.d0 ! psi_selectors_coef_tc(i,2,k) = 1.d0 enddo enddo + print*,'selectors ' + do i = 1, N_det_selectors + print*,i,dabs(psi_selectors_coef_tc(i,1,1)*psi_selectors_coef_tc(i,2,1)) + call debug_det(psi_selectors(1,1,i),N_int) + enddo END_PROVIDER diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f index 42617557..35c78468 100644 --- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f +++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f @@ -10,7 +10,10 @@ BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_tc, (psi_det_size) ] psi_average_norm_contrib_tc(:) = 0.d0 do k=1,N_states + print*,'in psi_average_norm_contrib_tc' do i=1,N_det + print*,i,dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k)) + call debug_det(psi_det(1,1,i),N_int) psi_average_norm_contrib_tc(i) = psi_average_norm_contrib_tc(i) + & dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k))*state_average_weight(k) enddo @@ -26,11 +29,18 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc, (psi_det_size,N_states) ] &BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_tc, (psi_det_size) ] &BEGIN_PROVIDER [ integer, psi_det_sorted_tc_order, (psi_det_size) ] +&BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] +&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] implicit none BEGIN_DOC ! Wave function sorted by determinants contribution to the norm (state-averaged) ! ! psi_det_sorted_tc_order(i) -> k : index in psi_det + ! + ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc + ! + ! 
psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc + END_DOC END_DOC integer :: i,j,k integer, allocatable :: iorder(:) @@ -39,7 +49,7 @@ END_PROVIDER psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) iorder(i) = i enddo -! call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) + call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) do i=1,N_det do j=1,N_int psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i)) @@ -66,24 +76,13 @@ END_PROVIDER psi_average_norm_contrib_sorted_tc(N_det+1:psi_det_size) = 0.d0 psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0 - deallocate(iorder) - -END_PROVIDER - - BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] -&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] - BEGIN_DOC - ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc - ! psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc - END_DOC - implicit none - integer :: i, j, k psi_r_coef_sorted_bi_ortho = 0.d0 psi_l_coef_sorted_bi_ortho = 0.d0 do i = 1, N_det - psi_r_coef_sorted_bi_ortho(i,1) = psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) - psi_l_coef_sorted_bi_ortho(i,1) = psi_l_coef_bi_ortho(psi_det_sorted_tc_order(i),1) + psi_r_coef_sorted_bi_ortho(i,1:N_states) = psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1:N_states) + psi_l_coef_sorted_bi_ortho(i,1:N_states) = psi_l_coef_bi_ortho(psi_det_sorted_tc_order(i),1:N_states) enddo + deallocate(iorder) END_PROVIDER diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index d39b7a29..d12bbb4e 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -133,7 +133,10 @@ call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = 
',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) norm_ground_left_right_bi_orth = 0.d0 + print*,'In diago' do j = 1, N_det + print*,j,dabs(leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)) + call debug_det(psi_det(1,1,j),N_int) norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth From d1068047e8df56adbdd3006a114b88e9995fb28f Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 15 Mar 2023 14:33:10 +0100 Subject: [PATCH 17/87] trying to debug some psi_det_generators stuffs in fci_tc_bi --- src/cipsi_tc_bi_ortho/selection.irp.f | 11 ++++--- src/fci_tc_bi/generators.irp.f | 8 +++++ src/fci_tc_bi/selectors.irp.f | 15 ++++++++++ src/tc_bi_ortho/psi_det_tc_sorted.irp.f | 40 ++++++++++++++++++++++++- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 3 ++ 5 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 633ca815..0c3f0451 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -464,15 +464,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock do i = 1, fullinteresting(0) do k = 1, N_int - fullminilist(k,1,i) = psi_det_sorted_tc(k,1,fullinteresting(i)) - fullminilist(k,2,i) = psi_det_sorted_tc(k,2,fullinteresting(i)) + fullminilist(k,1,i) = psi_selectors(k,1,fullinteresting(i)) + fullminilist(k,2,i) = psi_selectors(k,2,fullinteresting(i)) enddo enddo do i = 1, interesting(0) do k = 1, N_int - minilist(k,1,i) = psi_det_sorted_tc(k,1,interesting(i)) - minilist(k,2,i) = psi_det_sorted_tc(k,2,interesting(i)) + minilist(k,1,i) = psi_selectors(k,1,interesting(i)) + minilist(k,2,i) = psi_selectors(k,2,interesting(i)) enddo enddo @@ -628,7 +628,10 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do + print*,'in selection ' do i = 1, N_sel +! 
call debug_det(det(1,1,i),N_int) +! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif diff --git a/src/fci_tc_bi/generators.irp.f b/src/fci_tc_bi/generators.irp.f index 55c0cbb9..250a1f71 100644 --- a/src/fci_tc_bi/generators.irp.f +++ b/src/fci_tc_bi/generators.irp.f @@ -31,6 +31,14 @@ END_PROVIDER END_DOC psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted_tc(1:N_int,1:2,1:N_det) psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted_tc(1:N_det,1:N_states) + integer :: i +! print*,'generators ' + do i = 1, N_det + if(N_det.ne.1)then + print*,'writing generators' + write(33,*) psi_det_generators(1,1,i), psi_det_generators(1,2,i) + endif + enddo END_PROVIDER diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 3830927b..3c12bb07 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -43,15 +43,27 @@ END_PROVIDER do k=1,N_int psi_selectors(k,1,i) = psi_det_sorted_tc(k,1,i) psi_selectors(k,2,i) = psi_det_sorted_tc(k,2,i) +! psi_selectors(k,2,i) = psi_det(k,2,i) +! psi_selectors(k,2,i) = psi_det(k,2,i) enddo enddo + print*,'selectors ' do k=1,N_states do i=1,N_det_selectors psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) +! psi_selectors_coef_tc(i,1,k) = psi_l_coef_bi_ortho(i,k) +! psi_selectors_coef_tc(i,2,k) = psi_r_coef_bi_ortho(i,k) psi_selectors_coef_tc(i,1,k) = psi_l_coef_sorted_bi_ortho(i,k) psi_selectors_coef_tc(i,2,k) = psi_r_coef_sorted_bi_ortho(i,k) +! call debug_det(psi_selectors(1,1,i),N_int) + if(N_det.ne.1)then + print*,'writing selectors' + write(34,*)psi_selectors(1,1,i),psi_selectors(1,2,i) + write(40,'(F10.7)')dabs(psi_selectors_coef_tc(i,1,1) * psi_selectors_coef_tc(i,2,1)) + endif ! psi_selectors_coef_tc(i,1,k) = 1.d0 ! 
psi_selectors_coef_tc(i,2,k) = 1.d0 + enddo enddo @@ -71,6 +83,9 @@ END_PROVIDER psi_selectors_coef_transp_tc(k,1,i) = psi_selectors_coef_tc(i,1,k) psi_selectors_coef_transp_tc(k,2,i) = psi_selectors_coef_tc(i,2,k) enddo + if(N_det.ne.1)then + write(41,'(F10.7)')dabs(psi_selectors_coef_transp_tc(1,1,i)*psi_selectors_coef_transp_tc(1,2,i)) + endif enddo END_PROVIDER diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f index 42617557..e8477dec 100644 --- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f +++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f @@ -34,13 +34,19 @@ END_PROVIDER END_DOC integer :: i,j,k integer, allocatable :: iorder(:) + print *, 'providing psi_det_sorted_tc' allocate ( iorder(N_det) ) + print*,'before ' do i=1,N_det psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) iorder(i) = i + print*,i,iorder(i),psi_average_norm_contrib_sorted_tc(i) enddo -! call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) + call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) + print*,'after ' do i=1,N_det +! iorder(i) = i + print*,i,iorder(i),psi_average_norm_contrib_sorted_tc(i) do j=1,N_int psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i)) psi_det_sorted_tc(j,2,i) = psi_det(j,2,iorder(i)) @@ -67,6 +73,23 @@ END_PROVIDER psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0 deallocate(iorder) + logical :: pouet + pouet = .true. + do i = 1, N_det + if(psi_average_norm_contrib_sorted_tc(i) == 0.d0)then + pouet = .False. + exit + endif + enddo + + if(pouet.and.N_det.ne.1)then + print*,'writing sorted' + do i = 1, N_det +! 
call debug_det(psi_det_sorted_tc(1,1,i),N_int) + print*,i,psi_average_norm_contrib_sorted_tc(i) + write(35,*)psi_det_sorted_tc(1,1,i),psi_det_sorted_tc(1,2,i) + enddo + endif END_PROVIDER @@ -84,6 +107,21 @@ END_PROVIDER psi_r_coef_sorted_bi_ortho(i,1) = psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) psi_l_coef_sorted_bi_ortho(i,1) = psi_l_coef_bi_ortho(psi_det_sorted_tc_order(i),1) enddo + logical :: pouet + pouet = .true. + do i = 1, N_det + if(psi_l_coef_sorted_bi_ortho(i,1) == 0.d0)then + pouet = .False. + exit + endif + enddo + if(pouet.and.N_det.ne.1)then + print*,'psi_r_coef_sorted_bi_ortho' + do i = 1, N_det + print*,psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) + write(42,'(F10.7)')dabs(psi_r_coef_sorted_bi_ortho(i,1)*psi_l_coef_sorted_bi_ortho(i,1)) + enddo + endif END_PROVIDER diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index d39b7a29..c66ff036 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -133,7 +133,10 @@ call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) norm_ground_left_right_bi_orth = 0.d0 + print*,'after diago' do j = 1, N_det + call debug_det(psi_det(1,1,j),N_int) + print*,j,dabs(leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)) norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth From 22fb8c17e21f986c7e5b18faf01ed206fa6ea73d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 16 Mar 2023 14:00:21 +0100 Subject: [PATCH 18/87] fixed the bug of misalignement between coefs and determinants in fci_tc_bi_ortho --- .../pt2_stoch_routines.irp.f | 1 - src/cipsi_tc_bi_ortho/selection.irp.f | 29 +++-- src/cipsi_tc_bi_ortho/selection_buffer.irp.f | 12 +- src/cipsi_tc_bi_ortho/slave_cipsi.irp.f | 2 - 
src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 1 + src/fci_tc_bi/generators.irp.f | 8 -- src/fci_tc_bi/selectors.irp.f | 47 +------- src/tc_bi_ortho/psi_det_tc_sorted.irp.f | 104 ++++++++---------- src/tc_bi_ortho/psi_r_l_prov.irp.f | 26 ++--- src/tc_bi_ortho/tc_bi_ortho.irp.f | 26 ++--- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 25 ++++- src/tc_keywords/EZFIO.cfg | 8 ++ 12 files changed, 124 insertions(+), 165 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f index 027b74c5..284b2bc8 100644 --- a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f @@ -134,7 +134,6 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) PROVIDE psi_det_hii selection_weight pseudo_sym PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max PROVIDE excitation_beta_max excitation_alpha_max excitation_max - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp if (h0_type == 'CFG') then PROVIDE psi_configuration_hii det_to_configuration diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 0c3f0451..4c271a4b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -181,7 +181,6 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE banned_excitation @@ -616,7 +615,6 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp mat = 
0d0 @@ -628,7 +626,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do - print*,'in selection ' +! print*,'in selection ' do i = 1, N_sel ! call debug_det(det(1,1,i),N_int) ! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) @@ -677,9 +675,6 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) end do -! call get_d3_h ( det(1,1,i), bannedOrb, banned, mat , mask, p, sp, psi_selectors_coef_transp_tc (1, interesting(i)) ) -! call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_r, mat_l, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) & -! , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) ) call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) @@ -921,15 +916,26 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d endif val = 4.d0 * psi_h_alpha * alpha_h_psi tmp = dsqrt(delta_E * delta_E + val) - if (delta_E < 0.d0) then - tmp = -tmp - endif - e_pert(istate) = 0.5d0 * (tmp - delta_E) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif + e_pert(istate) = 0.25 * val / delta_E +! e_pert(istate) = 0.5d0 * (tmp - delta_E) if(dsqrt(dabs(tmp)).gt.1.d-4.and.dabs(alpha_h_psi).gt.1.d-4)then - coef(istate) = e_pert(istate) / alpha_h_psi + coef(istate) = e_pert(istate) / psi_h_alpha else coef(istate) = alpha_h_psi / delta_E endif + + if(selection_tc == 1)then + if(e_pert(istate).lt.0.d0)then + e_pert(istate)=0.d0 + else + e_pert(istate)=-e_pert(istate) + endif + else if(selection_tc == -1)then + if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 + endif ! if(selection_tc == 1 )then ! 
if(e_pert(istate).lt.0.d0)then @@ -943,7 +949,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d enddo - do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1 do istate = 1, N_states diff --git a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f index 10132086..0bd51464 100644 --- a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f +++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f @@ -125,7 +125,11 @@ subroutine merge_selection_buffers(b1, b2) enddo b2%det => detmp b2%val => val - b2%mini = min(b2%mini,b2%val(b2%N)) +! if(selection_tc == 1)then +! b2%mini = max(b2%mini,b2%val(b2%N)) +! else + b2%mini = min(b2%mini,b2%val(b2%N)) +! endif b2%cur = nmwen end @@ -157,7 +161,11 @@ subroutine sort_selection_buffer(b) end do deallocate(b%det,iorder) b%det => detmp - b%mini = min(b%mini,b%val(b%N)) +! if(selection_tc == 1)then +! b%mini = max(b%mini,b%val(b%N)) +! else + b%mini = min(b%mini,b%val(b%N)) +! endif b%cur = nmwen end subroutine diff --git a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f index c3a49280..6343bf8b 100644 --- a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f @@ -17,7 +17,6 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context PROVIDE psi_det psi_coef threshold_generators state_average_weight @@ -312,7 +311,6 @@ subroutine run_slave_main PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp 
psi_det_sorted_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index e67287d3..e7ee4be9 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -108,6 +108,7 @@ subroutine run_stochastic_cipsi ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1)) call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! stop if (qp_stop()) exit enddo ! print*,'data to extrapolate ' diff --git a/src/fci_tc_bi/generators.irp.f b/src/fci_tc_bi/generators.irp.f index 250a1f71..55c0cbb9 100644 --- a/src/fci_tc_bi/generators.irp.f +++ b/src/fci_tc_bi/generators.irp.f @@ -31,14 +31,6 @@ END_PROVIDER END_DOC psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted_tc(1:N_int,1:2,1:N_det) psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted_tc(1:N_det,1:N_states) - integer :: i -! print*,'generators ' - do i = 1, N_det - if(N_det.ne.1)then - print*,'writing generators' - write(33,*) psi_det_generators(1,1,i), psi_det_generators(1,2,i) - endif - enddo END_PROVIDER diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 3c12bb07..4d3de7d0 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -18,15 +18,6 @@ BEGIN_PROVIDER [ integer, N_det_selectors] double precision :: norm, norm_max call write_time(6) N_det_selectors = N_det -! norm = 1.d0 -! do i=1,N_det -! norm = norm - psi_average_norm_contrib_tc(i) -! if (norm - 1.d-10 < 1.d0 - threshold_selectors) then -! N_det_selectors = i -! exit -! endif -! 
enddo - N_det_selectors = max(N_det_selectors,N_det_generators) call write_int(6,N_det_selectors,'Number of selectors') END_PROVIDER @@ -43,27 +34,13 @@ END_PROVIDER do k=1,N_int psi_selectors(k,1,i) = psi_det_sorted_tc(k,1,i) psi_selectors(k,2,i) = psi_det_sorted_tc(k,2,i) -! psi_selectors(k,2,i) = psi_det(k,2,i) -! psi_selectors(k,2,i) = psi_det(k,2,i) enddo enddo - print*,'selectors ' do k=1,N_states do i=1,N_det_selectors - psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) -! psi_selectors_coef_tc(i,1,k) = psi_l_coef_bi_ortho(i,k) -! psi_selectors_coef_tc(i,2,k) = psi_r_coef_bi_ortho(i,k) + psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) psi_selectors_coef_tc(i,1,k) = psi_l_coef_sorted_bi_ortho(i,k) psi_selectors_coef_tc(i,2,k) = psi_r_coef_sorted_bi_ortho(i,k) -! call debug_det(psi_selectors(1,1,i),N_int) - if(N_det.ne.1)then - print*,'writing selectors' - write(34,*)psi_selectors(1,1,i),psi_selectors(1,2,i) - write(40,'(F10.7)')dabs(psi_selectors_coef_tc(i,1,1) * psi_selectors_coef_tc(i,2,1)) - endif -! psi_selectors_coef_tc(i,1,k) = 1.d0 -! 
psi_selectors_coef_tc(i,2,k) = 1.d0 - enddo enddo @@ -83,31 +60,9 @@ END_PROVIDER psi_selectors_coef_transp_tc(k,1,i) = psi_selectors_coef_tc(i,1,k) psi_selectors_coef_transp_tc(k,2,i) = psi_selectors_coef_tc(i,2,k) enddo - if(N_det.ne.1)then - write(41,'(F10.7)')dabs(psi_selectors_coef_transp_tc(1,1,i)*psi_selectors_coef_transp_tc(1,2,i)) - endif enddo END_PROVIDER - BEGIN_PROVIDER [ double precision, psi_selectors_rcoef_bi_orth_transp, (N_states, psi_det_size) ] -&BEGIN_PROVIDER [ double precision, psi_selectors_lcoef_bi_orth_transp, (N_states, psi_det_size) ] - - implicit none - integer :: i, k - - psi_selectors_rcoef_bi_orth_transp = 0.d0 - psi_selectors_lcoef_bi_orth_transp = 0.d0 - - print*,'N_det,N_det_selectors',N_det,N_det_selectors - do i = 1, N_det_selectors - do k = 1, N_states - psi_selectors_rcoef_bi_orth_transp(k,i) = psi_r_coef_sorted_bi_ortho(i,k) - psi_selectors_lcoef_bi_orth_transp(k,i) = psi_l_coef_sorted_bi_ortho(i,k) - enddo - enddo - -END_PROVIDER - BEGIN_PROVIDER [ integer, psi_selectors_size ] implicit none psi_selectors_size = psi_det_size diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f index e8477dec..2d2111d6 100644 --- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f +++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f @@ -11,7 +11,7 @@ BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_tc, (psi_det_size) ] psi_average_norm_contrib_tc(:) = 0.d0 do k=1,N_states do i=1,N_det - psi_average_norm_contrib_tc(i) = psi_average_norm_contrib_tc(i) + & + psi_average_norm_contrib_tc(i) = & dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k))*state_average_weight(k) enddo enddo @@ -26,39 +26,54 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc, (psi_det_size,N_states) ] &BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_tc, (psi_det_size) ] &BEGIN_PROVIDER [ integer, psi_det_sorted_tc_order, (psi_det_size) ] +&BEGIN_PROVIDER [double precision, 
psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] +&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] implicit none BEGIN_DOC ! Wave function sorted by determinants contribution to the norm (state-averaged) ! ! psi_det_sorted_tc_order(i) -> k : index in psi_det + ! + ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc + ! + ! psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc END_DOC integer :: i,j,k integer, allocatable :: iorder(:) - print *, 'providing psi_det_sorted_tc' allocate ( iorder(N_det) ) - print*,'before ' +! print*,'before = ' do i=1,N_det - psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) iorder(i) = i - print*,i,iorder(i),psi_average_norm_contrib_sorted_tc(i) + psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) +! print*,'------------' +! call debug_det(psi_det(1,1,i),N_int) +! print*,i,psi_average_norm_contrib_tc(i) +! print*,i,psi_l_coef_bi_ortho(iorder(i),1:N_states),psi_r_coef_bi_ortho(iorder(i),1:N_states) +! print*,'------------' enddo call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) - print*,'after ' +! print*,'after = ' do i=1,N_det -! iorder(i) = i - print*,i,iorder(i),psi_average_norm_contrib_sorted_tc(i) + psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i) do j=1,N_int psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i)) psi_det_sorted_tc(j,2,i) = psi_det(j,2,iorder(i)) enddo - psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i) - psi_det_sorted_tc_order(iorder(i)) = i + psi_det_sorted_tc_order(iorder(i)) = i +! if(iorder(i).ne.i)then +! print*,'changed the order for ',i,iorder(i) +! endif +! print*,'------------' +! call debug_det(psi_det_sorted_tc(1,1,i),N_int) +! print*,i,psi_average_norm_contrib_tc(i) +! print*,i,psi_l_coef_bi_ortho(iorder(i),1:N_states),psi_r_coef_bi_ortho(iorder(i),1:N_states) +! 
print*,'------------' enddo double precision :: accu do k=1,N_states accu = 0.d0 do i=1,N_det - psi_coef_sorted_tc(i,k) = dsqrt(dabs(psi_l_coef_bi_ortho(iorder(i),k)*psi_r_coef_bi_ortho(iorder(i),k))) + psi_coef_sorted_tc(i,k) = dsqrt(psi_average_norm_contrib_sorted_tc(i)) accu += psi_coef_sorted_tc(i,k)**2 enddo accu = 1.d0/dsqrt(accu) @@ -72,60 +87,33 @@ END_PROVIDER psi_average_norm_contrib_sorted_tc(N_det+1:psi_det_size) = 0.d0 psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0 - deallocate(iorder) - logical :: pouet - pouet = .true. - do i = 1, N_det - if(psi_average_norm_contrib_sorted_tc(i) == 0.d0)then - pouet = .False. - exit - endif - enddo - - if(pouet.and.N_det.ne.1)then - print*,'writing sorted' - do i = 1, N_det -! call debug_det(psi_det_sorted_tc(1,1,i),N_int) - print*,i,psi_average_norm_contrib_sorted_tc(i) - write(35,*)psi_det_sorted_tc(1,1,i),psi_det_sorted_tc(1,2,i) - enddo - endif - -END_PROVIDER - - BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] -&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] - BEGIN_DOC - ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc - ! psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc - END_DOC - implicit none - integer :: i, j, k psi_r_coef_sorted_bi_ortho = 0.d0 psi_l_coef_sorted_bi_ortho = 0.d0 do i = 1, N_det - psi_r_coef_sorted_bi_ortho(i,1) = psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) - psi_l_coef_sorted_bi_ortho(i,1) = psi_l_coef_bi_ortho(psi_det_sorted_tc_order(i),1) + psi_r_coef_sorted_bi_ortho(i,1:N_states) = psi_r_coef_bi_ortho(iorder(i),1:N_states) + psi_l_coef_sorted_bi_ortho(i,1:N_states) = psi_l_coef_bi_ortho(iorder(i),1:N_states) enddo - logical :: pouet - pouet = .true. - do i = 1, N_det - if(psi_l_coef_sorted_bi_ortho(i,1) == 0.d0)then - pouet = .False. 
- exit - endif - enddo - if(pouet.and.N_det.ne.1)then - print*,'psi_r_coef_sorted_bi_ortho' - do i = 1, N_det - print*,psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) - write(42,'(F10.7)')dabs(psi_r_coef_sorted_bi_ortho(i,1)*psi_l_coef_sorted_bi_ortho(i,1)) - enddo - endif + + deallocate(iorder) +! logical :: pouet +! pouet = .true. +! do i = 1, N_det +! if(psi_average_norm_contrib_sorted_tc(i) == 0.d0)then +! pouet = .False. +! exit +! endif +! enddo +! +! if(pouet.and.N_det.ne.1)then +! print*,'writing sorted' +! do i = 1, N_det +! print*,i,psi_average_norm_contrib_sorted_tc(i) +! write(35,*)psi_det_sorted_tc(1,1,i),psi_det_sorted_tc(1,2,i) +! enddo +! endif END_PROVIDER - BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_bit, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_bit, (psi_det_size,N_states) ] implicit none diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index ac9b0e74..521acff5 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -136,15 +136,15 @@ BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -subroutine save_tc_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psilcoef,psircoef) +subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef) implicit none BEGIN_DOC ! 
Save the wave function into the |EZFIO| file END_DOC use bitmasks include 'constants.include.F' - integer, intent(in) :: ndet,nstates,dim_psicoef - integer(bit_kind), intent(in) :: psidet(N_int,2,ndet) + integer, intent(in) :: ndet,nstates,dim_psicoef,sze + integer(bit_kind), intent(in) :: psidet(N_int,2,sze) double precision, intent(in) :: psilcoef(dim_psicoef,nstates) double precision, intent(in) :: psircoef(dim_psicoef,nstates) integer*8, allocatable :: psi_det_save(:,:,:) @@ -188,23 +188,17 @@ subroutine save_tc_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psilcoef call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(psir_coef_save) deallocate (psil_coef_save,psir_coef_save) -! allocate (psi_coef_save(ndet_qp_edit,nstates)) -! do k=1,nstates -! do i=1,ndet_qp_edit -! psi_coef_save(i,k) = psicoef(i,k) -! enddo -! enddo -! -! call ezfio_set_determinants_psi_coef_qp_edit(psi_coef_save) -! deallocate (psi_coef_save) - call write_int(6,ndet,'Saved determinantsi and psi_r/psi_l coef') endif end subroutine save_tc_bi_ortho_wavefunction implicit none - call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) + if(save_sorted_tc_wf)then + call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho) + else + call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) + endif call routine_save_right_bi_ortho end @@ -214,9 +208,9 @@ subroutine routine_save_right_bi_ortho integer :: i allocate(coef_tmp(N_det, N_states)) do i = 1, N_det - coef_tmp(i,1:N_states) = psi_r_coef_bi_ortho(i,1:N_states) + coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) enddo - call save_wavefunction_general_unormalized(N_det,N_states,psi_det,size(coef_tmp,1),coef_tmp(1,1)) + call 
save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1)) end subroutine routine_save_left_right_bi_ortho diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index cfa24f3b..2d51f6f0 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -11,7 +11,7 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call routine_diag -! call test + call save_tc_bi_ortho_wavefunction end subroutine test @@ -19,18 +19,19 @@ subroutine test integer :: i,j double precision :: hmono,htwoe,hthree,htot use bitmasks - - print*,'test' -! call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot) - call double_htilde_mu_mat_bi_ortho(N_int,psi_det(1,1,1), psi_det(1,1,2), hmono, htwoe, htot) - print*,hmono, htwoe, htot + print*,'reading the wave function ' + do i = 1, N_det + call debug_det(psi_det(1,1,i),N_int) + print*,i,psi_l_coef_bi_ortho(i,1)*psi_r_coef_bi_ortho(i,1) + print*,i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) + enddo end subroutine routine_diag implicit none ! provide eigval_right_tc_bi_orth - provide overlap_bi_ortho +! provide overlap_bi_ortho ! provide htilde_matrix_elmt_bi_ortho integer ::i,j print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) @@ -46,16 +47,7 @@ subroutine routine_diag print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth print*,'Left/right eigenvectors' do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1) + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) enddo - do j=1,N_states - do i=1,N_det - psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) - psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) - enddo - enddo - SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho - call save_tc_bi_ortho_wavefunction -! 
call routine_save_left_right_bi_ortho end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index c66ff036..1ccda822 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -12,6 +12,25 @@ enddo END_PROVIDER +subroutine diagonalize_CI_tc + implicit none + BEGIN_DOC +! Replace the coefficients of the |CI| states by the coefficients of the +! eigenstates of the |CI| matrix. + END_DOC + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) + psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) + enddo + enddo +! psi_energy(1:N_states) = CI_electronic_energy(1:N_states) +! psi_s2(1:N_states) = CI_s2(1:N_states) + + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho +end + BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states)] @@ -133,10 +152,10 @@ call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) norm_ground_left_right_bi_orth = 0.d0 - print*,'after diago' +! print*,'after diago' do j = 1, N_det - call debug_det(psi_det(1,1,j),N_int) - print*,j,dabs(leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)) +! call debug_det(psi_det(1,1,j),N_int) +! 
print*,j,dabs(leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)) norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 8765cd6e..e397e700 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -207,3 +207,11 @@ type: logical doc: If |true|, only the right part of WF is used to compute spin dens interface: ezfio,provider,ocaml default: False + +[save_sorted_tc_wf] +type: logical +doc: If |true|, save the bi-ortho wave functions in a sorted way +interface: ezfio,provider,ocaml +default: True + + From edefcef1a3b798171f5103c226e8f9e4d023e144 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 16 Mar 2023 22:11:26 +0100 Subject: [PATCH 19/87] added the get_fci_conv.sh script --- scripts/get_fci_conv.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 scripts/get_fci_conv.sh diff --git a/scripts/get_fci_conv.sh b/scripts/get_fci_conv.sh new file mode 100755 index 00000000..8b0f5ac2 --- /dev/null +++ b/scripts/get_fci_conv.sh @@ -0,0 +1,7 @@ +file=$1 +grep "N_det =" $1 | cut -d "=" -f 2 > N_det_tmp +grep "E =" $file | cut -d "=" -f 2 > E_tmp +grep "E+PT2 =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+PT2_tmp +grep "E+rPT2 =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+rPT2_tmp +paste N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp | column -s ' ' -t > $file.conv_fci +rm N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp From 29230b175d2416d19ea92954b8c0d17c04d5f40a Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 17 Mar 2023 11:26:51 +0100 Subject: [PATCH 20/87] added script_fci_tc.sh --- scripts/script_fci_tc.sh | 33 ++++++++++++++++++++++++++++ src/tools/print_sorted_wf_coef.irp.f | 19 ++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100755 scripts/script_fci_tc.sh create mode 100644 src/tools/print_sorted_wf_coef.irp.f diff --git a/scripts/script_fci_tc.sh b/scripts/script_fci_tc.sh new 
file mode 100755 index 00000000..26ef2aaf --- /dev/null +++ b/scripts/script_fci_tc.sh @@ -0,0 +1,33 @@ + source ~/qp2/quantum_package.rc + alpha=1.8 + input=O + basis=cc-pvdz + mult=3 + output=${input}_${basis}_al_${alpha} + qp create_ezfio -b ${basis} ${input}.xyz -m $mult + qp run scf + qp set perturbation pt2_max 0.0001 + qp set_frozen_core + +########## FCI CALCULATION FOR REFERENCE + qp run fci | tee ${EZFIO_FILE}.fci.out + qp run sort_wf + mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_fci.wf_sorted +########### TC SCF CALCULATION + qp reset -d + qp set ao_two_e_erf_ints mu_erf 0.87 + qp set tc_keywords j1b_type 3 + qp set tc_keywords j1b_pen "[${alpha}]" + qp set tc_keywords bi_ortho True + qp set tc_keywords test_cycle_tc True + qp set tc_keywords write_tc_integ True + qp set tc_keywords read_tc_integ False + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp set tc_keywords write_tc_integ False + qp set tc_keywords read_tc_integ True +############ TC-FCI CALCULATION + qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.fci_tc_bi_ortho.out + grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" ${EZFIO_FILE}.fci_tc_bi_ortho.out | cut -d "=" -f 2 > data_al_$alpha + qp run sort_wf + mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_tc_fci.wf_sorted + diff --git a/src/tools/print_sorted_wf_coef.irp.f b/src/tools/print_sorted_wf_coef.irp.f new file mode 100644 index 00000000..fa0f1eab --- /dev/null +++ b/src/tools/print_sorted_wf_coef.irp.f @@ -0,0 +1,19 @@ +program sort_wf + implicit none + read_wf =.True. 
+ call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i= 1, N_det + write(i_unit_output,*)i,dabs(psi_coef_sorted(i,1)) + enddo + +end From e710d2623765dce23896f576cb0b1a37b393a9f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2023 19:23:07 +0100 Subject: [PATCH 21/87] Improve I/O on TC integrals --- src/non_h_ints_mu/grad_squared.irp.f | 70 ++++++--------- src/non_h_ints_mu/grad_squared_manu.irp.f | 76 +++++++--------- src/non_h_ints_mu/new_grad_tc.irp.f | 28 ++---- src/non_h_ints_mu/new_grad_tc_manu.irp.f | 103 ++++++++-------------- src/tc_keywords/EZFIO.cfg | 34 +++---- 5 files changed, 118 insertions(+), 193 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 7925fa7c..1fd39f6a 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -2,7 +2,7 @@ ! --- ! TODO : strong optmization : write the loops in a different way -! : for each couple of AO, the gaussian product are done once for all +! : for each couple of AO, the gaussian product are done once for all BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ] @@ -20,14 +20,14 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 - ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 + ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 ! = v1^2 x int2_grad1u2_grad2u2_j1b2 ! 
+ -0.5 x (grad_1 v1)^2 x int2_u2_j1b2 ! + -1.0 X V1 x (grad_1 v1) \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b ] ! ! END_DOC - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z, r(3), delta, coef @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi call wall_time(time1) print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -151,7 +151,7 @@ END_PROVIDER ! ! deallocate(ac_mat) ! -!END_PROVIDER +!END_PROVIDER ! --- @@ -214,12 +214,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_nu call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -267,7 +267,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -297,12 +297,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_ call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -347,7 +347,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- @@ -370,26 +370,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="read") + read(11) tc_grad_square_ao close(11) else allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -401,11 +393,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -417,25 +409,25 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - + !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & + !!$OMP PRIVATE (i, j, k, l) & !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) !!$OMP DO SCHEDULE (static) ! do j = 1, ao_num ! do l = 1, ao_num ! do i = 1, ao_num ! do k = 1, ao_num - ! 
tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) ! enddo ! enddo ! enddo @@ -444,23 +436,17 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao !!$OMP END PARALLEL endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index cb9e15c4..66f3c693 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -17,29 +17,21 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="read") + read(11) tc_grad_square_ao_test close(11) else provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test - + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP 
SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -51,11 +43,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -67,23 +59,23 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao_test = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao_test, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) !do i = 1, ao_num ! do j = 1, ao_num ! do k = i, ao_num - + ! do l = max(j,k), ao_num ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! end do - + ! !if (j.eq.k) then ! ! do l = j+1, ao_num ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) @@ -95,14 +87,14 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! ! enddo ! !endif - + ! enddo ! enddo !enddo !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! 
integer :: ii @@ -121,10 +113,10 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! print *, ' ii =', ii ! !$OMP END DO ! !$OMP END PARALLEL - + ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! do j = 1, ao_num @@ -144,24 +136,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- @@ -189,7 +175,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -205,7 +191,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -226,7 +212,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -246,7 +232,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -276,12 +262,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -328,12 +314,12 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -381,7 +367,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi call wall_time(time1) print*, ' Wall time for grad12_j12_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a15f690a..754e1240 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -36,16 +36,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao close(11) else @@ -89,18 +81,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) 
+ write(11) int2_grad1_u12_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 47b05e52..901e3048 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -3,7 +3,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po BEGIN_DOC ! - ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! @@ -15,9 +15,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po ! if J(r1,r2) = u12 x v1 x v2 ! ! int2_grad1_u12_ao_test(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) ! ! @@ -35,25 +35,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="read") + read(11) int2_grad1_u12_ao_test close(11) + else - + if(j1b_type .eq. 
3) then do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + z = final_grid_points(3,ipoint) tmp0 = 0.5d0 * v_1b(ipoint) tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -87,24 +80,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -114,9 +101,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ ! ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! - ! This is obtained by integration by parts. + ! This is obtained by integration by parts. ! 
END_DOC @@ -131,40 +118,32 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="read") + read(11) tc_grad_and_lapl_ao_test close(11) else - provide int2_grad1_u12_ao_test - + provide int2_grad1_u12_ao_test + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid - + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) - + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) @@ -173,19 +152,19 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + ac_mat = 0.d0 do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, 
n_points_final_grid, 1.d0 & , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , 1.d0, ac_mat, ao_num*ao_num) - + enddo deallocate(b_mat) - + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -199,29 +178,23 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(ac_mat) endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_and_lapl_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..3f34c088 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -6,7 +6,7 @@ default: False [comp_left_eigv] type: logical -doc: If |true|, computes also the left-eigenvector +doc: If |true|, computes also the left-eigenvector interface: ezfio,provider,ocaml default: False @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: True [pure_three_body_h_tc] type: logical @@ -30,13 +30,13 @@ default: False [core_tc_op] type: logical -doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) +doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) interface: ezfio,provider,ocaml default: False [full_tc_h_solver] type: logical -doc: If |true|, you diagonalize the full TC H matrix +doc: If |true|, you diagonalize the full TC H matrix interface: ezfio,provider,ocaml default: False @@ -60,11 +60,11 @@ default: 0.000005 [thresh_psi_r_norm] type: logical -doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. +doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. 
interface: ezfio,provider,ocaml default: False -[state_following_tc] +[state_following_tc] type: logical doc: If |true|, the states are re-ordered to match the input states default: False @@ -78,7 +78,7 @@ default: True [symetric_fock_tc] type: logical -doc: If |true|, using F+F^t as Fock TC +doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False @@ -126,7 +126,7 @@ default: 1.e-6 [maxovl_tc] type: logical -doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors +doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False @@ -152,7 +152,7 @@ default: 0. type: character*(32) doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] interface: ezfio,provider,ocaml -default: Simple +default: DIIS [im_thresh_tcscf] type: Threshold @@ -180,21 +180,15 @@ default: 1.e-6 [var_tc] type: logical -doc: If |true|, use VAR-TC +doc: If |true|, use VAR-TC interface: ezfio,provider,ocaml default: False -[read_tc_integ] -type: logical -doc: If |true|, read integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao +[io_tc_integ] +type: Disk_access +doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml -default: False - -[write_tc_integ] -type: logical -doc: If |true|, write integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao -interface: ezfio,provider,ocaml -default: False +default: None [debug_tc_pt2] type: integer From 08d86379d52ffd7d203e730dc75e35c5c70d31d7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 21 Mar 2023 14:51:44 +0100 Subject: [PATCH 22/87] Updated release notes --- RELEASE_NOTES.org | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 9b579146..3bd02898 100644 --- a/RELEASE_NOTES.org +++ 
b/RELEASE_NOTES.org @@ -9,15 +9,23 @@ - Configure adapted for ARM - Added many types of integrals - Accelerated four-index transformation - -*** TODO: take from dev - - [ ] Added GTOs with complex exponent - - Updated version of f77-zmq - Added transcorrelated SCF - Added transcorrelated CIPSI + - Added CCSD and CCSD(T) + - Added MO localization + - Changed coupling parameters for ROHF + - General Davidson algorithm + - Accelerated restore_symmetry + - Point charges in the Hamiltonian + - Removed cryptokit dependency in OCaml + - Using now standard convention in RDM + - Added molecular properties + - [ ] Added GTOs with complex exponent + +*** TODO: take from dev + - Updated version of f77-zmq - Started to introduce shells in AOs - Added ECMD UEG functional - - General Davidson algorithm * Version 2.2 From d3bb04ec8d9344c8fd3e424aac6d7d30fc0cec07 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 21 Mar 2023 15:25:11 +0100 Subject: [PATCH 23/87] Removed IO READ messages --- .travis.yml | 52 ------------------- .../ezfio_generate_provider.py | 6 +-- src/davidson/input.irp.f | 5 -- src/mo_basis/mo_class.irp.f | 7 --- 4 files changed, 3 insertions(+), 67 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index bec11f5e..00000000 --- a/.travis.yml +++ /dev/null @@ -1,52 +0,0 @@ -#sudo: true -#before_script: -# - sudo apt-get update -q -# - sudo apt-get remove curl -# - sudo apt-get remove zlib1g-dev -# - sudo apt-get install autoconf -# - sudo rm /usr/local/bin/bats - -os: linux - -dist: bionic - -sudo: false - -compiler: gfortran - -addons: - apt: - packages: - - gfortran - - gcc - - libatlas-base-dev -# - liblapack-dev -# - libblas-dev - - wget - -env: - - OPAMROOT=$HOME/.opam - -cache: - directories: - - $HOME/.opam/ - - $HOME/cache - -language: python -python: - - "3.7" - -stages: - - configuration - - compilation - - testing - -jobs: - include: - - stage: configuration - script: 
travis/configuration.sh - - stage: compilation - script: travis/compilation.sh - - stage: testing - script: travis/testing.sh - diff --git a/scripts/ezfio_interface/ezfio_generate_provider.py b/scripts/ezfio_interface/ezfio_generate_provider.py index 6b49955b..a282b834 100755 --- a/scripts/ezfio_interface/ezfio_generate_provider.py +++ b/scripts/ezfio_interface/ezfio_generate_provider.py @@ -52,7 +52,7 @@ BEGIN_PROVIDER [ %(type)s, %(name)s %(size)s ] %(test_null_size)s call ezfio_has_%(ezfio_dir)s_%(ezfio_name)s(has) if (has) then - write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..' +! write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..' call ezfio_get_%(ezfio_dir)s_%(ezfio_name)s(%(name)s) else print *, '%(ezfio_dir)s/%(ezfio_name)s not found in EZFIO file' @@ -117,7 +117,7 @@ END_PROVIDER output = self.output name = self.name l_write = ["", - " call write_time(%(output)s)", + "! call write_time(%(output)s)", ""] self.write = "\n".join(l_write) % locals() @@ -129,7 +129,7 @@ END_PROVIDER write = self.write_correspondance[self.type] l_write = ["", - " call write_time(%(output)s)", + "! call write_time(%(output)s)", " call %(write)s(%(output)s, %(name)s, &", " '%(name)s')", ""] diff --git a/src/davidson/input.irp.f b/src/davidson/input.irp.f index aba88ae9..b37c87d0 100644 --- a/src/davidson/input.irp.f +++ b/src/davidson/input.irp.f @@ -30,10 +30,5 @@ BEGIN_PROVIDER [ integer, n_states_diag ] endif IRP_ENDIF - call write_time(6) - if (mpi_master) then - write(6, *) 'Read n_states_diag' - endif - END_PROVIDER diff --git a/src/mo_basis/mo_class.irp.f b/src/mo_basis/mo_class.irp.f index 95fbb443..7705e414 100644 --- a/src/mo_basis/mo_class.irp.f +++ b/src/mo_basis/mo_class.irp.f @@ -1,8 +1,3 @@ -! DO NOT MODIFY BY HAND -! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! 
from file /home/eginer/programs/qp2/src/mo_basis/EZFIO.cfg - - BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ] implicit none BEGIN_DOC @@ -35,6 +30,4 @@ BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ] endif IRP_ENDIF - call write_time(6) - END_PROVIDER From c19f486670b042603ead9bd49849cfa6ce472d1c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 21 Mar 2023 17:31:53 +0100 Subject: [PATCH 24/87] Added qp_extract_cipsi_data.py --- scripts/qp_extract_cipsi_data.py | 54 ++++++++++++++++++++++++++++++++ src/tools/save_natorb.irp.f | 1 + 2 files changed, 55 insertions(+) create mode 100755 scripts/qp_extract_cipsi_data.py diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py new file mode 100755 index 00000000..8f0b1f3c --- /dev/null +++ b/scripts/qp_extract_cipsi_data.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import re +import sys + +# Read output file +with open(sys.argv[1], 'r') as file: + output = file.read() + + +def extract_data(output): + lines = output.split("\n") + data = [] + + n_det = None + e = None + pt2 = None + err_pt2 = None + rpt2 = None + err_rpt2 = None + e_ex = None + + + reading = False + for iline, line in enumerate(lines): + if not reading and line.startswith(" N_det "): + n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) + reading = True + + if reading: + if line.startswith(" E "): + e = float(re.search(r"E\s+=\s+(-?\d+\.\d+)", line).group(1)) + elif line.startswith(" PT2 "): + pt2 = float(re.search(r"PT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) + err_pt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) + elif line.startswith(" rPT2 "): + rpt2 = float(re.search(r"rPT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) + err_rpt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) + elif "minimum PT2 Extrapolated energy" in line: + e_ex_line = lines[iline+2] + e_ex = float(e_ex_line.split()[1]) + reading = False + + data.append((n_det, e, pt2, err_pt2, rpt2, 
err_rpt2, e_ex)) + n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None + + return data + +data = extract_data(output) + +for item in data: + print(" ".join(str(x) for x in item)) + + diff --git a/src/tools/save_natorb.irp.f b/src/tools/save_natorb.irp.f index f6331d13..b4e9f5dc 100644 --- a/src/tools/save_natorb.irp.f +++ b/src/tools/save_natorb.irp.f @@ -12,6 +12,7 @@ program save_natorb ! matrices of each state with the corresponding ! :option:`determinants state_average_weight` END_DOC + PROVIDE nucl_coord read_wf = .True. touch read_wf call save_natural_mos From ba447be2e8627df7cbafba7e0ccbac23c2071552 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:12:02 +0100 Subject: [PATCH 25/87] added sort_wf --- src/tools/sort_wf.irp.f | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/tools/sort_wf.irp.f diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f new file mode 100644 index 00000000..2e0014ea --- /dev/null +++ b/src/tools/sort_wf.irp.f @@ -0,0 +1,7 @@ +program sort_wf + implicit none + read_wf = .true. + touch read_wf + call save_wavefunction + +end From 2ef2e8044d338c6c823d7f965f6a62a0c55640b8 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:24:26 +0100 Subject: [PATCH 26/87] added sort_wf.irp.f --- external/qp2-dependencies | 2 +- src/tools/sort_wf.irp.f | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 2e0014ea..038b24c8 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -2,6 +2,19 @@ program sort_wf implicit none read_wf = .true. 
touch read_wf - call save_wavefunction + call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i = 1, N_det + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + enddo end From 7aee93997a9e98a6ebe8798a8e3a00ca10f1119d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 13:08:09 +0100 Subject: [PATCH 27/87] added Hn.py --- scripts/Hn.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/Hn.py diff --git a/scripts/Hn.py b/scripts/Hn.py new file mode 100644 index 00000000..a4119145 --- /dev/null +++ b/scripts/Hn.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import sys +from math import * +arg = sys.argv +#f = open('data_dft','r') +n = int(sys.argv[1]) +r = float(sys.argv[2]) +f = open('H'+str(n)+'_'+str(r),'w') +string=str(n)+"\n" +f.write(string) +string="\n" +f.write(string) +for i in range(n): + x = r * cos(2.* i* pi/n) + y = r * sin(2.* i* pi/n) + z = 0. 
+ string="H "+str(x)+" "+str(y)+" "+str(z)+"\n" + f.write(string) + +#lines = f.readlines() +#cipsi_dft= [] +# +#dissoc = [] +#dissoc.append(float(-76.0179223470363)) +#dissoc.append(float(-76.0592367866993)) +#dissoc.append(float(-76.0678739715659)) +#delta_e = [] +# +#for line in lines: +# data = line.split() +# if(len(data)>0): +# dft=float(data[1]) +# fci=float(data[2]) +# e=fci+dft +# cipsi_dft.append(e) +# +#print(*cipsi_dft,sep=" & ") +# +#for i in 0,1,2: +# delta_e.append(1000.*(dissoc[i] - cipsi_dft[i])) +# +#print(*delta_e,sep=" & ") +# From a2f229370b50392333a8c0082c84099d2fbdf74f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 23 Mar 2023 15:13:30 +0100 Subject: [PATCH 28/87] Added replace_wf in h_apply --- external/qp2-dependencies | 2 +- src/determinants/h_apply.irp.f | 106 ++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 35 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index d01ad1c7..a67f71da 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -69,9 +69,12 @@ subroutine resize_H_apply_buffer(new_size,iproc) END_DOC PROVIDE H_apply_buffer_allocated - ASSERT (new_size > 0) - ASSERT (iproc >= 0) - ASSERT (iproc < nproc) + + if (N_det < 0) call abort(irp_here//': N_det < 0') + if (N_int <= 0) call abort(irp_here//': N_int <= 0') + if (new_size <= 0) call abort(irp_here//': new_size <= 0') + if (iproc < 0) call abort(irp_here//': iproc < 0') + if (iproc >= nproc) call abort(irp_here//': iproc >= nproc') allocate ( buffer_det(N_int,2,new_size), & buffer_coef(new_size,N_states), & @@ -126,31 +129,34 @@ subroutine copy_H_apply_buffer_to_wf ASSERT (N_int > 0) - 
ASSERT (N_det > 0) + ASSERT (N_det >= 0) - allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) + N_det_old = N_det + if (N_det > 0) then + allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) - ! Backup determinants - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j+=1 - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) - buffer_det(:,:,j) = psi_det(:,:,i) - enddo - N_det_old = j + ! Backup determinants + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j+=1 + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) + buffer_det(:,:,j) = psi_det(:,:,i) + enddo + N_det_old = j - ! Backup coefficients - do k=1,N_states - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j += 1 - buffer_coef(j,k) = psi_coef(i,k) - enddo - ASSERT ( j == N_det_old ) - enddo + ! Backup coefficients + do k=1,N_states + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j += 1 + buffer_coef(j,k) = psi_coef(i,k) + enddo + ASSERT ( j == N_det_old ) + enddo + endif ! Update N_det N_det = N_det_old @@ -164,17 +170,19 @@ subroutine copy_H_apply_buffer_to_wf TOUCH psi_det_size endif - ! Restore backup in resized array - do i=1,N_det_old - psi_det(:,:,i) = buffer_det(:,:,i) - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) - enddo - do k=1,N_states + if (N_det_old > 0) then + ! Restore backup in resized array do i=1,N_det_old - psi_coef(i,k) = buffer_coef(i,k) + psi_det(:,:,i) = buffer_det(:,:,i) + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) enddo - enddo + do k=1,N_states + do i=1,N_det_old + psi_coef(i,k) = buffer_coef(i,k) + enddo + enddo + endif ! 
Copy new buffers @@ -339,3 +347,33 @@ subroutine fill_H_apply_buffer_no_selection(n_selected,det_buffer,Nint,iproc) call omp_unset_lock(H_apply_buffer_lock(1,iproc)) end + +subroutine replace_wf(N_det_new, LDA, psi_coef_new, psi_det_new) + use omp_lib + implicit none + BEGIN_DOC +! Replaces the wave function. +! After calling this subroutine, N_det, psi_det and psi_coef need to be touched + END_DOC + integer, intent(in) :: N_det_new, LDA + double precision, intent(in) :: psi_coef_new(LDA,N_states) + integer(bit_kind), intent(in) :: psi_det_new(N_int,2,N_det_new) + + integer :: i,j + + PROVIDE H_apply_buffer_allocated + + if (N_det_new <= 0) call abort(irp_here//': N_det_new <= 0') + if (N_int <= 0) call abort(irp_here//': N_int <= 0') + if (LDA < N_det_new) call abort(irp_here//': LDA < N_det_new') + + do j=0,nproc-1 + H_apply_buffer(j)%N_det = 0 + enddo + N_det = 0 + SOFT_TOUCH N_det + call fill_H_apply_buffer_no_selection(N_det_new,psi_det_new,N_int,0) + call copy_h_apply_buffer_to_wf + psi_coef(1:N_det_new,1:N_states) = psi_coef_new(1:N_det_new,1:N_states) + +end From 436b8815807c3ad04dc5035244c6ed9e7c4f2580 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 11:21:19 +0200 Subject: [PATCH 29/87] added thresh_cycle --- src/ao_many_one_e_ints/grad2_jmu_manu.irp.f | 48 ++-- .../grad_lapl_jmu_manu.irp.f | 46 ++-- src/ao_many_one_e_ints/listj1b_sorted.irp.f | 42 ++-- src/non_h_ints_mu/total_tc_int.irp.f | 19 ++ src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/tc_scf.irp.f | 4 +- src/tc_scf/test_int.irp.f | 214 +++++++++++------- src/tools/sort_wf.irp.f | 2 +- 8 files changed, 212 insertions(+), 169 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 8e253d75..14170ede 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -38,7 +38,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !$OMP 
expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b, & - !$OMP ao_overlap_abs,sq_pi_3_2) + !$OMP ao_overlap_abs,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -46,7 +46,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n r(3) = final_grid_points(3,ipoint) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -58,7 +58,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_gauss = overlap_gauss_r12_ao(r, expo_fit, i, j) int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss enddo @@ -81,8 +81,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef -! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! 
expo_fit, i, j, int_fit_v, n_points_final_grid) int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) @@ -145,14 +144,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, big_array,& - !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs) + !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs,thrsh_cycle_tc) ! allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -161,7 +160,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) -! if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -243,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & - !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) + !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -260,11 +258,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) coef_fit = coef_gauss_j_mu_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! 
if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -278,7 +276,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -288,8 +286,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef_fit = coef_gauss_j_mu_x_2(i_fit) !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! 
if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -350,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid @@ -369,7 +366,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -392,8 +389,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv) -! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version - if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle +! if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 
thrsh_cycle_tc) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -470,13 +466,13 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, ao_abs_comb_b3_j1b, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10) cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc) cycle r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -489,10 +485,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) - if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.1.d-15) cycle +! if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.thrsh_cycle_tc) cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) tmp += coef_fit * int_fit @@ -507,7 +503,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -517,7 +513,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle +! if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.thrsh_cycle_tc)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit @@ -527,9 +523,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - if(expo_coef_1s .gt. 20.d0) cycle +! if(expo_coef_1s .gt. 20.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-08) cycle +! if(dabs(coef_tmp) .lt. 
1d-08) cycle int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 5c9f81e9..66a2b961 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b, & !$OMP v_ij_erf_rk_cst_mu_j1b_test, mu_erf, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid @@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -110,7 +110,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & !$OMP x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,thrsh_cycle_tc) ! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO do ipoint = 1, n_points_final_grid @@ -120,7 +120,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -130,19 +130,11 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + ! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) -! if(ao_prod_center(1,j,i).ne.10000.d0)then -! ! approximate 1 - erf(mu r12) by a gaussian * 10 -! !DIR$ FORCEINLINE -! call gaussian_product(expo_erfc_mu_gauss,r, & -! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & -! factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle -! 
endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) @@ -216,7 +208,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -225,7 +217,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -234,11 +226,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + ! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) - if(ao_overlap_abs_grid(j,i).lt.1.d-15) cycle + ! if(ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc) cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -251,7 +243,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -259,9 +251,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle +! if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle +! if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -325,7 +317,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -334,7 +326,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -343,7 +335,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! 
if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle expo_fit = expo_good_j_mu_1gauss int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += int_fit @@ -356,7 +348,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -364,9 +356,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, expo_fit = expo_good_j_mu_1gauss coef_fit = 1.d0 coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle + if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle + if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit ! enddo diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f index bf493fbb..9bcce449 100644 --- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f +++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f @@ -3,15 +3,16 @@ &BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b2_size = 0 + print*,'List_all_comb_b2_size = ',List_all_comb_b2_size +! 
pause do i = 1, ao_num do j = i, ao_num do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-15)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) @@ -24,7 +25,7 @@ dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b2_size(j,i) += 1 endif enddo @@ -40,6 +41,7 @@ list(i) = maxval(List_comb_thr_b2_size(:,i)) enddo max_List_comb_thr_b2_size = maxval(list) + print*,'max_List_comb_thr_b2_size = ',max_List_comb_thr_b2_size END_PROVIDER @@ -49,16 +51,15 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b2_j1b = 10000000.d0 do i = 1, ao_num do j = i, ao_num icount = 0 do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-12)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) int_j1b = 0.d0 @@ -70,7 +71,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b2_coef(icount,j,i) = coef List_comb_thr_b2_expo(icount,j,i) = beta @@ -98,17 +99,17 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size] implicit none integer :: i_1s,i,j,ipoint - 
double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b3_size = 0 + print*,'List_all_comb_b3_size = ',List_all_comb_b3_size do i = 1, ao_num do j = 1, ao_num do i_1s = 1, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) beta = List_all_comb_b3_expo (i_1s) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -118,7 +119,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b3_size(j,i) += 1 endif enddo @@ -144,9 +145,8 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b3_j1b = 10000000.d0 do i = 1, ao_num do j = 1, ao_num @@ -156,7 +156,7 @@ END_PROVIDER beta = List_all_comb_b3_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -166,7 +166,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 
List_comb_thr_b3_coef(icount,j,i) = coef List_comb_thr_b3_expo(icount,j,i) = beta @@ -177,15 +177,5 @@ END_PROVIDER enddo enddo -! do i = 1, ao_num -! do j = 1, i-1 -! do icount = 1, List_comb_thr_b3_size(j,i) -! List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j) -! List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j) -! List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j) -! enddo -! enddo -! enddo - END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2fd2719c..4f8dc74d 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -68,7 +68,26 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + implicit none + integer :: i, j, k, l + double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' + call wall_time(wall0) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! 
ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + call wall_time(wall1) + print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 +END_PROVIDER BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..62b6d2bf 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: Thresholds on the energy for iterative Davidson used in TC interface: ezfio,provider,ocaml default: 1.e-5 +[thrsh_cycle_tc] +type: Threshold +doc: Thresholds to cycle the integrals with the envelop +interface: ezfio,provider,ocaml +default: 1.e-15 + [max_it_dav] type: integer doc: nb max of iteration in Davidson used in TC diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 85389f30..ae3b609b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + my_n_pt_r_grid = 60 + my_n_pt_a_grid = 110 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 0866cdaf..b9287d58 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -21,25 +21,22 @@ program test_ints touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid !! OK -!call routine_int2_u_grad1u_j1b2 -!! OK -!call routine_v_ij_erf_rk_cst_mu_j1b -!! OK +! call routine_int2_u_grad1u_j1b2 +! OK +! call routine_v_ij_erf_rk_cst_mu_j1b +! OK ! call routine_x_v_ij_erf_rk_cst_mu_j1b -!! OK -! call routine_v_ij_u_cst_mu_j1b - -!! OK -!call routine_int2_u2_j1b2 - -!! OK -!call routine_int2_u_grad1u_x_j1b2 - -!! OK +! OK +! call routine_int2_u2_j1b2 +! OK +! call routine_int2_u_grad1u_x_j1b2 +! OK ! 
call routine_int2_grad1u2_grad2u2_j1b2 ! call routine_int2_u_grad1u_j1b2 ! call test_total_grad_lapl ! call test_total_grad_square +! call test_int2_grad1_u12_ao_test +! call routine_v_ij_u_cst_mu_j1b_test ! call test_ao_tc_int_chemist ! call test_grid_points_ao ! call test_tc_scf @@ -53,12 +50,12 @@ program test_ints !call test_two_e_tc_non_hermit_integral() - call test_tc_grad_square_ao_test() - - PROVIDE TC_HF_energy VARTC_HF_energy - print *, ' TC_HF_energy = ', TC_HF_energy - print *, ' VARTC_HF_energy = ', VARTC_HF_energy +! call test_tc_grad_square_ao_test() +!!PROVIDE TC_HF_energy VARTC_HF_energy +!!print *, ' TC_HF_energy = ', TC_HF_energy +!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy + call test_old_ints end ! --- @@ -157,6 +154,9 @@ subroutine routine_int2_u_grad1u_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -169,20 +169,6 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -215,6 +201,9 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -228,20 +217,6 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -276,6 +251,10 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + + print*,'******' + print*,'******' + print*,'routine_x_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -290,20 +269,6 @@ subroutine routine_v_ij_u_cst_mu_j1b_test integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -336,6 +301,9 @@ subroutine routine_v_ij_u_cst_mu_j1b_test enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b_test' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -427,19 +395,6 @@ subroutine routine_int2_u2_j1b2 integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -473,6 +428,9 @@ subroutine routine_int2_u2_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u2_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -486,19 +444,6 @@ subroutine routine_int2_u_grad1u_x_j1b2 integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -534,6 +479,9 @@ subroutine routine_int2_u_grad1u_x_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_x_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -579,6 +527,9 @@ subroutine routine_v_ij_u_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -742,6 +693,9 @@ subroutine test_total_grad_lapl enddo enddo enddo + print*,'******' + print*,'******' + print*,' test_total_grad_lapl' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -767,6 +721,9 @@ subroutine test_total_grad_square enddo enddo enddo + print*,'******' + print*,'******' + print*,'test_total_grad_square' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -1057,3 +1014,86 @@ end ! --- + +subroutine test_old_ints + implicit none + integer :: i,j,k,l + double precision :: old, new, contrib, get_ao_tc_sym_two_e_pot + double precision :: integral_sym , integral_nsym,accu + PROVIDE ao_tc_sym_two_e_pot_in_map + accu = 0.d0 + do j = 1, ao_num + do l= 1, ao_num + do i = 1, ao_num + do k = 1, ao_num +! integral_sym = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map) + ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis +! integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) +! old = integral_sym + integral_nsym +! old = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + new = ao_tc_int_chemist_test(k,i,l,j) + old = ao_tc_int_chemist_no_cycle(k,i,l,j) + contrib = dabs(old - new) + if(contrib.gt.1.d-6)then + print*,'problem !!' 
+ print*,i,j,k,l + print*,old, new, contrib + endif + accu += contrib + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'in test_old_ints' + print*,'accu = ',accu/dble(ao_num**4) + +end + +subroutine test_int2_grad1_u12_ao_test + implicit none + integer :: i,j,ipoint,m,k,l + double precision :: weight,accu_relat, accu_abs, contrib + double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) + allocate(array(ao_num, ao_num, ao_num, ao_num)) + array = 0.d0 + allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) + array_ref = 0.d0 + do m = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + enddo + enddo + enddo + enddo + enddo + enddo + + accu_relat = 0.d0 + accu_abs = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) + accu_abs += contrib + if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then + accu_relat += contrib/dabs(array_ref(j,i,l,k)) + endif + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'test_int2_grad1_u12_ao_test' + print*,'accu_abs = ',accu_abs/dble(ao_num)**4 + print*,'accu_relat = ',accu_relat/dble(ao_num)**4 +end diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 038b24c8..95b1a964 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -14,7 +14,7 @@ subroutine routine output=trim(ezfio_filename)//'.wf_sorted' i_unit_output = getUnitAndOpen(output,'w') do i = 1, N_det - write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1))/dabs(psi_coef_sorted(1,1)) enddo end 
From a38255cec989f639231d3254784787e725961be1 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 12:43:15 +0200 Subject: [PATCH 30/87] added Hn.py --- external/qp2-dependencies | 2 +- scripts/Hn.py | 2 +- scripts/get_fci_tc_conv.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100755 scripts/get_fci_tc_conv.sh diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/scripts/Hn.py b/scripts/Hn.py index a4119145..0f938510 100644 --- a/scripts/Hn.py +++ b/scripts/Hn.py @@ -5,7 +5,7 @@ arg = sys.argv #f = open('data_dft','r') n = int(sys.argv[1]) r = float(sys.argv[2]) -f = open('H'+str(n)+'_'+str(r),'w') +f = open('H'+str(n)+'_'+str(r)+'.xyz','w') string=str(n)+"\n" f.write(string) string="\n" diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh new file mode 100755 index 00000000..643f3ac0 --- /dev/null +++ b/scripts/get_fci_tc_conv.sh @@ -0,0 +1,2 @@ +file=$1 +grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From 7e617bee131ccd63c90394e26ae82acf454c166e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Mar 2023 13:24:28 +0200 Subject: [PATCH 31/87] COLLAPSE --- external/qp2-dependencies | 2 +- src/bi_ort_ints/three_body_ijmk.irp.f | 12 ++++++------ src/bi_ort_ints/three_body_ijmkl.irp.f | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..ce14f57b 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f 
b/src/bi_ort_ints/three_body_ijmk.irp.f index 853972f7..5afd49ab 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -27,7 +27,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -74,7 +74,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -121,7 +121,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -168,7 +168,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -214,7 +214,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -261,7 +261,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED 
(mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd5c4977..ae4c9bd5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -75,7 +75,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -124,7 +124,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -173,7 +173,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -222,7 +222,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 
1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -271,7 +271,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num From 6d7d7ccfb4420556a341dd454caaa759bb721927 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 29 Mar 2023 15:47:59 +0200 Subject: [PATCH 32/87] set the threshold_cycle to 1e-10 by default --- src/bi_ort_ints/total_twoe_pot.irp.f | 1 + src/tc_keywords/EZFIO.cfg | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 78047d1b..f5f5959a 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE ao_tc_sym_two_e_pot_in_map + !!! 
TODO :: OPENMP do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 62b6d2bf..e65a1400 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -50,7 +50,7 @@ default: 1.e-5 type: Threshold doc: Thresholds to cycle the integrals with the envelop interface: ezfio,provider,ocaml -default: 1.e-15 +default: 1.e-10 [max_it_dav] type: integer From 504d46f6934e07a7e8ffb6152d3e43e01b9a963e Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 1 Apr 2023 18:41:47 +0200 Subject: [PATCH 33/87] added print_mos --- external/qp2-dependencies | 2 +- src/kohn_sham/print_mos.irp.f | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/kohn_sham/print_mos.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f new file mode 100644 index 00000000..5e728444 --- /dev/null +++ b/src/kohn_sham/print_mos.irp.f @@ -0,0 +1,30 @@ +program print_mos + implicit none + integer :: i,nx + double precision :: r(3), xmax, dx, accu + double precision, allocatable :: mos_array(:) + double precision:: alpha,envelop + allocate(mos_array(mo_num)) + xmax = 5.d0 + nx = 1000 + dx=xmax/dble(nx) + r = 0.d0 + alpha = 0.5d0 + do i = 1, nx + call give_all_mos_at_r(r,mos_array) + accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 + accu = dsqrt(accu) + envelop = (1.d0 - dexp(-alpha * r(3)**2)) + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + r(3) += dx + enddo + +end + +double precision function f_mu(x) + implicit none + double precision, intent(in) :: x + + + +end From 450a80e3078d695c931386044ddcd1451b1eaacd Mon Sep 17 00:00:00 
2001 From: eginer Date: Mon, 3 Apr 2023 14:32:25 +0200 Subject: [PATCH 34/87] fixed stupid problem in pt_charges --- src/nuclei/write_pt_charges.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index 6dbcd5b8..f5007090 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -21,7 +21,7 @@ def mv_in_ezfio(ezfio,tmp): os.system(cmdmv) -# Getting the EZFIO + ##Getting the EZFIO EZFIO=sys.argv[1] EZFIO=EZFIO.replace("/", "") print(EZFIO) @@ -66,8 +66,20 @@ zip_in_ezfio(EZFIO,tmp) tmp="pts_charge_coord" fcoord = open(tmp,'w') fcoord.write(" 2\n") -fcoord.write(" "+str(n_charges)+' 3\n') -#fcoord.write(" "+' 3 '+str(n_charges)+' \n') +if(n_charges < 10): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000000): + fcoord.write(" "+str(n_charges)+' 3\n') for i in range(n_charges): fcoord.write(' '+coord_x[i]+'\n') for i in range(n_charges): From dee12a7907a2fcc3fead9b0784c2d3989f1c2433 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Apr 2023 08:03:31 +0200 Subject: [PATCH 35/87] Fix call abort --- external/qp2-dependencies | 2 +- src/determinants/h_apply.irp.f | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..ce14f57b 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 diff --git a/src/determinants/h_apply.irp.f 
b/src/determinants/h_apply.irp.f index a67f71da..078c2104 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -70,11 +70,14 @@ subroutine resize_H_apply_buffer(new_size,iproc) PROVIDE H_apply_buffer_allocated - if (N_det < 0) call abort(irp_here//': N_det < 0') - if (N_int <= 0) call abort(irp_here//': N_int <= 0') - if (new_size <= 0) call abort(irp_here//': new_size <= 0') - if (iproc < 0) call abort(irp_here//': iproc < 0') - if (iproc >= nproc) call abort(irp_here//': iproc >= nproc') + ASSERT (new_size > 0) + ASSERT (iproc >= 0) + ASSERT (iproc < nproc) + if (N_det < 0) call abort() !irp_here//': N_det < 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (new_size <= 0) call abort() !irp_here//': new_size <= 0') + if (iproc < 0) call abort() !irp_here//': iproc < 0') + if (iproc >= nproc) call abort() !irp_here//': iproc >= nproc') allocate ( buffer_det(N_int,2,new_size), & buffer_coef(new_size,N_states), & @@ -363,9 +366,9 @@ subroutine replace_wf(N_det_new, LDA, psi_coef_new, psi_det_new) PROVIDE H_apply_buffer_allocated - if (N_det_new <= 0) call abort(irp_here//': N_det_new <= 0') - if (N_int <= 0) call abort(irp_here//': N_int <= 0') - if (LDA < N_det_new) call abort(irp_here//': LDA < N_det_new') + if (N_det_new <= 0) call abort() !irp_here//': N_det_new <= 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (LDA < N_det_new) call abort() !irp_here//': LDA < N_det_new') do j=0,nproc-1 H_apply_buffer(j)%N_det = 0 From 04715abc640010ce15e5096b504baa5425bb36ab Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:12:32 +0200 Subject: [PATCH 36/87] beginning to work on s2 for TC --- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 739 ++++++++++++++++++ .../{u0_h_u0.irp.f => h_tc_u0.irp.f} | 3 - src/tc_bi_ortho/tc_bi_ortho.irp.f | 1 - src/tc_bi_ortho/tc_h_eigvectors.irp.f | 2 - src/tc_scf/tc_scf.irp.f | 4 +- 5 files changed, 741 insertions(+), 8 deletions(-) create mode 100644 
src/tc_bi_ortho/h_tc_s2_u0.irp.f rename src/tc_bi_ortho/{u0_h_u0.irp.f => h_tc_u0.irp.f} (99%) diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f new file mode 100644 index 00000000..5a9f5e69 --- /dev/null +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -0,0 +1,739 @@ +subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + +subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + + +subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical, intent(in) :: do_right + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1, do_right) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_tc_s2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (2) + call H_tc_s2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (3) + call H_tc_s2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (4) + call H_tc_s2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case default + call H_tc_s2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + end select +end +BEGIN_TEMPLATE + +subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! 
------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev,hmono, htwoe, hthree, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep ! Loop over all determinants (/!\ not in psidet order) + + krow = psi_bilinear_matrix_rows(k_a) ! 
Index of alpha part of determinant k_a + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) ! Index of beta part of determinant k_a + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! -> Here, tmp_det is determinant k_a + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + + ! tmp_det2 is a single excitation of tmp_det in the beta spin + ! the alpha part is not defined yet + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + ! rows : | 1 2 3 4 | 1 3 4 6 | .... | 1 2 4 5 | + ! cols : | 1 1 1 1 | 2 2 2 2 | .... | 8 8 8 8 | + ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 | + ! ^ ^ + ! | | + ! l_a N_det + ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol + + ! Below we identify all the determinants with the same beta part + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + ! 
Get all single excitations from tmp_det(1,1) to buffer(1,?) + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! 
----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! 
Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! 
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! 
---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) +! call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! 
---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! 
----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem + double precision :: hmono, htwoe, hthree + +! hij = diag_H_mat_elem(tmp_det,$N_int) + call diag_htilde_mu_mat_fock_bi_ortho ($N_int, tmp_det, hmono, htwoe, hthree, hij) + call get_s2(tmp_det,tmp_det,$N_int,sij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/h_tc_u0.irp.f similarity index 99% rename from src/tc_bi_ortho/u0_h_u0.irp.f rename to src/tc_bi_ortho/h_tc_u0.irp.f index e107ad88..5e6150ea 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_u0.irp.f @@ -93,9 +93,6 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) -! provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e -! provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell -! 
provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2d51f6f0..bd0b1ef5 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -3,7 +3,6 @@ program tc_bi_ortho BEGIN_DOC ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC - print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f2cbb637..11a14b41 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -25,8 +25,6 @@ subroutine diagonalize_CI_tc psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo -! psi_energy(1:N_states) = CI_electronic_energy(1:N_states) -! psi_s2(1:N_states) = CI_s2(1:N_states) SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index ae3b609b..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 60 - my_n_pt_a_grid = 110 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! 
small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid From 042159a13497dc73129e8360cd6b47611acb5c4f Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:50:55 +0200 Subject: [PATCH 37/87] added h_p in davidson diagonalization hS2 --- src/davidson/diagonalization_hs2_dressed.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..8117f320 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,7 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) - y = h +! y = h + y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From fbe8c4b60f349322ab8deb2b34007c585ed5587b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 17:53:09 +0200 Subject: [PATCH 38/87] working on S2 for TC: davidson with S2 penalty seems to work --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 547 ++++++++++++++++++++++++++++++ src/tc_bi_ortho/test_s2_tc.irp.f | 157 +++++++++ 2 files changed, 704 insertions(+) create mode 100644 src/tc_bi_ortho/dav_h_tc_s2.irp.f create mode 100644 src/tc_bi_ortho/test_s2_tc.irp.f diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f new file mode 100644 index 00000000..02aa712b --- /dev/null +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -0,0 +1,547 @@ + +! --- + +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! 
N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in + double precision, intent(in) :: H_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(3,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), h_p(:,:), s2(:) + real, allocatable :: y_s(:,:) + double precision, allocatable :: s_(:,:), s_tmp(:,:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:), S_d(:,:) + + double precision, allocatable :: W(:,:) + real, pointer :: S(:,:) + + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! 
print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda + + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.5d0*dble(sze*m)*(N_st_diag*itermax) &! W, S + + 4.5d0*(N_st_diag*itermax)**2 &! h,y,y_s,s_, s_tmp + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! 
itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + ! --- + + + allocate( W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + S_d(sze,N_st_diag), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + h_p(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st), & + s2(N_st_diag*itermax), & + y_s(N_st_diag*itermax,N_st_diag*itermax) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! 
to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U +! call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call hcalc(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) + else + + ! Already computed in update below + continue + endif + ! Compute s_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2) + do j=1,shift2 + do i=1,shift2 + s_(i,j) = 0.d0 + do k=1,sze + s_(i,j) = s_(i,j) + U(k,i) * dble(S(k,j)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + ! Penalty method + ! -------------- + + if (s2_eig) then + h_p = s_ + do k=1,shift2 + h_p(k,k) = h_p(k,k) - expected_s2 + enddo + if (only_expected_s2) then + alpha = 0.1d0 + h_p = h + alpha*h_p + else + alpha = 0.0001d0 + h_p = h + alpha*h_p + endif + else + h_p = h + alpha = 0.d0 + endif + + ! Diagonalize h y = lambda y + ! 
--------------------------- + call diag_nonsym_right(shift2, h_p(1,1), size(h_p, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + do k = 1, N_st_diag +! print*,'lambda(k) before = ',lambda(k) + lambda(k) = 0.d0 + do l = 1, shift2 + do m = 1, shift2 + lambda(k) += y(m,k) * h(m,l) * y(l,k) + enddo + enddo +! print*,'lambda(k) new = ',lambda(k) + enddo + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + enddo + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! 
state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = s2(l) + to_print(3,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:3,1:N_st) + endif + + ! 
Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f new file mode 100644 index 00000000..a5241fe3 --- /dev/null +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -0,0 +1,157 @@ +program test_tc + implicit none + read_wf = .True. + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. 
+ touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call routine_test_s2 + call routine_test_s2_davidson +end + +subroutine routine_test_s2 + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking first the Left ' + do_right = .False. + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + print*,'Checking then the right ' + do_right = .True. 
+ do i = 1, sze + u_0(i,1) = psi_r_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + +end + +subroutine routine_test_s2_davidson + implicit none + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + integer :: i,istate + logical :: converged + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + enddo + ! Preparing the left-eigenvector + print*,'Computing the left-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + print*,'energies = ',energies + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + integer :: sze,N_st + logical :: do_right + sze = N_det + N_st = 1 + do_right = .False. 
+ allocate(s_0_new(N_det,1),v_0_new(N_det,1)) + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + double precision :: accu_e_0, accu_s_0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + + ! Preparing the right-eigenvector + print*,'Computing the right-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + print*,'energies = ',energies + sze = N_det + N_st = 1 + do_right = .True. + v_0_new = 0.d0 + s_0_new = 0.d0 + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + +end From 367abb3d70a452eec981febbd1a5999f91be9bd7 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 19:37:54 +0200 Subject: [PATCH 39/87] S2 OK in TC --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 4 +- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 30 ++++ src/tc_bi_ortho/tc_h_eigvectors.irp.f | 201 +++++++++++++++++++------- src/tc_bi_ortho/test_s2_tc.irp.f | 14 +- 4 files changed, 189 insertions(+), 60 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index 02aa712b..c0ea054a 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! 
--- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) use mmap_module @@ -30,6 +30,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: s2_out(N_st) external hcalc character*(16384) :: write_buffer @@ -528,6 +529,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia do k = 1, N_st energies(k) = lambda(k) + s2_out(k) = s2(k) enddo write_buffer = '=====' do i = 1, N_st diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 5a9f5e69..30b0f273 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -1,3 +1,33 @@ + +subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. + ! + ! Computes $s_0 = \langle l_0 | S^2 | r_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate + double precision, allocatable :: s_0(:,:), v_0(:,:) + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) + do_right = .True. 
+ call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + do istate = 1, N_st + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + enddo +end + subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) use bitmasks implicit none diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 11a14b41..71dad8d6 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -35,6 +35,7 @@ end &BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] &BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC @@ -46,64 +47,153 @@ end logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) + double precision, parameter :: alpha = 0.1d0 + integer :: i_good_state,i_other_state, i_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + integer, allocatable :: iorder(:) PROVIDE N_det N_int if(n_det.le.N_det_max_full)then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) + allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) 
+ if(s2_eig)then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo + endif + call non_hrmt_real_diag(N_det,H_prime,& leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& n_real_tc_bi_orth_eigval_right,eigval_right_tmp) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - integer, allocatable :: iorder(:) - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det - iorder(i) = i - coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - - if(igood_r.ne.igood_l.and.igood_r.ne.1)then - print *,'' - print *,'Warning, the left and right eigenvectors are "not the same" ' - print *,'Warning, the ground state is not dominated by HF...' - print *,'State with largest RIGHT coefficient of HF ',igood_r - print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) - print *,'State with largest LEFT coefficient of HF ',igood_l - print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then - print *,'Following the states with the largest coef on HF' - print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r - eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! 
print*,reigvec_tc_bi_orth(j,1) - enddo - i= igood_l - eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) - enddo - else - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) +! do i = 1, N_det +! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) +! enddo + call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + i_state = 0 + good_state_array = .False. + if(s2_eig)then + if (only_expected_s2) then + do j=1,N_det + ! Select at least n_states states with S^2 values closed to "expected_s2" +! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) + if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif enddo - enddo + else + do j=1,N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + if(i_state .ne.0)then + ! 
Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i=1,N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 + + else ! 
s2_eig + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det + iorder(i) = i + coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + + if(igood_r.ne.igood_l.and.igood_r.ne.1)then + print *,'' + print *,'Warning, the left and right eigenvectors are "not the same" ' + print *,'Warning, the ground state is not dominated by HF...' + print *,'State with largest RIGHT coefficient of HF ',igood_r + print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) + print *,'State with largest LEFT coefficient of HF ',igood_l + print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + endif + if(state_following_tc)then + print *,'Following the states with the largest coef on HF' + print *,'igood_r,igood_l',igood_r,igood_l + i= igood_r + eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) +! 
print*,reigvec_tc_bi_orth(j,1) + enddo + i= igood_l + eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) + enddo + else + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + endif endif else double precision, allocatable :: H_jj(:),vec_tmp(:,:) @@ -111,6 +201,8 @@ end external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) @@ -125,7 +217,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -140,7 +233,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) +! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -154,6 +248,7 @@ end norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,' = ',s2_eigvec_tc_bi_orth(1) END_PROVIDER diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index a5241fe3..4229fef1 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -84,12 +84,12 @@ end subroutine routine_test_s2_davidson implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) integer :: i,istate logical :: converged external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo @@ -105,8 +105,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -122,6 +121,8 @@ subroutine routine_test_s2_davidson 
accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 @@ -137,8 +138,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. @@ -151,6 +151,8 @@ subroutine routine_test_s2_davidson accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 From accf8c28f68ca7846a66e480c2d55af795f2710f Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:50:55 +0200 Subject: [PATCH 40/87] added h_p in davidson diagonalization hS2 --- src/davidson/diagonalization_hs2_dressed.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..8117f320 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,7 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) - y = h +! 
y = h + y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From 6be57e3c01f1ba71d22495fcfaed52448792163e Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 41/87] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING MULTIPLE TIME THE ROUTINE + ! + ! 
SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! --- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. 
+ do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. 
From 5cfff229a17859c97aeb15363bad58946852cdf7 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 42/87] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING MULTIPLE TIME THE ROUTINE + ! + ! 
SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! --- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. 
+ do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. 
From e3aadcf06cfe92744d15688ee9ba5aaa0a6d2e53 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 22:44:14 +0200 Subject: [PATCH 43/87] last update on davidson S2 --- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 91775cf1..69302da2 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -222,11 +222,14 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - integer :: n_it_max + integer :: n_it_max,i_it n_it_max = 1 converged = .False. + i_it = 0 do while (.not.converged) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5)exit enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) @@ -248,8 +251,11 @@ end ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. + i_it = 0 do while (.not.converged) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 
5)exit enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) From 4c0de615fb0a572212c2c3940c601f64e8cd0164 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Apr 2023 19:36:39 +0200 Subject: [PATCH 44/87] Fix qp_extract_cipsi_data.py --- scripts/qp_extract_cipsi_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 8f0b1f3c..dd8e9c4d 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -23,6 +23,9 @@ def extract_data(output): reading = False for iline, line in enumerate(lines): + if line.startswith("Summary at N_det"): + reading = False + if not reading and line.startswith(" N_det "): n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) reading = True From 86ad99a753455b8a267acc4638ff8fcc6c2b1e30 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Apr 2023 19:36:39 +0200 Subject: [PATCH 45/87] Fix qp_extract_cipsi_data.py --- scripts/qp_extract_cipsi_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 8f0b1f3c..dd8e9c4d 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -23,6 +23,9 @@ def extract_data(output): reading = False for iline, line in enumerate(lines): + if line.startswith("Summary at N_det"): + reading = False + if not reading and line.startswith(" N_det "): n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) reading = True From 3d1c30700889a34094a345037876a09f00e1ebda Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 14 Apr 2023 10:56:07 +0200 Subject: [PATCH 46/87] canged h_p to h --- src/davidson/diagonalization_hs2_dressed.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index bbbab2d4..dc42b9a8 100644 --- 
a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -466,8 +466,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From 9c2fee6e1c69f47a1130d88fcdf470d849b92cc8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Apr 2023 12:33:29 +0200 Subject: [PATCH 47/87] Improve extract_data --- scripts/qp_extract_cipsi_data.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index dd8e9c4d..70935d73 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -43,8 +43,8 @@ def extract_data(output): e_ex_line = lines[iline+2] e_ex = float(e_ex_line.split()[1]) reading = False - - data.append((n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex)) + new_data = " {:8d} {:16.8f} {:e} {:e} {:e} {:e} {:16.8f}".format(n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex) + data.append(new_data) n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None return data @@ -52,6 +52,4 @@ def extract_data(output): data = extract_data(output) for item in data: - print(" ".join(str(x) for x in item)) - - + print(item) From f475446d9dfb9294cb916053b8a6a0d1f52149f7 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 14 Apr 2023 16:35:06 +0200 Subject: [PATCH 48/87] Improved qp command --- bin/qp_test | 3 ++- etc/qp.rc | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bin/qp_test b/bin/qp_test index 288b7291..d3a188fb 100755 --- a/bin/qp_test +++ b/bin/qp_test @@ -46,7 +46,7 @@ def main(arguments): append_bats(dirname, filenames) else: for (dirname, _, filenames) in os.walk(os.getcwd(), followlinks=False): - if "IRPF90_temp" not in dirname: + if "IRPF90_temp" not in dirname and "external" not in dirname: 
append_bats(dirname, filenames) l_bats = [y for _, y in sorted(l_bats)] @@ -67,6 +67,7 @@ def main(arguments): os.system(test+" python3 bats_to_sh.py "+bats_file+ "| bash") else: +# print(" ".join(["bats", "--verbose-run", "--trace", bats_file])) subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ) diff --git a/etc/qp.rc b/etc/qp.rc index c56661c7..d339f475 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -110,6 +110,11 @@ function qp() unset COMMAND ;; + "test") + shift + qp_test $@ + ;; + *) which "qp_$1" &> /dev/null if [[ $? -eq 0 ]] ; then @@ -183,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in * ; do [[ -f ${i}/ezfio/.version ]] && echo $i ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) @@ -215,10 +220,15 @@ _qp_Complete() return 0 ;; esac;; + test) + COMPREPLY=( $(compgen -W "-v -a " -- $cur ) ) + return 0 + ;; *) COMPREPLY=( $(compgen -W 'plugins set_file \ unset_file man \ create_ezfio \ + test \ convert_output_to_ezfio \ -h update' -- $cur ) ) From 44d867297423cf33736340b111dc28f6f114a5dc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Apr 2023 16:24:07 +0200 Subject: [PATCH 49/87] OMP Critial around format_w_error --- src/utils/format_w_error.irp.f | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 1378d367..ce2665a7 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -1,7 +1,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_error) implicit none - + BEGIN_DOC ! Format for double precision, value(error) END_DOC @@ -14,7 +14,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! out ! 
| format_value | character | string FX.Y for the format | - ! | str_error | character | string of the error | + ! | str_error | character | string of the error | ! internal ! | str_size | character | size in string format | @@ -33,6 +33,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err character(len=20) :: str_size, str_nb_digits, str_exp integer :: nb_digits + !$OMP CRITICAL ! max_nb_digit: Y max ! size_nb = Size of the double: X (FX.Y) write(str_size,'(I3)') size_nb @@ -40,17 +41,17 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! Error write(str_exp,'(1pE20.0)') error str_error = trim(adjustl(str_exp)) - + ! Number of digit: Y (FX.Y) from the exponent str_nb_digits = str_exp(19:20) read(str_nb_digits,*) nb_digits - + ! If the error is 0d0 - if (error <= 1d-16) then + if (error <= 1d-16) then write(str_nb_digits,*) max_nb_digits endif - ! If the error is too small + ! If the error is too small if (nb_digits > max_nb_digits) then write(str_nb_digits,*) max_nb_digits str_error(1:1) = '0' @@ -65,7 +66,8 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! FX.Y,A1,A1,A1 for value(str_error) !string = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))//',A1,A1,A1' - ! FX.Y just for the value + ! 
FX.Y just for the value format_value = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits)) + !$OMP END CRITICAL end From 5b6ecfa564b8d889981342c2e9ad597d124d15a3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Apr 2023 17:03:16 +0200 Subject: [PATCH 50/87] Improve thread-safety --- src/cipsi/environment.irp.f | 2 ++ src/davidson/davidson_parallel.irp.f | 4 ++++ src/ezfio_files/lock.irp.f | 17 +++++++++++++++++ src/mo_two_e_ints/map_integrals.irp.f | 2 ++ src/nuclei/nuclei.irp.f | 14 +++++++------- src/two_body_rdm/io_two_rdm.irp.f | 12 ++++++++---- src/utils/format_w_error.irp.f | 4 ++-- src/utils/memory.irp.f | 8 ++++++-- 8 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/cipsi/environment.irp.f b/src/cipsi/environment.irp.f index 5c0e0820..363b8f1c 100644 --- a/src/cipsi/environment.irp.f +++ b/src/cipsi/environment.irp.f @@ -7,7 +7,9 @@ BEGIN_PROVIDER [ integer, nthreads_pt2 ] character*(32) :: env call getenv('QP_NTHREADS_PT2',env) if (trim(env) /= '') then + call lock_io() read(env,*) nthreads_pt2 + call unlock_io() call write_int(6,nthreads_pt2,'Target number of threads for PT2') endif END_PROVIDER diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index e627dfc9..399ab11b 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -150,7 +150,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, exit endif if(task_id == 0) exit + call lock_io() read (msg,*) imin, imax, ishift, istep + call unlock_io() integer :: k do k=imin,imax v_t(:,k) = 0.d0 @@ -555,7 +557,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io() read(env,*) nthreads_davidson + call unlock_io() call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER diff --git a/src/ezfio_files/lock.irp.f b/src/ezfio_files/lock.irp.f 
index 53a99254..d28f7641 100644 --- a/src/ezfio_files/lock.irp.f +++ b/src/ezfio_files/lock.irp.f @@ -9,4 +9,21 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), file_lock ] call omp_init_lock(file_lock) END_PROVIDER +! These functions need to be called because internal read and write are not thread safe. +subroutine lock_io() + implicit none + BEGIN_DOC +! Needs to be called because before doing I/O because internal read and write +! are not thread safe. + END_DOC + call omp_set_lock(file_lock) +end subroutine lock_io() +subroutine unlock_io() + implicit none + BEGIN_DOC +! Needs to be called because afterdoing I/O because internal read and write +! are not thread safe. + END_DOC + call omp_unset_lock(file_lock) +end subroutine lock_io() diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 272916e3..ada256a2 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -377,6 +377,7 @@ integer function load_mo_integrals(filename) integer*8 :: n, j load_mo_integrals = 1 open(unit=66,file=filename,FORM='unformatted',STATUS='UNKNOWN') + call lock_io() read(66,err=98,end=98) iknd, kknd if (iknd /= integral_kind) then print *, 'Wrong integrals kind in file :', iknd @@ -399,6 +400,7 @@ integer function load_mo_integrals(filename) n = mo_integrals_map%map(i)%n_elements read(66,err=99,end=99) (key(j), j=1,n), (val(j), j=1,n) enddo + call unlock_io() call map_sort(mo_integrals_map) load_mo_integrals = 0 return diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f index 3c04316f..fabdc42e 100644 --- a/src/nuclei/nuclei.irp.f +++ b/src/nuclei/nuclei.irp.f @@ -241,13 +241,13 @@ END_PROVIDER enddo character*(80) :: buffer, dummy do - read(iunit,'(A80)',end=10) buffer - read(buffer,*) i ! First read i - read(buffer,*) i, element_name(i), dummy, element_mass(i) - enddo - 10 continue - close(10) - endif + read(iunit,'(A80)',end=10) buffer + read(buffer,*) i ! 
First read i + read(buffer,*) i, element_name(i), dummy, element_mass(i) + enddo + 10 continue + close(10) + endif IRP_IF MPI_DEBUG print *, irp_here, mpi_rank diff --git a/src/two_body_rdm/io_two_rdm.irp.f b/src/two_body_rdm/io_two_rdm.irp.f index f7008ca9..bdd8a4f9 100644 --- a/src/two_body_rdm/io_two_rdm.irp.f +++ b/src/two_body_rdm/io_two_rdm.irp.f @@ -1,15 +1,17 @@ subroutine write_array_two_rdm(n_orb,nstates,array_tmp,name_file) implicit none integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,nstates) character*(128) :: output integer :: i_unit_output,getUnitAndOpen - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'W') + call lock_io() write(i_unit_output)array_tmp + call unlock_io() close(unit=i_unit_output) end @@ -18,12 +20,14 @@ subroutine read_array_two_rdm(n_orb,nstates,array_tmp,name_file) character*(128) :: output integer :: i_unit_output,getUnitAndOpen integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,N_states) - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'R') + call lock_io() read(i_unit_output)array_tmp + call unlock_io() close(unit=i_unit_output) end diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index ce2665a7..7f7458b6 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -33,7 +33,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err character(len=20) :: str_size, str_nb_digits, str_exp integer :: nb_digits - !$OMP CRITICAL + call lock_io() ! max_nb_digit: Y max ! 
size_nb = Size of the double: X (FX.Y) write(str_size,'(I3)') size_nb @@ -68,6 +68,6 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! FX.Y just for the value format_value = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits)) - !$OMP END CRITICAL + call unlock_io() end diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index d5a066a1..115b2cbe 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -8,7 +8,9 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] qp_max_mem = 2000 call getenv('QP_MAXMEM',env) if (trim(env) /= '') then + call lock_io() read(env,*) qp_max_mem + call unlock_io() endif call write_int(6,qp_max_mem,'Target maximum memory (GB)') @@ -25,7 +27,7 @@ subroutine resident_memory(value) character*(32) :: key double precision, intent(out) :: value - call omp_set_lock(file_lock) + call lock_io() call usleep(10) value = 0.d0 @@ -40,7 +42,7 @@ subroutine resident_memory(value) 20 continue close(iunit) value = value / (1024.d0*1024.d0) - call omp_unset_lock(file_lock) + call unlock_io() end function subroutine total_memory(value) @@ -53,6 +55,7 @@ subroutine total_memory(value) character*(32) :: key double precision, intent(out) :: value + call lock_io() iunit = getUnitAndOpen('/proc/self/status','r') do read(iunit,*,err=10,end=20) key, value @@ -64,6 +67,7 @@ subroutine total_memory(value) 20 continue close(iunit) value = value / (1024.d0*1024.d0) + call unlock_io() end function double precision function memory_of_double(n) From 79c9d91d1991b7f2561e089f00e808c5c8ceb881 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:20:36 +0200 Subject: [PATCH 51/87] missing script ccsd --- src/ccsd/org/TANGLE_org_mode.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 src/ccsd/org/TANGLE_org_mode.sh diff --git a/src/ccsd/org/TANGLE_org_mode.sh b/src/ccsd/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/ccsd/org/TANGLE_org_mode.sh @@ 
-0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done From 0325e59ebef7fa8c0c6757d126215ee28db3021d Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:22:04 +0200 Subject: [PATCH 52/87] remove old utils_trust_region --- src/utils_trust_region/EZFIO.cfg | 89 - src/utils_trust_region/NEED | 1 - src/utils_trust_region/README.rst | 5 - src/utils_trust_region/TANGLE_org_mode.sh | 7 - src/utils_trust_region/algo_trust.irp.f | 248 --- src/utils_trust_region/algo_trust.org | 593 ------ .../apply_mo_rotation.irp.f | 85 - src/utils_trust_region/apply_mo_rotation.org | 86 - src/utils_trust_region/mat_to_vec_index.irp.f | 61 - src/utils_trust_region/mat_to_vec_index.org | 63 - src/utils_trust_region/pi.h | 2 - src/utils_trust_region/rotation_matrix.irp.f | 443 ----- src/utils_trust_region/rotation_matrix.org | 454 ----- .../sub_to_full_rotation_matrix.irp.f | 64 - .../sub_to_full_rotation_matrix.org | 65 - .../trust_region_expected_e.irp.f | 119 -- .../trust_region_expected_e.org | 121 -- .../trust_region_optimal_lambda.irp.f | 1655 ---------------- .../trust_region_optimal_lambda.org | 1665 ----------------- src/utils_trust_region/trust_region_rho.irp.f | 121 -- src/utils_trust_region/trust_region_rho.org | 123 -- .../trust_region_step.irp.f | 716 ------- src/utils_trust_region/trust_region_step.org | 726 ------- src/utils_trust_region/vec_to_mat_index.irp.f | 71 - src/utils_trust_region/vec_to_mat_index.org | 72 - src/utils_trust_region/vec_to_mat_v2.irp.f | 39 - src/utils_trust_region/vec_to_mat_v2.org | 40 - 27 files changed, 7734 deletions(-) delete mode 100644 src/utils_trust_region/EZFIO.cfg delete mode 100644 src/utils_trust_region/NEED delete mode 100644 src/utils_trust_region/README.rst delete mode 100755 src/utils_trust_region/TANGLE_org_mode.sh delete mode 100644 src/utils_trust_region/algo_trust.irp.f delete mode 100644 src/utils_trust_region/algo_trust.org delete mode 100644 
src/utils_trust_region/apply_mo_rotation.irp.f delete mode 100644 src/utils_trust_region/apply_mo_rotation.org delete mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f delete mode 100644 src/utils_trust_region/mat_to_vec_index.org delete mode 100644 src/utils_trust_region/pi.h delete mode 100644 src/utils_trust_region/rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/rotation_matrix.org delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.org delete mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f delete mode 100644 src/utils_trust_region/trust_region_expected_e.org delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.org delete mode 100644 src/utils_trust_region/trust_region_rho.irp.f delete mode 100644 src/utils_trust_region/trust_region_rho.org delete mode 100644 src/utils_trust_region/trust_region_step.irp.f delete mode 100644 src/utils_trust_region/trust_region_step.org delete mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_index.org delete mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_v2.org diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg deleted file mode 100644 index 9c9f6248..00000000 --- a/src/utils_trust_region/EZFIO.cfg +++ /dev/null @@ -1,89 +0,0 @@ -[thresh_delta] -type: double precision -doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta -interface: ezfio,provider,ocaml -default: 1.e-10 - -[thresh_rho] -type: double precision -doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. 
thresh_rho) -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_eig] -type: double precision -doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[thresh_model] -type: double precision -doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[absolute_eig] -type: logical -doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) -interface: ezfio,provider,ocaml -default: false - -[thresh_wtg] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_wtg2] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day -interface: ezfio,provider,ocaml -default: 1.e-6 - -[avoid_saddle] -type: logical -doc: Test to avoid saddle point, active if true -interface: ezfio,provider,ocaml -default: false - -[version_avoid_saddle] -type: integer -doc: cf. trust region, not stable -interface: ezfio,provider,ocaml -default: 3 - -[thresh_rho_2] -type: double precision -doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. 
thresh_rho_2) the step is accepted, else the step is rejected -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_cc] -type: double precision -doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_model_2] -type: double precision -doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning -interface: ezfio,provider,ocaml -default: 1.e-12 - -[version_lambda_search] -type: integer -doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 -interface: ezfio,provider,ocaml -default: 2 - -[nb_it_max_lambda] -type: integer -doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 100 - -[nb_it_max_pre_search] -type: integer -doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED deleted file mode 100644 index 1a65ce38..00000000 --- a/src/utils_trust_region/NEED +++ /dev/null @@ -1 +0,0 @@ -hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst deleted file mode 100644 index 6a0689b6..00000000 --- a/src/utils_trust_region/README.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -trust_region -============ - -The documentation can be found in the org files. 
diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh deleted file mode 100755 index 059cbe7d..00000000 --- a/src/utils_trust_region/TANGLE_org_mode.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -list='ls *.org' -for element in $list -do - emacs --batch $element -f org-babel-tangle -done diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f deleted file mode 100644 index eac17275..00000000 --- a/src/utils_trust_region/algo_trust.irp.f +++ /dev/null @@ -1,248 +0,0 @@ -! Algorithm for the trust region - -! step_in_trust_region: -! Computes the step in the trust region (delta) -! (automatically sets at the iteration 0 and which evolves during the -! process in function of the evolution of rho). The step is computing by -! constraining its norm with a lagrange multiplier. -! Since the calculation of the step is based on the Newton method, an -! estimation of the gain in energy is given using the Taylors series -! truncated at the second order (criterion_model). -! If (DABS(criterion-criterion_model) < 1d-12) then -! must_exit = .True. -! else -! must_exit = .False. - -! This estimation of the gain in energy is used by -! is_step_cancel_trust_region to say if the step is accepted or cancelled. - -! If the step must be cancelled, the calculation restart from the same -! hessian and gradient and recomputes the step but in a smaller trust -! region and so on until the step is accepted. If the step is accepted -! the hessian and the gradient are recomputed to produce a new step. - -! Example: - - -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! 
-! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! enddo - - - -! Variables: - -! Input: -! | n | integer | m*(m-1)/2 | -! | m | integer | number of mo in the mo_class | -! | H(n,n) | double precision | Hessian | -! 
| v_grad(n) | double precision | Gradient | -! | W(n,n) | double precision | Eigenvectors of the hessian | -! | e_val(n) | double precision | Eigenvalues of the hessian | -! | criterion | double precision | Actual criterion | -! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -! | rho | double precision | Given by is_step_cancel_trus_region | -! | | | Agreement between the real function and the Taylor series (2nd order) | -! | nb_iter | integer | Actual number of iterations | - -! Input/output: -! | delta | double precision | Radius of the trust region | - -! Output: -! | criterion_model | double precision | Predicted criterion after the rotation | -! | x(n) | double precision | Step | -! | must_exit | logical | If the program must exit the loop | - - -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! 
exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine - - - -! Variables: - -! Input: -! | nb_iter | integer | actual number of iterations | -! | prev_criterion | double precision | criterion before the application of the step x | -! | criterion | double precision | criterion after the application of the step x | -! | criterion_model | double precision | predicted criterion after the application of x | - -! Output: -! | rho | double precision | Agreement between the predicted criterion and the real new criterion | -! | cancel_step | logical | If the step must be cancelled | - - -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! 
in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org deleted file mode 100644 index aa836f98..00000000 --- a/src/utils_trust_region/algo_trust.org +++ /dev/null @@ -1,593 +0,0 @@ -* Algorithm for the trust region - -step_in_trust_region: -Computes the step in the trust region (delta) -(automatically sets at the iteration 0 and which evolves during the -process in function of the evolution of rho). The step is computing by -constraining its norm with a lagrange multiplier. -Since the calculation of the step is based on the Newton method, an -estimation of the gain in energy is given using the Taylors series -truncated at the second order (criterion_model). -If (DABS(criterion-criterion_model) < 1d-12) then - must_exit = .True. -else - must_exit = .False. - -This estimation of the gain in energy is used by -is_step_cancel_trust_region to say if the step is accepted or cancelled. - -If the step must be cancelled, the calculation restart from the same -hessian and gradient and recomputes the step but in a smaller trust -region and so on until the step is accepted. If the step is accepted -the hessian and the gradient are recomputed to produce a new step. - -Example: - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! 
Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! -! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! 
enddo -#+END_SRC - -Variables: - -Input: -| n | integer | m*(m-1)/2 | -| m | integer | number of mo in the mo_class | -| H(n,n) | double precision | Hessian | -| v_grad(n) | double precision | Gradient | -| W(n,n) | double precision | Eigenvectors of the hessian | -| e_val(n) | double precision | Eigenvalues of the hessian | -| criterion | double precision | Actual criterion | -| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -| rho | double precision | Given by is_step_cancel_trus_region | -| | | Agreement between the real function and the Taylor series (2nd order) | -| nb_iter | integer | Actual number of iterations | - -Input/output: -| delta | double precision | Radius of the trust region | - -Output: -| criterion_model | double precision | Predicted criterion after the rotation | -| x(n) | double precision | Step | -| must_exit | logical | If the program must exit the loop | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! 
exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine -#+END_SRC - -Variables: - -Input: -| nb_iter | integer | actual number of iterations | -| prev_criterion | double precision | criterion before the application of the step x | -| criterion | double precision | criterion after the application of the step x | -| criterion_model | double precision | predicted criterion after the application of x | - -Output: -| rho | double precision | Agreement between the predicted criterion and the real new criterion | -| cancel_step | logical | If the step must be cancelled | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! 
Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine -#+END_SRC - -** Template for MOs -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt -subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) - double precision, allocatable :: prev_mos(:,:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation - integer :: nb_iter, info, nb_sub_iter - integer :: i,j,tmp_i,tmp_j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) - allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) - allocate(prev_mos(ao_num, mo_num)) - - ! Provide the criterion, but unnecessary because it's done - ! automatically - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! 
Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - ! The new hessian and gradient are computed at the end of the previous iteration - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! 1D tmp -> 2D tmp - call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) - - ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) - call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) - - if (enforce_step_cancellation) then - print*, 'Forces the step cancellation, too large error in the rotation matrix' - rho = 0d0 - cycle - endif - - ! tmp_R to R, subspace to full space - call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - - ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) - - ! touch mo_coef - call clear_mo_map ! Only if you are using the bi-electronic integrals - ! mo_coef becomes valid - ! And avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! 
To update the other parameters if needed - call #update_parameters() - - ! To enforce the program to provide new criterion after the update - ! of the parameters - FREE C_PROVIDER - PROVIDE C_PROVIDER - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancellation of the step ? - if (cancel_step) then - ! Replacement by the previous MOs - mo_coef = prev_mos - ! call save_mos() ! depends of the time for 1 iteration - - ! No need to clear_mo_map since we don't recompute the gradient and the hessian - ! mo_coef becomes valid - ! Avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - else - ! The step is accepted: - ! criterion -> prev criterion - - ! The replacement "criterion -> prev criterion" is already done - ! in trust_region_rho, so if the criterion does not have a reason - ! to change, it will change nothing for the criterion and will - ! force the program to provide the new hessian, gradient and - ! convergence criterion for the next iteration. - ! But in the case of orbital optimization we diagonalize the CI - ! matrix after the "FREE" statement, so the criterion will change - - FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - prev_criterion = C_PROVIDER - - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! call save_mos() ! depends of the time for 1 iteration - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - ! Provide the convergence criterion - ! Provide the gradient and the hessian for the next iteration - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. 
- endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. - endif - - enddo - - ! Save the final MOs - call save_mos() - - ! Diagonalization of the hessian - ! (To see the eigenvalues at the end of the optimization) - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) - -end -#+END_SRC - -** Cartesian version -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt -subroutine algo_trust_cartesian_template(tmp_n) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit - integer :: nb_iter, nb_sub_iter - integer :: i,j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) - - PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - if (nb_iter > 0) then - PROVIDE H_PROVIDER g_PROVIDER - endif - - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! 
Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! New coordinates, check the sign - X_PROVIDER = X_PROVIDER - tmp_x - - ! touch X_PROVIDER - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! To update the other parameters if needed - call #update_parameters() - - ! New criterion - PROVIDE C_PROVIDER ! Unnecessary - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancel the previous step - if (cancel_step) then - ! Replacement by the previous coordinates, check the sign - X_PROVIDER = X_PROVIDER + tmp_x - - ! Avoid the recomputation of the hessian and the gradient - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. - endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. 
- endif - - enddo - - deallocate(e_val, W, tmp_x) - -end -#+END_SRC - -** Script template -#+BEGIN_SRC bash :tangle script_template_mos.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - -#+BEGIN_SRC bash :tangle script_template_xyz.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_X_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file -sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f deleted file mode 100644 index e274ec11..00000000 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ /dev/null @@ -1,85 +0,0 @@ -! Apply MO rotation -! Subroutine to apply the rotation matrix to the coefficients of the -! MOs. - -! New MOs = Old MOs . Rotation matrix - -! *Compute the new MOs with the previous MOs and a rotation matrix* - -! Provided: -! | mo_num | integer | number of MOs | -! | ao_num | integer | number of AOs | -! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -! Intent in: -! | R(mo_num,mo_num) | double precision | rotation matrix | - -! Intent out: -! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -! Internal: -! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -! | i,j | integer | indexes | - -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! 
Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org deleted file mode 100644 index 918581b7..00000000 --- a/src/utils_trust_region/apply_mo_rotation.org +++ /dev/null @@ -1,86 +0,0 @@ -* Apply MO rotation -Subroutine to apply the rotation matrix to the coefficients of the -MOs. - -New MOs = Old MOs . 
Rotation matrix - -*Compute the new MOs with the previous MOs and a rotation matrix* - -Provided: -| mo_num | integer | number of MOs | -| ao_num | integer | number of AOs | -| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -Intent in: -| R(mo_num,mo_num) | double precision | rotation matrix | - -Intent out: -| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -Internal: -| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -| i,j | integer | indexes | -#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! 
Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f deleted file mode 100644 index 35e12232..00000000 --- a/src/utils_trust_region/mat_to_vec_index.irp.f +++ /dev/null @@ -1,61 +0,0 @@ -! Matrix to vector index - -! *Compute the index i of a vector element from the indexes p,q of a -! matrix element* - -! Lower diagonal matrix (p,q), p > q -> vector (i) - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | p,q | integer | indexes of a matrix element in the lower diagonal | -! | | | p > q, q -> column | -! | | | p -> row, | -! | | | q -> column | - -! Input: -! | i | integer | corresponding index in the vector | - - -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! 
Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org deleted file mode 100644 index 50840584..00000000 --- a/src/utils_trust_region/mat_to_vec_index.org +++ /dev/null @@ -1,63 +0,0 @@ -* Matrix to vector index - -*Compute the index i of a vector element from the indexes p,q of a -matrix element* - -Lower diagonal matrix (p,q), p > q -> vector (i) - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| p,q | integer | indexes of a matrix element in the lower diagonal | -| | | p > q, q -> column | -| | | p -> row, | -| | | q -> column | - -Input: -| i | integer | corresponding index in the vector | - -#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine -#+END_SRC - diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h deleted file mode 100644 index bbfabfec..00000000 --- a/src/utils_trust_region/pi.h +++ /dev/null @@ -1,2 +0,0 @@ - !logical, parameter :: debug=.False. - double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f deleted file mode 100644 index 4738fd67..00000000 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ /dev/null @@ -1,443 +0,0 @@ -! Rotation matrix - -! 
*Build a rotation matrix from an antisymmetric matrix* - -! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -! $$ -! \textbf{R}=\exp(\textbf{A}) -! $$ - -! So : -! \begin{align*} -! \textbf{R}=& \exp(\textbf{A}) \\ -! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -! \end{align*} - -! With : -! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -! $\tau$ : $\sqrt{-x}$ -! $x$ : eigenvalues of $\textbf{A}^2$ - -! Input: -! | A(n,n) | double precision | antisymmetric matrix | -! | n | integer | number of columns of the A matrix | -! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -! Output: -! | R(n,n) | double precision | Rotation matrix | -! | info | integer | if info = 0, the execution is successful | -! | | | if info = k, the k-th parameter has an illegal value | -! | | | if info = -k, the algorithm failed | - -! Internal: -! | B(n,n) | double precision | B = A.A | -! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -! | W(n,n) | double precision | eigenvectors of B | -! | e_val(n) | double precision | eigenvalues of B | -! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | -! | part_1a(n,n) | double precision | matrix cos_tau.W^t | -! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -! 
| part_2a(n,n) | double precision | matrix W^t.A | -! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -! | norm | integer | norm of R.R^t-1, must be equal to 0 | -! | i,j | integer | indexes | - -! Functions: -! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | -! | disnan | logical | Lapack function, check if an element is NaN | - - - -subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) - -! 
Pre-conditions - -! Initialization -info=0 -enforce_step_cancellation = .False. - -! Size of matrix A must be at least 1 by 1 -if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return -endif - -! Leading dimension of A must be >= n -if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return -endif - -! Leading dimension of A must be >= n -if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return -endif - -! Matrix elements of A must by non-NaN -do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo -enddo - -do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif -enddo - -do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo -enddo - -! Fix for too big elements ! bad idea better to cancel if the error is too big -!do j = 1, n -! do i = 1, n -! A(i,j) = mod(A(i,j),2d0*pi) -! if (dabs(A(i,j)) > pi) then -! A(i,j) = 0d0 -! endif -! enddo -!enddo - -max_elem_A = 0d0 -do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo -enddo -print*,'max element in A', max_elem_A - -if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' -endif - -! B=A.A -! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ -! - Diagonalization of $\textbf{B}$ -! W, the eigenvectors -! e_val, the eigenvalues - - -! Compute B=A.A - -call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - -! Copy B in W, diagonalization will put the eigenvectors in W -W=B - -! Diagonalization of B -! 
Eigenvalues -> e_val -! Eigenvectors -> W -lwork = 3*n-1 -allocate(work(lwork,n)) - -print*,'Starting diagonalization ...' - -call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - -deallocate(work) - -if (info2 == 0) then - print*, 'Diagonalization : Done' -elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' -else - print*, "WARNING: Diagonalization failed to converge" -endif - -! Tau^-1, cos(tau), sin(tau) -! $$\tau = \sqrt{-x}$$ -! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ -! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ -! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ -! These matrices are diagonals - -! Diagonal matrix m_diag -do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif -enddo - -m_diag = 0.d0 -do i = 1, n - m_diag(i,i) = e_val(i) -enddo - -! cos_tau -do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo -enddo - -! sin_tau -do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo -enddo - -! Debug, display the cos_tau and sin_tau matrix -!if (debug) then -! print*, 'cos_tau' -! do i = 1, n -! print*, cos_tau(i,:) -! enddo -! print*, 'sin_tau' -! do i = 1, n -! print*, sin_tau(i,:) -! enddo -!endif - -! tau^-1 -do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo -enddo - -max_elem = 0d0 -do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif -enddo -print*,'max elem tau^-1:', max_elem - -! Debug -!print*,'eigenvalues:' -!do i = 1, n -! 
print*, e_val(i) -!enddo - -!Debug, display tau^-1 -!if (debug) then -! print*, 'tau^-1' -! do i = 1, n -! print*,tau_m1(i,:) -! enddo -!endif - -! Rotation matrix -! \begin{align*} -! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} -! \begin{align*} -! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} -! \end{align*} -! \begin{align*} -! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} - -! First: -! part_1 = dgemm(W, dgemm(cos_tau, W^t)) -! part_1a = dgemm(cos_tau, W^t) -! part_1 = dgemm(W, part_1a) -! And: -! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) -! part_2a = dgemm(W^t, A) -! part_2b = dgemm(sin_tau, part_2a) -! part_2c = dgemm(tau_m1, part_2b) -! part_2 = dgemm(W, part_2c) -! Finally: -! Rotation matrix, R = part_1+part_2 - -! If $R$ is a rotation matrix: -! $R.R^T=R^T.R=\textbf{1}$ - -! part_1 -call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - -! part_2 -call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) -call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) -call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - -! Rotation matrix R -R = part_1 + part_2 - -! Matrix check -! R.R^t and R^t.R must be equal to identity matrix -do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo -enddo - -call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - -norm = dnrm2(n*n,RR_t,1) -print*, 'Rotation matrix check, norm R.R^T = ', norm - -! Debug -!if (debug) then -! 
print*, 'RR_t' -! do i = 1, n -! print*, RR_t(i,:) -! enddo -!endif - -! Post conditions - -! Check if R.R^T=1 -max_elem = 0d0 -do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo -enddo - -print*, 'Max error in R.R^T:', max_elem -print*, 'e_val(1):', e_val(1) -print*, 'e_val(n):', e_val(n) -print*, 'max elem in A:', max_elem_A - -if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. -endif - -! Matrix elements of R must by non-NaN -do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo -enddo - -! Display -!if (debug) then -! print*,'Rotation matrix :' -! do i = 1, n -! write(*,'(100(F10.5))') R(i,:) -! enddo -!endif - -! Deallocation, end - -deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org deleted file mode 100644 index 73ba0298..00000000 --- a/src/utils_trust_region/rotation_matrix.org +++ /dev/null @@ -1,454 +0,0 @@ -* Rotation matrix - -*Build a rotation matrix from an antisymmetric matrix* - -Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -$$ -\textbf{R}=\exp(\textbf{A}) -$$ - -So : -\begin{align*} -\textbf{R}=& \exp(\textbf{A}) \\ -=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -\end{align*} - -With : -$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -$\tau$ : $\sqrt{-x}$ -$x$ : 
eigenvalues of $\textbf{A}^2$ - -Input: -| A(n,n) | double precision | antisymmetric matrix | -| n | integer | number of columns of the A matrix | -| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -Output: -| R(n,n) | double precision | Rotation matrix | -| info | integer | if info = 0, the execution is successful | -| | | if info = k, the k-th parameter has an illegal value | -| | | if info = -k, the algorithm failed | - -Internal: -| B(n,n) | double precision | B = A.A | -| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -| W(n,n) | double precision | eigenvectors of B | -| e_val(n) | double precision | eigenvalues of B | -| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -| part_1(n,n) | double precision | matrix W.cos_tau.W^t | -| part_1a(n,n) | double precision | matrix cos_tau.W^t | -| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -| part_2a(n,n) | double precision | matrix W^t.A | -| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -| norm | integer | norm of R.R^t-1, must be equal to 0 | -| i,j | integer | indexes | - -Functions: -| dnrm2 | double precision | Lapack function, compute the norm of a matrix | -| disnan | logical | Lapack function, check if an element is NaN | - - -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f -subroutine 
rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) -#+END_SRC - -** Pre-conditions -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Initialization - info=0 - enforce_step_cancellation = .False. - - ! Size of matrix A must be at least 1 by 1 - if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return - endif - - ! Leading dimension of A must be >= n - if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return - endif - - ! 
Leading dimension of A must be >= n - if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return - endif - - ! Matrix elements of A must by non-NaN - do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo - enddo - - do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif - enddo - - do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo - enddo - - ! Fix for too big elements ! bad idea better to cancel if the error is too big - !do j = 1, n - ! do i = 1, n - ! A(i,j) = mod(A(i,j),2d0*pi) - ! if (dabs(A(i,j)) > pi) then - ! A(i,j) = 0d0 - ! endif - ! enddo - !enddo - - max_elem_A = 0d0 - do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo - enddo - print*,'max element in A', max_elem_A - - if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' - endif - -#+END_SRC - -** Calculations - -*** B=A.A - - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ - - Diagonalization of $\textbf{B}$ - W, the eigenvectors - e_val, the eigenvalues - - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Compute B=A.A - - call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - - ! Copy B in W, diagonalization will put the eigenvectors in W - W=B - - ! Diagonalization of B - ! Eigenvalues -> e_val - ! Eigenvectors -> W - lwork = 3*n-1 - allocate(work(lwork,n)) - - print*,'Starting diagonalization ...' 
- - call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - - deallocate(work) - - if (info2 == 0) then - print*, 'Diagonalization : Done' - elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' - else - print*, "WARNING: Diagonalization failed to converge" - endif - #+END_SRC - -*** Tau^-1, cos(tau), sin(tau) - $$\tau = \sqrt{-x}$$ - - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ - - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ - - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ - These matrices are diagonals - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Diagonal matrix m_diag - do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif - enddo - - m_diag = 0.d0 - do i = 1, n - m_diag(i,i) = e_val(i) - enddo - - ! cos_tau - do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo - enddo - - ! sin_tau - do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo - enddo - - ! Debug, display the cos_tau and sin_tau matrix - !if (debug) then - ! print*, 'cos_tau' - ! do i = 1, n - ! print*, cos_tau(i,:) - ! enddo - ! print*, 'sin_tau' - ! do i = 1, n - ! print*, sin_tau(i,:) - ! enddo - !endif - - ! tau^-1 - do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo - enddo - - max_elem = 0d0 - do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif - enddo - print*,'max elem tau^-1:', max_elem - - ! Debug - !print*,'eigenvalues:' - !do i = 1, n - ! 
print*, e_val(i) - !enddo - - !Debug, display tau^-1 - !if (debug) then - ! print*, 'tau^-1' - ! do i = 1, n - ! print*,tau_m1(i,:) - ! enddo - !endif - #+END_SRC - -*** Rotation matrix - \begin{align*} - \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - \begin{align*} - \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} - \end{align*} - \begin{align*} - \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - - First: - part_1 = dgemm(W, dgemm(cos_tau, W^t)) - part_1a = dgemm(cos_tau, W^t) - part_1 = dgemm(W, part_1a) - And: - part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) - part_2a = dgemm(W^t, A) - part_2b = dgemm(sin_tau, part_2a) - part_2c = dgemm(tau_m1, part_2b) - part_2 = dgemm(W, part_2c) - Finally: - Rotation matrix, R = part_1+part_2 - - If $R$ is a rotation matrix: - $R.R^T=R^T.R=\textbf{1}$ - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! part_1 - call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - - ! part_2 - call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) - call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) - call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - - ! Rotation matrix R - R = part_1 + part_2 - - ! Matrix check - ! 
R.R^t and R^t.R must be equal to identity matrix - do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo - enddo - - call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - - norm = dnrm2(n*n,RR_t,1) - print*, 'Rotation matrix check, norm R.R^T = ', norm - - ! Debug - !if (debug) then - ! print*, 'RR_t' - ! do i = 1, n - ! print*, RR_t(i,:) - ! enddo - !endif - #+END_SRC - -*** Post conditions - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Check if R.R^T=1 - max_elem = 0d0 - do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo - enddo - - print*, 'Max error in R.R^T:', max_elem - print*, 'e_val(1):', e_val(1) - print*, 'e_val(n):', e_val(n) - print*, 'max elem in A:', max_elem_A - - if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. - endif - - ! Matrix elements of R must by non-NaN - do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo - enddo - - ! Display - !if (debug) then - ! print*,'Rotation matrix :' - ! do i = 1, n - ! write(*,'(100(F10.5))') R(i,:) - ! 
enddo - !endif - #+END_SRC - -** Deallocation, end - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine - #+END_SRC - diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f deleted file mode 100644 index bdd1f6ba..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ /dev/null @@ -1,64 +0,0 @@ -! Rotation matrix in a subspace to rotation matrix in the full space - -! Usually, we are using a list of MOs, for exemple the active ones. When -! we compute a rotation matrix to rotate the MOs, we just compute a -! rotation matrix for these MOs in order to reduce the size of the -! matrix which has to be computed. Since the computation of a rotation -! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -! reuce the number of MOs involved. -! After that we replace the rotation matrix in the full space by -! building the elements of the rotation matrix in the full space from -! the elements of the rotation matrix in the subspace and adding some 0 -! on the extradiagonal elements and some 1 on the diagonal elements, -! for the MOs that are not involved in the rotation. - -! Provided: -! | mo_num | integer | Number of MOs | - -! Input: -! | m | integer | Size of tmp_list, m <= mo_num | -! | tmp_list(m) | integer | List of MOs | -! | tmp_R(m,m) | double precision | Rotation matrix in the space of | -! | | | the MOs containing by tmp_list | - -! Output: -! | R(mo_num,mo_num | double precision | Rotation matrix in the space | -! | | | of all the MOs | - -! Internal: -! | i,j | integer | indexes in the full space | -! 
| tmp_i,tmp_j | integer | indexes in the subspace | - - -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org deleted file mode 100644 index 16434dc8..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ /dev/null @@ -1,65 +0,0 @@ -* Rotation matrix in a subspace to rotation matrix in the full space - -Usually, we are using a list of MOs, for exemple the active ones. When -we compute a rotation matrix to rotate the MOs, we just compute a -rotation matrix for these MOs in order to reduce the size of the -matrix which has to be computed. Since the computation of a rotation -matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -reuce the number of MOs involved. -After that we replace the rotation matrix in the full space by -building the elements of the rotation matrix in the full space from -the elements of the rotation matrix in the subspace and adding some 0 -on the extradiagonal elements and some 1 on the diagonal elements, -for the MOs that are not involved in the rotation. 
- -Provided: -| mo_num | integer | Number of MOs | - -Input: -| m | integer | Size of tmp_list, m <= mo_num | -| tmp_list(m) | integer | List of MOs | -| tmp_R(m,m) | double precision | Rotation matrix in the space of | -| | | the MOs containing by tmp_list | - -Output: -| R(mo_num,mo_num | double precision | Rotation matrix in the space | -| | | of all the MOs | - -Internal: -| i,j | integer | indexes in the full space | -| tmp_i,tmp_j | integer | indexes in the subspace | - -#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f deleted file mode 100644 index b7d849d1..00000000 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ /dev/null @@ -1,119 +0,0 @@ -! Predicted energy : e_model - -! *Compute the energy predicted by the Taylor series* - -! The energy is predicted using a Taylor expansion truncated at te 2nd -! order : - -! \begin{align*} -! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -! \end{align*} - -! Input: -! | n | integer | m*(m-1)/2 | -! | v_grad(n) | double precision | gradient | -! 
| H(n,n) | double precision | hessian | -! | x(n) | double precision | Step in the trust region | -! | prev_energy | double precision | previous energy | - -! Output: -! | e_model | double precision | predicted energy after the rotation of the MOs | - -! Internal: -! | part_1 | double precision | v_grad^T.x | -! | part_2 | double precision | 1/2 . x^T.H.x | -! | part_2a | double precision | H.x | -! | i,j | integer | indexes | - -! Function: -! | ddot | double precision | dot product (Lapack) | - - -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) - -! Calculations - -! part_1 corresponds to the product g.x -! part_2a corresponds to the product H.x -! part_2 corresponds to the product 0.5*(x^T.H.x) - -! TODO: remove the dot products - - -! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! 
Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org deleted file mode 100644 index 58c8f804..00000000 --- a/src/utils_trust_region/trust_region_expected_e.org +++ /dev/null @@ -1,121 +0,0 @@ -* Predicted energy : e_model - -*Compute the energy predicted by the Taylor series* - -The energy is predicted using a Taylor expansion truncated at te 2nd -order : - -\begin{align*} -E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -\end{align*} - -Input: -| n | integer | m*(m-1)/2 | -| v_grad(n) | double precision | gradient | -| H(n,n) | double precision | hessian | -| x(n) | double precision | Step in the trust region | -| prev_energy | double precision | previous energy | - -Output: -| e_model | double precision | predicted energy after the rotation of the MOs | - -Internal: -| part_1 | double precision | v_grad^T.x | -| part_2 | double precision | 1/2 . x^T.H.x | -| part_2a | double precision | H.x | -| i,j | integer | indexes | - -Function: -| ddot | double precision | dot product (Lapack) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! 
out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) -#+END_SRC - -** Calculations - -part_1 corresponds to the product g.x -part_2a corresponds to the product H.x -part_2 corresponds to the product 0.5*(x^T.H.x) - -TODO: remove the dot products - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f - ! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f deleted file mode 100644 index f71bb405..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ /dev/null @@ -1,1655 +0,0 @@ -! Newton's method to find the optimal lambda - -! *Compute the lambda value for the trust region* - -! This subroutine uses the Newton method in order to find the optimal -! lambda. 
This constant is added on the diagonal of the hessian to shift -! the eiganvalues. It has a double role: -! - ensure that the resulting hessian is positive definite for the -! Newton method -! - constrain the step in the trust region, i.e., -! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius -! of the trust region. -! We search $\lambda$ which minimizes -! \begin{align*} -! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -! \end{align*} -! or -! \begin{align*} -! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -! \end{align*} -! and gives obviously 0 in both cases. \newline - -! There are several cases: -! - If $\textbf{H}$ is positive definite the interval containing the -! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$ then the interval containing -! the solution is $\lambda \in (-h_1, \infty)$. -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ then the interval containing the solution is -! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < -! 10^{-12}$ are not computed, so the term where $i = 1$ is -! automatically removed and this case becomes similar to the previous one. - -! So to avoid numerical problems (cf. trust_region) we start the -! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -! with $\epsilon$ a little constant. -! The research must be restricted to the interval containing the -! solution. For that reason a little trust region in 1D is used. - -! The Newton method to find the optimal $\lambda$ is : -! \begin{align*} -! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -! \end{align*} -! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -! $\lambda$ at the l-th iteration, -! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -! 
$\lambda$ at the l-th iteration.\newline - -! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -! \begin{align*} -! y \leq \alpha -! \end{align*} -! with $\alpha$ a scalar representing the trust length (trust region in -! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -! Taylor series truncated at the second order. Thus, if $y > \alpha$, -! the constraint is applied as -! \begin{align*} -! y^* = \alpha \frac{y}{|y|} -! \end{align*} -! with $y^*$ the solution in the trust region. - -! The size of the trust region evolves in function of $\rho$ as for the -! trust region seen previously cf. trust_region, rho_model. -! The prediction of the value of $f$ or $\tilde{f}$ is done using the -! Taylor series truncated at the second order cf. "trust_region", -! "trust_e_model". - -! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{align*} - -! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -! 1/\Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! 
&= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} -! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! Provided in qp_edit: -! | thresh_rho_2 | -! | thresh_cc | -! | nb_it_max_lambda | -! | version_lambda_search | -! | nb_it_max_pre_search | -! see qp_edit for more details - -! Input: -! | n | integer | m*(m-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -! | delta | double precision | delta for the trust region | - -! Output: -! 
! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step |
! |        |                  | lambda > 0                                                                |

! Internal:
! | f_N         | double precision | value of ||x(lambda)||^2 (f_norm_trust_region_omp)                     |
! | prev_f_N    | double precision | previous value of f_N                                                  |
! | f_R         | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2                 |
! | prev_f_R    | double precision | previous value of f_R                                                  |
! | model       | double precision | predicted value of f_R from prev_f_R and y                             |
! | d_1         | double precision | value of the first derivative                                          |
! | d_2         | double precision | value of the second derivative                                         |
! | y           | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda                 |
! | prev_lambda | double precision | previous value of lambda                                               |
! | t1,t2,t3    | double precision | wall time                                                              |
! | i           | integer          | iteration counter (starts at 1)                                        |
! | epsilon     | double precision | shift added to lambda during the pre-search                            |
! | rho_2       | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R   |
! | alpha       | double precision | trust length (1D trust region) bounding |y|                            |

! Function:
! | f_norm_trust_region_omp          | double precision | value of norm(x)^2                                                      |
! | d1_norm_trust_region_omp         | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2      |
! | d2_norm_trust_region_omp         | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2     |
! | d1_norm_inverse_trust_region_omp | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2  |
! | d2_norm_inverse_trust_region_omp | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 |



subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)

  include 'pi.h'

  BEGIN_DOC
  ! Research the optimal lambda to constrain the step size in the trust region
  END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)           :: n
  ! e_val is only read in this routine, hence intent(in)
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: delta
  double precision, intent(in)  :: tmp_wtg(n)

  ! out
  double precision, intent(out) :: lambda

  ! Internal
  double precision :: f_N, prev_f_N
  double precision :: prev_f_R, f_R
  double precision :: model
  double precision :: d_1, d_2
  double precision :: t1,t2,t3
  integer          :: i
  double precision :: epsilon
  double precision :: y
  double precision :: prev_lambda
  double precision :: rho_2
  double precision :: alpha

  ! Functions
  double precision :: d1_norm_trust_region_omp
  double precision :: d2_norm_trust_region_omp
  double precision :: f_norm_trust_region_omp
  double precision :: d1_norm_inverse_trust_region_omp
  double precision :: d2_norm_inverse_trust_region_omp

  print*,''
  print*,'---Trust_newton---'
  print*,''

  call wall_time(t1)

  ! version_lambda_search
  ! 1 -> ||x||^2 - delta^2 = 0,
  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)
  if (version_lambda_search == 1) then
    print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0'
  else
    print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0'
  endif
  ! Version 2 is normally better

  ! Resolution with the Newton method:

  ! Initialization
  epsilon = 1d-4
  lambda  = MAX(0d0, -e_val(1))

  ! Pre research of lambda to start near the optimal lambda
  ! by adding a constant epsilon and changing the constant to
  ! have ||x(lambda + epsilon)|| ~ delta, before setting
  ! lambda = lambda + epsilon
  print*, 'Pre research of lambda:'
  print*,'Initial lambda =', lambda
  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
  print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta
  i = 1

  ! To increase lambda
  if (f_N > delta**2) then
    print*,'Increasing lambda...'
    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)

      ! Update the previous norm
      prev_f_N = f_N
      ! New epsilon
      epsilon = epsilon * 2d0
      ! New norm
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)

      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta

      ! Security: the norm must decrease when lambda increases.
      ! Otherwise undo the last doubling and force the loop to stop.
      if (prev_f_N < f_N) then
        print*,'WARNING, error: prev_f_N < f_N, exit'
        epsilon = epsilon * 0.5d0
        i = nb_it_max_pre_search + 1
      endif

      i = i + 1
    enddo

  ! To reduce lambda
  else
    print*,'Reducing lambda...'
    do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)

      ! Update the previous norm
      prev_f_N = f_N
      ! New epsilon
      epsilon = epsilon * 0.5d0
      ! New norm
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)

      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta

      ! Security: the norm must increase when lambda decreases.
      ! Otherwise undo the last halving and force the loop to stop.
      if (prev_f_N > f_N) then
        print*,'WARNING, error: prev_f_N > f_N, exit'
        epsilon = epsilon * 2d0
        i = nb_it_max_pre_search + 1
      endif

      i = i + 1
    enddo
  endif

  print*,'End of the pre research of lambda'

  ! New value of lambda
  lambda = lambda + epsilon

  print*, 'e_val(1):', e_val(1)
  print*, 'Staring point, lambda =', lambda

  ! thresh_cc, threshold for the research of the optimal lambda
  ! Leaves the loop when ABS(1d0-||x||^2/delta^2) <= thresh_cc
  ! thresh_rho_2, threshold to cancel the step in the research
  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2
  print*,'Threshold for the CC:', thresh_cc
  print*,'Threshold for rho_2:', thresh_rho_2

  print*, 'w_1^T . g =', tmp_wtg(1)

  ! Debug
  !if (debug) then
  !  print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|'
  !endif

  ! Initialization
  i = 1
  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2
  model = 0d0       ! predicted value of f_R
  prev_f_N = 0d0    ! previous value of ||x||^2
  prev_f_R = 0d0    ! previous value of f_R
  f_R = 0d0         ! value of f_R
  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - model)
  y = 0d0           ! step size
  prev_lambda = 0d0 ! previous lambda

  ! Derivatives at the starting point, only used to set the initial trust length
  if (version_lambda_search == 1) then
    d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
    d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
  else
    d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
    d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
  endif

  ! Trust length
  alpha = DABS((1d0/d_2)*d_1)

  ! Newton's method.
  ! The iteration cap is nb_it_max_lambda, the same bound the failure
  ! report below refers to (it used to be a hard-coded 100).
  do while (i <= nb_it_max_lambda .and. DABS(1d0-f_N/delta**2) > thresh_cc)
    print*,'--------------------------------------'
    print*,'Research of lambda, iteration:', i
    print*,'--------------------------------------'

    ! Update of f_N, f_R and the derivatives
    prev_f_N = f_N
    if (version_lambda_search == 1) then
      prev_f_R = (prev_f_N - delta**2)**2
      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
    else
      prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
    endif
    write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2

    ! Newton's step; |d_2| is used so that the step always goes downhill
    y = -(1d0/DABS(d_2))*d_1

    ! Constraint on y (the newton step): clip to the trust length,
    ! preserving the sign of y (alpha is never negative)
    if (DABS(y) > alpha) then
      y = sign(alpha, y)
    endif
    write(*,'(a,E12.5)') ' Step length: ', y

    ! Predicted value of f_R, Taylor series truncated at the second order
    model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2

    ! Updates lambda
    prev_lambda = lambda
    lambda = prev_lambda + y
    print*,'prev lambda:', prev_lambda
    print*,'new lambda:', lambda

    ! Checks if lambda is in (-h_1, \infty)
    if (lambda > MAX(0d0, -e_val(1))) then
      ! New value of ||x(lambda)||^2
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)

      ! New f_R and report
      if (version_lambda_search == 1) then
        f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2
        print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R
        print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R
        print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model
      else
        f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2
        print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R
        print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R
        print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model
      endif

      print*,'previous - actual:', prev_f_R - f_R
      print*,'previous - model:', prev_f_R - model

      ! Check the gain
      if (DABS(prev_f_R - model) < thresh_model_2) then
        print*,''
        print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
        print*,''
      endif

      ! Will be deleted
      !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then
      !  print*,''
      !  print*,'WARNING: ABS(previous - model) <= 1d-16, exit'
      !  print*,''
      !  exit
      !endif

      ! Computes rho_2
      rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
      print*,'rho_2:', rho_2
    else
      rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty)
      print*,'lambda < -e_val(1) ===> rho_2 = 0'
    endif

    ! Evolution of the trust length, alpha
    ! (unchanged when 0.5 <= rho_2 < 0.75)
    if (rho_2 >= 0.75d0) then
      alpha = 2d0 * alpha
    elseif (rho_2 >= 0.5d0) then
      alpha = alpha
    elseif (rho_2 >= 0.25d0) then
      alpha = 0.5d0 * alpha
    else
      alpha = 0.25d0 * alpha
    endif
    write(*,'(a,E12.5)') ' New trust length alpha: ', alpha

    ! Cancellation of the step if rho_2 < thresh_rho_2
    if (rho_2 < thresh_rho_2) then
      lambda = prev_lambda
      f_N = prev_f_N
      print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda'
    endif

    print*,''
    print*,'lambda, ||x||, delta:'
    print*, lambda, dsqrt(f_N), delta
    print*,'CC:', DABS(1d0 - f_N/delta**2)
    print*,''

    i = i + 1
  enddo

  ! If trust newton failed: the iteration budget is exhausted AND the
  ! convergence criterion is still not met (converging on the very last
  ! allowed iteration is not a failure).
  if (i > nb_it_max_lambda .and. DABS(1d0-f_N/delta**2) > thresh_cc) then
    print*,''
    print*,'######################################################'
    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
    print*,'The research of the optimal lambda has failed'
    print*,'######################################################'
    print*,''
  endif

  ! i is incremented after each iteration, so i - 1 iterations were done
  print*,'Number of iterations :', i - 1
  print*,'Value of lambda :', lambda
  print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2
  print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2
  print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2

  ! Time
  call wall_time(t2)
  t3 = t2 - t1
  print*,'Time in trust_newton:', t3

  print*,''
  print*,'---End trust_newton---'
  print*,''

end subroutine

! OMP: First derivative of (||x||^2 - Delta^2)^2

! *Function to compute the first derivative of (||x||^2 - Delta^2)^2*

! This function computes the first derivative of (||x||^2 - Delta^2)^2
!
! with respect to lambda.

! \begin{align*}
! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n          | integer          | size of e_val and tmp_wtg                  |
! | e_val(n)   | double precision | eigenvalues of the hessian (h_i)           |
! | tmp_wtg(n) | double precision | w_i^T . g, one value per eigenvector       |
! | lambda     | double precision | Lagrange multiplier                        |
! | delta      | double precision | Delta of the trust region                  |

! Internal:
! | accu1     | double precision | first sum of the formula           |
! | accu2     | double precision | second sum of the formula          |
! | tmp_accu1 | double precision | per-index terms of the first sum   |
! | tmp_accu2 | double precision | per-index terms of the second sum  |
! | i         | integer          | index                              |

! Function:
! | d1_norm_trust_region_omp | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  BEGIN_DOC
  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  END_DOC

  implicit none

  ! in
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Internal
  double precision              :: accu1,accu2
  integer                       :: i
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)

  ! Functions
  double precision :: d1_norm_trust_region_omp

  ! Allocation
  allocate(tmp_accu1(n), tmp_accu2(n))

  ! No nested parallelism inside this region
  call omp_set_max_active_levels(1)

  ! Per-index terms of both sums. The SAME guard is applied to both:
  ! the cubic term used to be computed even when |h_i + lambda| was
  ! below thresh_eig, i.e. with a (near) zero denominator.
  !$OMP PARALLEL DO                                  &
  !$OMP PRIVATE(i)                                   &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,          &
  !$OMP tmp_accu1, tmp_accu2, tmp_wtg)               &
  !$OMP DEFAULT(NONE)
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
    else
      tmp_accu1(i) = 0d0
      tmp_accu2(i) = 0d0
    endif
  enddo
  !$OMP END PARALLEL DO

  call omp_set_max_active_levels(4)

  ! Serial, ordered summation (same order as before)
  accu1 = 0d0
  do i = 1, n
    accu1 = accu1 + tmp_accu1(i)
  enddo

  accu2 = 0d0
  do i = 1, n
    accu2 = accu2 + tmp_accu2(i)
  enddo

  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)

  deallocate(tmp_accu1, tmp_accu2)

end function

! OMP: Second derivative of (||x||^2 - Delta^2)^2

! *Function to compute the second derivative of (||x||^2 - Delta^2)^2*

! This function computes the second derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.
! \begin{align*}
! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
!
! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n          | integer          | size of e_val and tmp_wtg                  |
! | e_val(n)   | double precision | eigenvalues of the hessian (h_i)           |
! | tmp_wtg(n) | double precision | w_i^T . g, one value per eigenvector       |
! | lambda     | double precision | Lagrange multiplier                        |
! | delta      | double precision | Delta of the trust region                  |

! Internal:
! | accu1     | double precision | first sum of the formula           |
! | accu2     | double precision | second sum of the formula          |
! | accu3     | double precision | third sum of the formula           |
! | tmp_accu1 | double precision | per-index terms of the first sum   |
! | tmp_accu2 | double precision | per-index terms of the second sum  |
! | tmp_accu3 | double precision | per-index terms of the third sum   |
! | i         | integer          | index                              |

! Function:
! | d2_norm_trust_region_omp | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  BEGIN_DOC
  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Functions
  double precision :: d2_norm_trust_region_omp

  ! Internal
  double precision              :: accu1,accu2,accu3
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
  integer                       :: i

  ! Allocation
  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))

  ! No nested parallelism inside this region
  call omp_set_max_active_levels(1)

  ! Per-index terms of the three sums (same guard for all of them)
  !$OMP PARALLEL DO                                  &
  !$OMP PRIVATE(i)                                   &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,          &
  !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg)    &
  !$OMP DEFAULT(NONE)
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
      tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4
    else
      tmp_accu1(i) = 0d0
      tmp_accu2(i) = 0d0
      tmp_accu3(i) = 0d0
    endif
  enddo
  !$OMP END PARALLEL DO

  ! Restore the setting, as the first-derivative OMP routines of this
  ! file do; it used to be left at 1 on exit.
  call omp_set_max_active_levels(4)

  ! Serial, ordered summation (same order as before)
  accu1 = 0d0
  do i = 1, n
    accu1 = accu1 + tmp_accu1(i)
  enddo

  accu2 = 0d0
  do i = 1, n
    accu2 = accu2 + tmp_accu2(i)
  enddo

  accu3 = 0d0
  do i = 1, n
    accu3 = accu3 + tmp_accu3(i)
  enddo

  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)

  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)

end function

! OMP: Function value of ||x||^2

! *Compute the value of ||x||^2*

! This function computes the value of ||x(lambda)||^2

! \begin{align*}
! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n          | integer          | size of e_val and tmp_wtg                  |
! | e_val(n)   | double precision | eigenvalues of the hessian (h_i)           |
! | tmp_wtg(n) | double precision | w_i^T . g, one value per eigenvector       |
! | lambda     | double precision | Lagrange multiplier                        |

! Internal:
! | tmp_fN | double precision | per-index terms of the sum |
! | accu   | double precision | running sum                |
! | i      | integer          | index                      |


function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)

  use omp_lib

  include 'pi.h'

  BEGIN_DOC
  ! Compute ||x(lambda)||^2
  END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda

  ! functions
  double precision :: f_norm_trust_region_omp

  ! internal
  double precision, allocatable :: tmp_fN(:)
  double precision              :: accu
  integer                       :: i

  ! Allocation
  allocate(tmp_fN(n))

  ! No nested parallelism inside this region
  call omp_set_max_active_levels(1)

  ! Per-index terms of the sum
  !$OMP PARALLEL DO                                  &
  !$OMP PRIVATE(i)                                   &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,          &
  !$OMP tmp_fN, tmp_wtg)                             &
  !$OMP DEFAULT(NONE)
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
    else
      tmp_fN(i) = 0d0
    endif
  enddo
  !$OMP END PARALLEL DO

  ! Restore the setting, as the first-derivative OMP routines of this
  ! file do; it used to be left at 1 on exit.
  call omp_set_max_active_levels(4)

  ! Serial, ordered summation (same order as before)
  accu = 0d0
  do i = 1, n
    accu = accu + tmp_fN(i)
  enddo
  f_norm_trust_region_omp = accu

  deallocate(tmp_fN)

end function

! First derivative of (||x||^2 - Delta^2)^2
! Version without omp

! *Function to compute the first derivative of (||x||^2 - Delta^2)^2*

! This function computes the first derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.

! \begin{align*}
! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= -2 \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n         | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)  | double precision | eigenvalues of the hessian |
! | W(n,n)    | double precision | eigenvectors of the hessian |
! | v_grad(n) | double precision | gradient                   |
! | lambda    | double precision | Lagrange multiplier        |
! | delta     | double precision | Delta of the trust region  |

! Internal:
! | accu1 | double precision | first sum of the formula                |
! | accu2 | double precision | second sum of the formula               |
! | wtg   | double precision | temporary variable to store W^T.v_grad  |
! | i,j   | integer          | indexes                                 |

! Function:
!
! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)

  include 'pi.h'

  BEGIN_DOC
  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  END_DOC

  implicit none

  ! Arguments
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: w(n,n)
  double precision, intent(in) :: v_grad(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Result
  double precision :: d1_norm_trust_region

  ! Locals
  double precision :: proj      ! w_k^T . v_grad for the current eigenvector
  double precision :: sum_sq    ! sum of proj^2 / (h_k + lambda)^2
  double precision :: sum_cub   ! sum of -2 proj^2 / (h_k + lambda)^3
  integer          :: k, m

  sum_sq  = 0d0
  sum_cub = 0d0

  ! Single pass over the eigenvectors: both sums use the same projection
  ! and the same guard, and each one is accumulated in index order.
  do k = 1, n
    if (.not. (DABS(e_val(k)) > thresh_eig .and. DABS(e_val(k)+lambda) > thresh_eig)) cycle

    proj = 0d0
    do m = 1, n
      proj = proj + w(m,k) * v_grad(m)
    enddo

    sum_sq  = sum_sq  + proj**2 / (e_val(k) + lambda)**2
    sum_cub = sum_cub - 2d0 * proj**2 / (e_val(k) + lambda)**3
  enddo

  d1_norm_trust_region = 2d0 * sum_cub * (sum_sq - delta**2)

end function

! Second derivative of (||x||^2 - Delta^2)^2
! Version without OMP

! *Function to compute the second derivative of (||x||^2 - Delta^2)^2*


! \begin{equation}
! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
!
! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
! \end{equation}

! In the code the constant factors are folded into the sums:
! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= -2 \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
! \text{accu3} &= 6 \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n         | integer          | mo_num*(mo_num-1)/2         |
! | e_val(n)  | double precision | eigenvalues of the hessian  |
! | W(n,n)    | double precision | eigenvectors of the hessian |
! | v_grad(n) | double precision | gradient                    |
! | lambda    | double precision | Lagrange multiplier         |
! | delta     | double precision | Delta of the trust region   |

! Function:
! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)

  include 'pi.h'

  BEGIN_DOC
  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  END_DOC

  implicit none

  ! Arguments
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: w(n,n)
  double precision, intent(in) :: v_grad(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Result
  double precision :: d2_norm_trust_region

  ! Locals
  double precision :: proj       ! w_k^T . v_grad for the current eigenvector
  double precision :: sum_sq     ! sum of    proj^2 / (h_k + lambda)^2
  double precision :: sum_cub    ! sum of -2 proj^2 / (h_k + lambda)^3
  double precision :: sum_quart  ! sum of  6 proj^2 / (h_k + lambda)^4
  integer          :: k, m

  sum_sq    = 0d0
  sum_cub   = 0d0
  sum_quart = 0d0

  ! Single pass over the eigenvectors: the three sums share the same
  ! projection and guard, and each one is accumulated in index order.
  do k = 1, n
    if (.not. (DABS(e_val(k)) > thresh_eig .and. DABS(e_val(k)+lambda) > thresh_eig)) cycle

    proj = 0d0
    do m = 1, n
      proj = proj + w(m,k) * v_grad(m)
    enddo

    sum_sq    = sum_sq    + proj**2 / (e_val(k) + lambda)**2
    sum_cub   = sum_cub   - 2d0 * proj**2 / (e_val(k) + lambda)**3
    sum_quart = sum_quart + 6d0 * proj**2 / (e_val(k) + lambda)**4
  enddo

  d2_norm_trust_region = 2d0 * (sum_quart * (- delta**2 + sum_sq) + sum_cub**2)

end function

! Function value of ||x||^2
! Version without OMP

! *Compute the value of ||x||^2*

! This function computes the value of ||x(lambda)||^2

! \begin{align*}
! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
! \end{align*}

! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n          | integer          | size of e_val and tmp_wtg             |
! | e_val(n)   | double precision | eigenvalues of the hessian (h_i)      |
! | tmp_wtg(n) | double precision | w_i^T . g, one value per eigenvector  |
! | lambda     | double precision | Lagrange multiplier                   |

! Internal:
! | i | integer | index |

! Function:
!
! | f_norm_trust_region | double precision | value of norm(x)^2 |



function f_norm_trust_region(n,e_val,tmp_wtg,lambda)

  include 'pi.h'

  BEGIN_DOC
  ! Compute ||x(lambda)||^2
  END_DOC

  implicit none

  ! Arguments
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda

  ! Result
  double precision :: f_norm_trust_region

  ! Locals
  double precision :: norm_sq
  integer          :: k

  norm_sq = 0d0

  do k = 1, n
    ! Skip the terms whose eigenvalue or shifted eigenvalue is not above thresh_eig
    if (.not. (DABS(e_val(k)) > thresh_eig .and. DABS(e_val(k)+lambda) > thresh_eig)) cycle
    norm_sq = norm_sq + tmp_wtg(k)**2 / (e_val(k) + lambda)**2
  enddo

  f_norm_trust_region = norm_sq

end function

! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
! Version with OMP

! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*

! This function computes the first derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
! with respect to lambda.

! \begin{align*}
! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

!
! Provided:
! | thresh_eig | terms with |h_i| or |h_i + lambda| not above this threshold are skipped |

! Input:
! | n          | integer          | size of e_val and tmp_wtg             |
! | e_val(n)   | double precision | eigenvalues of the hessian (h_i)      |
! | tmp_wtg(n) | double precision | w_i^T . g, one value per eigenvector  |
! | lambda     | double precision | Lagrange multiplier                   |
! | delta      | double precision | Delta of the trust region             |

! Internal:
! | sum_sq   | double precision | first sum of the formula (accu1)    |
! | sum_cub  | double precision | second sum of the formula (accu2)   |
! | terms_sq | double precision | per-index terms of the first sum    |
! | terms_cub| double precision | per-index terms of the second sum   |
! | k        | integer          | index                               |

! Function:
! | d1_norm_inverse_trust_region_omp | double precision | value of the first derivative |


function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  BEGIN_DOC
  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
  END_DOC

  implicit none

  ! Arguments
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Result
  double precision :: d1_norm_inverse_trust_region_omp

  ! Locals
  double precision, allocatable :: terms_sq(:), terms_cub(:)
  double precision              :: sum_sq, sum_cub
  integer                       :: k

  allocate(terms_sq(n), terms_cub(n))

  ! No nested parallelism inside this region
  call omp_set_max_active_levels(1)

  ! Per-index terms of both sums; skipped indices contribute zero
  !$OMP PARALLEL DO                                  &
  !$OMP PRIVATE(k)                                   &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,          &
  !$OMP terms_sq, terms_cub, tmp_wtg)                &
  !$OMP DEFAULT(NONE)
  do k = 1, n
    if (DABS(e_val(k)) > thresh_eig .and. DABS(e_val(k)+lambda) > thresh_eig) then
      terms_sq(k)  = tmp_wtg(k)**2 / (e_val(k) + lambda)**2
      terms_cub(k) = tmp_wtg(k)**2 / (e_val(k) + lambda)**3
    else
      terms_sq(k)  = 0d0
      terms_cub(k) = 0d0
    endif
  enddo
  !$OMP END PARALLEL DO

  call omp_set_max_active_levels(4)

  ! Serial summation in index order
  sum_sq = 0d0
  do k = 1, n
    sum_sq = sum_sq + terms_sq(k)
  enddo

  sum_cub = 0d0
  do k = 1, n
    sum_cub = sum_cub + terms_cub(k)
  enddo

  d1_norm_inverse_trust_region_omp = 4d0 * sum_cub * (1d0/sum_sq**3 - 1d0/(delta**2 * sum_sq**2))

  deallocate(terms_sq, terms_cub)

end

! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
! Version with OMP

! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*

! This function computes the second derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
! with respect to lambda.

! \begin{align*}
! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
! &= 4 \left[ 6 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4}
! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
! &- \frac{4}{\Delta^2} \left[ 4 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
!
- 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! 
Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end - -! First derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! 
\frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! 
in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end - -! Second derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! 
\begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org deleted file mode 100644 index b39c9a10..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ /dev/null @@ -1,1665 +0,0 @@ -* Newton's method to find the optimal lambda - -*Compute the lambda value for the trust region* - -This subroutine uses the Newton method in order to find the optimal -lambda. This constant is added on the diagonal of the hessian to shift -the eiganvalues. It has a double role: -- ensure that the resulting hessian is positive definite for the - Newton method -- constrain the step in the trust region, i.e., - $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius - of the trust region. -We search $\lambda$ which minimizes -\begin{align*} - f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -\end{align*} -or -\begin{align*} - \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -\end{align*} -and gives obviously 0 in both cases. \newline - -There are several cases: -- If $\textbf{H}$ is positive definite the interval containing the - solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$ then the interval containing - the solution is $\lambda \in (-h_1, \infty)$. 
-- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} = 0$ then the interval containing the solution is - $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < - 10^{-12}$ are not computed, so the term where $i = 1$ is - automatically removed and this case becomes similar to the previous one. - -So to avoid numerical problems (cf. trust_region) we start the -algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -with $\epsilon$ a little constant. -The research must be restricted to the interval containing the -solution. For that reason a little trust region in 1D is used. - -The Newton method to find the optimal $\lambda$ is : -\begin{align*} - \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -\end{align*} -$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -$\lambda$ at the l-th iteration, -$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -$\lambda$ at the l-th iteration.\newline - -Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -\begin{align*} - y \leq \alpha -\end{align*} -with $\alpha$ a scalar representing the trust length (trust region in -1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -Taylor series truncated at the second order. Thus, if $y > \alpha$, -the constraint is applied as -\begin{align*} - y^* = \alpha \frac{y}{|y|} -\end{align*} -with $y^*$ the solution in the trust region. - -The size of the trust region evolves in function of $\rho$ as for the -trust region seen previously cf. trust_region, rho_model. -The prediction of the value of $f$ or $\tilde{f}$ is done using the -Taylor series truncated at the second order cf. "trust_region", -"trust_e_model". 
- -The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 - = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) - \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} -\begin{align*} -\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -1/\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
\textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} - {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -Provided in qp_edit: -| thresh_rho_2 | -| thresh_cc | -| nb_it_max_lambda | -| version_lambda_search | -| nb_it_max_pre_search | -see qp_edit for more details - -Input: -| n | integer | m*(m-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -| delta | double precision | delta for the trust region | - -Output: -| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | -| | | lambda > 0 | - -Internal: -| d1_N | double precision | value of d1_norm_trust_region | -| d2_N | double precision | value of d2_norm_trust_region | -| f_N | double precision | value of f_norm_trust_region | -| prev_f_N | double precision | previous value of f_norm_trust_region | -| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | -| prev_f_R | double precision | previous value of f_R | -| model | double precision | predicted value of f_R from prev_f_R and y | -| d_1 | double precision | value of the first derivative | -| d_2 | double precision | value of the second derivative | -| y | double precision | Newton's step, y = -f''^-1 . 
f' = lambda - prev_lambda | -| prev_lambda | double precision | previous value of lambda | -| t1,t2,t3 | double precision | wall time | -| i | integer | index | -| epsilon | double precision | little constant to avoid numerical problem | -| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | -| version | integer | version of the root finding method | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| f_norm_trust_region | double precision | value of norm(x)^2 | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Research the optimal lambda to constrain the step size in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(inout) :: e_val(n) - double precision, intent(in) :: delta - double precision, intent(in) :: tmp_wtg(n) - - ! out - double precision, intent(out) :: lambda - - ! Internal - double precision :: d1_N, d2_N, f_N, prev_f_N - double precision :: prev_f_R, f_R - double precision :: model - double precision :: d_1, d_2 - double precision :: t1,t2,t3 - integer :: i - double precision :: epsilon - double precision :: y - double precision :: prev_lambda - double precision :: rho_2 - double precision :: alpha - integer :: version - - ! 
Functions - double precision :: d1_norm_trust_region,d1_norm_trust_region_omp - double precision :: d2_norm_trust_region, d2_norm_trust_region_omp - double precision :: f_norm_trust_region, f_norm_trust_region_omp - double precision :: d1_norm_inverse_trust_region - double precision :: d2_norm_inverse_trust_region - double precision :: d1_norm_inverse_trust_region_omp - double precision :: d2_norm_inverse_trust_region_omp - - print*,'' - print*,'---Trust_newton---' - print*,'' - - call wall_time(t1) - - ! version_lambda_search - ! 1 -> ||x||^2 - delta^2 = 0, - ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif - ! Version 2 is normally better -#+END_SRC - -Resolution with the Newton method: - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f - ! Initialization - epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) - - ! Pre research of lambda to start near the optimal lambda - ! by adding a constant epsilon and changing the constant to - ! have ||x(lambda + epsilon)|| ~ delta, before setting - ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta - i = 1 - - ! To increase lambda - if (f_N > delta**2) then - print*,'Increasing lambda...' - do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 2d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! 
Security - if (prev_f_N < f_N) then - print*,'WARNING, error: prev_f_N < f_N, exit' - epsilon = epsilon * 0.5d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - - ! To reduce lambda - else - print*,'Reducing lambda...' - do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 0.5d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N > f_N) then - print*,'WARNING, error: prev_f_N > f_N, exit' - epsilon = epsilon * 2d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - endif - - print*,'End of the pre research of lambda' - - ! New value of lambda - lambda = lambda + epsilon - - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda - - ! thresh_cc, threshold for the research of the optimal lambda - ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc - ! thresh_rho_2, threshold to cancel the step in the research - ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) - - ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif - - ! Initialization - i = 1 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 - model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 - prev_f_N = 0d0 ! previous value of ||x||^2 - prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 - f_R = 0d0 ! value of (||x||^2 - delta^2)^2 - rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) - y = 0d0 ! step size - prev_lambda = 0d0 ! previous lambda - - ! Derivatives - if (version_lambda_search == 1) then - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! 
first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - - ! Trust length - alpha = DABS((1d0/d_2)*d_1) - - ! Newton's method - do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' - - ! Update of f_N, f_R and the derivatives - prev_f_N = f_N - if (version_lambda_search == 1) then - prev_f_R = (prev_f_N - delta**2)**2 - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 - - ! Newton's step - y = -(1d0/DABS(d_2))*d_1 - - ! Constraint on y (the newton step) - if (DABS(y) > alpha) then - y = alpha * (y/DABS(y)) ! preservation of the sign of y - endif - write(*,'(a,E12.5)') ' Step length: ', y - - ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series - model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 - - ! Updates lambda - prev_lambda = lambda - lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda - - ! 
Checks if lambda is in (-h_1, \infty) - if (lambda > MAX(0d0, -e_val(1))) then - ! New value of ||x(lambda)||^2 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - ! New f_R - if (version_lambda_search == 1) then - f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 - else - f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 - endif - - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif - - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model - - ! Check the gain - if (DABS(prev_f_R - model) < thresh_model_2) then - print*,'' - print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' - print*,'' - endif - - ! Will be deleted - !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then - ! print*,'' - ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' - ! print*,'' - ! exit - !endif - - ! Computes rho_2 - rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 - else - rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' - endif - - ! Evolution of the trust length, alpha - if (rho_2 >= 0.75d0) then - alpha = 2d0 * alpha - elseif (rho_2 >= 0.5d0) then - alpha = alpha - elseif (rho_2 >= 0.25d0) then - alpha = 0.5d0 * alpha - else - alpha = 0.25d0 * alpha - endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha - - ! 
cancellaion of the step if rho < 0.1 - if (rho_2 < thresh_rho_2) then !0.1d0) then - lambda = prev_lambda - f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' - endif - - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' - - i = i + 1 - enddo - - ! if trust newton failed - if (i > nb_it_max_lambda) then - print*,'' - print*,'######################################################' - print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' - print*,'The research of the optimal lambda has failed' - print*,'######################################################' - print*,'' - endif - - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 - - ! Time - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust_newton:', t3 - - print*,'' - print*,'---End trust_newton---' - print*,'' - -end subroutine -#+END_SRC - -* OMP: First derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the first derivative of (||x||^2 - Delta^2)^2* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. 
- -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| mo_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! 
Internal - double precision :: wtg,accu1,accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) - - deallocate(tmp_accu1, tmp_accu2) - -end function -#+END_SRC - -* OMP: Second derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the second derivative of (||x||^2 - Delta^2)^2* - -This function computes the second derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. 
-\begin{align*} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ - \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_accu2 | double precision | temporary array for the third sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! 
Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region_omp - double precision :: ddot - - ! Internal - double precision :: accu1,accu2,accu3 - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - integer :: i, j - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - - ! accu1 - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - ! accu2 - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - ! accu3 - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) - - deallocate(tmp_accu1, tmp_accu2, tmp_accu3) - -end function -#+END_SRC - -* OMP: Function value of ||x||^2 - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | - -Internal: -| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | -| tmp_fN | double precision | temporary array for the function | -| i,j | integer | indexes | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - use omp_lib - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! functions - double precision :: f_norm_trust_region_omp - - ! internal - double precision, allocatable :: tmp_fN(:) - integer :: i,j - - ! Allocation - allocate(tmp_fN(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & - !$OMP DEFAULT(NONE) - - ! 
Initialization - - !$OMP MASTER - f_norm_trust_region_omp = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_fN(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - deallocate(tmp_fN) - -end function -#+END_SRC - -* First derivative of (||x||^2 - Delta^2)^2 -Version without omp - -*Function to compute the first derivative of ||x||^2 - Delta* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. - -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of 
(norm(x)^2 - Delta^2)^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i, j - - ! Functions - double precision :: d1_norm_trust_region - double precision :: ddot - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) - -end function -#+END_SRC - -* Second derivative of (||x||^2 - Delta^2)^2 -Version without OMP - -*Function to compute the second derivative of ||x||^2 - Delta* - - -\begin{equation} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{equation} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org 
:tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region - double precision :: ddot - - ! Internal - double precision :: wtg,accu1,accu2,accu3 - integer :: i, j - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 - endif - enddo - - d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) - -end function -#+END_SRC - -* Function value of ||x||^2 -Version without OMP - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| f_norm_trust_region | double precision | value of norm(x)^2 | -| ddot | double precision | blas dot product | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region(n,e_val,tmp_wtg,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! function - double precision :: f_norm_trust_region - double precision :: ddot - - ! internal - integer :: i,j - - ! Initialization - f_norm_trust_region = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - -end function -#+END_SRC - -* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | 
indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - - deallocate(tmp_accu1, tmp_accu2) - -end -#+END_SRC - -* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | 
gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end -#+END_SRC - -* First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) 
-\end{align*} -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end -#+END_SRC - -* Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double 
precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f deleted file mode 100644 index 45738736..00000000 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ /dev/null @@ -1,121 +0,0 @@ -! Agreement with the model: Rho - -! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -! Rho represents the agreement between the model (the predicted energy -! by the Taylor expansion truncated at the 2nd order) and the real -! energy : - -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} -! With : -! $E^{k}$ the energy at the previous iteration -! $E^{k+1}$ the energy at the actual iteration -! $m^{k+1}$ the predicted energy for the actual iteration -! (cf. trust_e_model) - -! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -! If $\rho \leq 0$ the previous energy is lower than the actual -! energy. We have to cancel the last step and use a smaller trust -! region. -! Here we cancel the last step if $\rho < 0.1$, because even if -! the energy decreases, the agreement is bad, i.e., the Taylor expansion -! truncated at the second order doesn't represent correctly the energy -! landscape. So it's better to cancel the step and restart with a -! smaller trust region. - -! Provided in qp_edit: -! | thresh_rho | - -! Input: -! | prev_energy | double precision | previous energy (energy before the rotation) | -! | e_model | double precision | predicted energy after the rotation | - -! Output: -! | rho | double precision | the agreement between the model (predicted) and the real energy | -! 
| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -! | | | else the previous energy doesn't change | - -! Internal: -! | energy | double precision | energy (real) after the rotation | -! | i | integer | index | -! | t* | double precision | time | - - -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) - -! Rho -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} - -! In function of $\rho$ th step can be accepted or cancelled. - -! If we cancel the last step (k+1), the previous energy (k) doesn't -! change! -! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - - -! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' - !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! 
the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org deleted file mode 100644 index 9b25ee29..00000000 --- a/src/utils_trust_region/trust_region_rho.org +++ /dev/null @@ -1,123 +0,0 @@ -* Agreement with the model: Rho - -*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -Rho represents the agreement between the model (the predicted energy -by the Taylor expansion truncated at the 2nd order) and the real -energy : - -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} -With : -$E^{k}$ the energy at the previous iteration -$E^{k+1}$ the energy at the actual iteration -$m^{k+1}$ the predicted energy for the actual iteration -(cf. trust_e_model) - -If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -If $\rho \leq 0$ the previous energy is lower than the actual -energy. We have to cancel the last step and use a smaller trust -region. -Here we cancel the last step if $\rho < 0.1$, because even if -the energy decreases, the agreement is bad, i.e., the Taylor expansion -truncated at the second order doesn't represent correctly the energy -landscape. So it's better to cancel the step and restart with a -smaller trust region. 
- -Provided in qp_edit: -| thresh_rho | - -Input: -| prev_energy | double precision | previous energy (energy before the rotation) | -| e_model | double precision | predicted energy after the rotation | - -Output: -| rho | double precision | the agreement between the model (predicted) and the real energy | -| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -| | | else the previous energy doesn't change | - -Internal: -| energy | double precision | energy (real) after the rotation | -| i | integer | index | -| t* | double precision | time | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) -#+END_SRC - -** Rho -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} - -In function of $\rho$ th step can be accepted or cancelled. - -If we cancel the last step (k+1), the previous energy (k) doesn't -change! -If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f - ! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' 
- !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f deleted file mode 100644 index 42aa6ed4..00000000 --- a/src/utils_trust_region/trust_region_step.irp.f +++ /dev/null @@ -1,716 +0,0 @@ -! Trust region - -! *Compute the next step with the trust region algorithm* - -! The Newton method is an iterative method to find a minimum of a given -! function. It uses a Taylor series truncated at the second order of the -! targeted function and gives its minimizer. The minimizer is taken as -! the new position and the same thing is done. And by doing so -! iteratively the method find a minimum, a local or global one depending -! of the starting point and the convexity/nonconvexity of the targeted -! function. - -! The goal of the trust region is to constrain the step size of the -! Newton method in a certain area around the actual position, where the -! Taylor series is a good approximation of the targeted function. This -! area is called the "trust region". - -! In addition, in function of the agreement between the Taylor -! 
development of the energy and the real energy, the size of the trust -! region will be updated at each iteration. By doing so, the step sizes -! are not too larges. In addition, since we add a criterion to cancel the -! step if the energy increases (more precisely if rho < 0.1), so it's -! impossible to diverge. \newline - -! References: \newline -! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline -! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline -! ISBN: 978-0-387-40065-5 \newline - -! By using the first and the second derivatives, the Newton method gives -! a step: -! \begin{align*} -! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot -! \textbf{g}_{(k)} -! \end{align*} -! which leads to the minimizer of the Taylor series. -! !!! Warning: the Newton method gives the minimizer if and only if -! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! -! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: -! \begin{align*} -! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} -! \end{align*} -! which is equivalent to -! \begin{align*} -! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 -! \end{align*} - -! with: \newline -! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of -! size n) \newline -! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n -! matrix) \newline -! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of -! size n) \newline -! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration -! \newline - -! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a -! hypersphere of radius $\Delta_{(k+1)}$.\newline - -! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and -! $\textbf{H}$ is positive definite, the -! solution is the step given by the Newton method -! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. -! Else we have to constrain the step size. 
For simplicity we will remove -! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have -! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. -! Starting from the Taylor series of a function E (here, the energy) -! truncated at the 2nd order, we have: -! \begin{align*} -! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} -! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + -! \mathcal{O}(\textbf{x}^2) -! \end{align*} - -! With the constraint on the norm of $\textbf{x}$ we can write the -! Lagrangian -! \begin{align*} -! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} -! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} -! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) -! \end{align*} -! Where: \newline -! $\lambda$ is the Lagrange multiplier \newline -! $E$ is the energy at the k-th iteration $\Leftrightarrow -! E(\textbf{x} = \textbf{0})$ \newline - -! To solve this equation, we search a stationary point where the first -! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 -! \end{align*} - -! The derivative is: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! \end{align*} - -! So, we search $\textbf{x}$ such as: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 -! \end{align*} - -! We can rewrite that as: -! \begin{align*} -! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 -! \end{align*} -! with $\textbf{I}$ is the identity matrix. - -! By doing so, the solution is: -! \begin{align*} -! 
(\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} -! \end{align*} -! \begin{align*} -! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} -! with $\textbf{x}^T \textbf{x} = \Delta^2$. - -! We have to solve this previous equation to find this $\textbf{x}$ in the -! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is -! just a one dimension problem because we can express $\textbf{x}$ as a -! function of $\lambda$: -! \begin{align*} -! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} - -! We start from the fact that the hessian is diagonalizable. So we have: -! \begin{align*} -! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T -! \end{align*} -! with: \newline -! $\textbf{H}$, the hessian matrix \newline -! $\textbf{W}$, the matrix containing the eigenvectors \newline -! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline -! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline -! $h_i$, the i-th eigenvalue in ascending order \newline - -! Now we use the fact that adding a constant on the diagonal just shifts -! the eigenvalues: -! \begin{align*} -! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} -! +\textbf{I} \lambda) \cdot \textbf{W}^T -! \end{align*} - -! By doing so we can express $\textbf{x}$ as a function of $\lambda$ -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! with $\lambda \neq - h_i$. - -! An interesting thing in our case is the norm of $\textbf{x}$, -! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of -! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot -! \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! 
So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. -! And if we study the properties of this function we see that: -! \begin{align*} -! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 -! \end{align*} -! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: -! \begin{align*} -! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty -! \end{align*} - -! From these limits and knowing that $h_1$ is the lowest eigenvalue, we -! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and -! strictly decreasing function on the interval $\lambda \in -! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which -! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one -! solution. - -! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot -! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, -! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the -! Newton method is only defined for a positive definite hessian matrix, -! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -! definite. Consequently, in the case where $\textbf{H}$ is not positive -! definite, to ensure the positive definiteness, $\lambda$ must be -! greater than $- h_1$. -! \begin{align*} -! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -! \end{align*} - -! From that there are five cases: -! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} -! \lambda)$ -! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! 
\textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is -! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = -! 0)|| \leq \Delta$ -! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point - -! Thus to find the solution, we can write: -! \begin{align*} -! ||\textbf{x}(\lambda)|| = \Delta -! \end{align*} -! \begin{align*} -! ||\textbf{x}(\lambda)|| - \Delta = 0 -! \end{align*} - -! Taking the square of this equation -! \begin{align*} -! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -! \end{align*} -! we have a function with one minimum for the optimal $\lambda$. -! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} - -! But in practice, it is more effective to solve: -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} - -! To do that, we just use the Newton method with "trust_newton" using -! first and second derivative of $(||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\textbf{x}$. -! This will give the optimal $\lambda$ to compute the -! solution $\textbf{x}$ with the formula seen previously: -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} - -! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - - - - -! Evolution of the trust region - -! We initialize the trust region at the first iteration using a radius -! \begin{align*} -! \Delta = ||\textbf{x}(\lambda=0)|| -! \end{align*} - -! 
And for the next iteration the trust region will evolves depending of -! the agreement of the energy prediction based on the Taylor series -! truncated at the 2nd order and the real energy. If the Taylor series -! truncated at the 2nd order represents correctly the energy landscape -! the trust region will be extent else it will be reduced. In order to -! mesure this agreement we use the ratio rho cf. "rho_model" and -! "trust_e_model". From that we use the following values: -! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -! In addition, if $\rho < 0.1$ the iteration is cancelled, so it -! restarts with a smaller trust region until the energy decreases. - - - - -! Summary - -! To summarize, knowing the hessian (eigenvectors and eigenvalues), the -! gradient and the radius of the trust region we can compute the norm of -! the Newton step -! \begin{align*} -! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n -! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -! \end{align*} - -! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and -! $\textbf{x}(\lambda=0)$ is in the trust region and it is not -! necessary to put a constraint on $\textbf{x}$, the solution is the -! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in -! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = -! -h_1)$, similarly to the previous case. -! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point -! 
- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we -! have to search $\lambda \in (-h_1, \infty)$ such as -! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} -! or -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} -! which is numerically more stable. And finally compute -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we -! do exactly the same thing that the previous case but we search -! $\lambda \in (0, \infty)$ -! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the -! sum), again we do exactly the same thing that the previous case -! searching $\lambda \in (-h_1, \infty)$. - - -! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -! necessary in fact to remove the $j = 1$ in the sum since the term -! where $h_i - \lambda < 10^{-6}$ are not computed. - -! After that, we take this vector $\textbf{x}^*$, called "x", and we do -! the transformation to an antisymmetric matrix $\textbf{X}$, called -! m_x. This matrix $\textbf{X}$ will be used to compute a rotation -! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -! NB: -! An improvement can be done using a elleptical trust region. - - - - -! Code - -! Provided: -! | mo_num | integer | number of MOs | - -! Cf. qp_edit in orbital optimization section, for some constants/thresholds - -! Input: -! | m | integer | number of MOs | -! | n | integer | m*(m-1)/2 | -! | H(n, n) | double precision | hessian | -! | v_grad(n) | double precision | gradient | -! | e_val(n) | double precision | eigenvalues of the hessian | -! 
| W(n, n) | double precision | eigenvectors of the hessian | -! | rho | double precision | agreement between the model and the reality, | -! | | | represents the quality of the energy prediction | -! | nb_iter | integer | number of iteration | - -! Input/Ouput: -! | delta | double precision | radius of the trust region | - -! Output: -! | x(n) | double precision | vector containing the step | - -! Internal: -! | accu | double precision | temporary variable to compute the step | -! | lambda | double precision | lagrange multiplier | -! | trust_radius2 | double precision | square of the radius of the trust region | -! | norm2_x | double precision | norm^2 of the vector x | -! | norm2_g | double precision | norm^2 of the vector containing the gradient | -! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -! | i, j, k | integer | indexes | - -! Function: -! | dnrm2 | double precision | Blas function computing the norm | -! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - - -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) - -! 
Initialization and norm - -! The norm of the step size will be useful for the trust region -! algorithm. We start from a first guess and the radius of the trust -! region will evolve during the optimization. - -! avoid_saddle is actually a test to avoid saddle points - - -! Initialization of the Lagrange multiplier -lambda = 0d0 - -! List of w^T.g, to avoid the recomputation -tmp_wtg = 0d0 -do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo -enddo - -! Replacement of the small tmp_wtg corresponding to a negative eigenvalue -! in the case of avoid_saddle -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - -endif - -! Norm^2 of x, ||x||^2 -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) -! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta -! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm -! Anyway if the step is too big it will be reduced -print*,'||x||^2 :', norm2_x - -! Norm^2 of the gradient, ||v_grad||^2 -norm2_g = (dnrm2(n,v_grad,1))**2 -print*,'||grad||^2 :', norm2_g - -! Trust radius initialization - -! At the first iteration (nb_iter = 0) we initialize the trust region -! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = -! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, -! we compute this norm using f_norm_trust_region_omp as explain just -! below) - - -! 
trust radius -if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) -endif - -! Modification of the trust radius - -! In function of rho (which represents the agreement between the model -! and the reality, cf. rho_model) the trust region evolves. We update -! delta (the radius of the trust region). - -! To avoid too big trust region we put a maximum size. - - -! Modification of the trust radius in function of rho -if (rho >= 0.75d0) then - delta = 2d0 * delta -elseif (rho >= 0.5d0) then - delta = delta -elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta -else - delta = 0.25d0 * delta -endif - -! Maximum size of the trust region -!if (delta > 0.5d0 * n * pi) then -! delta = 0.5d0 * n * pi -! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' -!endif - -if (delta > 1d10) then - delta = 1d10 -endif - -print*, 'Delta :', delta - -! Calculation of the optimal lambda - -! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant -! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the -! unconstrained one, $\lambda = 0$ - -! You will find more details at the beginning - - -! By giving delta, we search (||x||^2 - delta^2)^2 = 0 -! and not (||x||^2 - delta)^2 = 0 - -! Research of lambda to solve ||x(lambda)|| = Delta - -! Display -print*, 'e_val(1) = ', e_val(1) -print*, 'w_1^T.g =', tmp_wtg(1) - -! H positive definite -if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! 
H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -! H indefinite -else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -endif - -! Recomputation of the norm^2 of the step x -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) -print*,'' -print*,'Summary after the trust region:' -print*,'lambda:', lambda -print*,'||x||:', dsqrt(norm2_x) -print*,'delta:', delta - -! Calculation of the step x - -! x refers to $\textbf{x}^*$ -! We compute x in function of lambda using its formula : -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -! 
+ \lambda} \cdot \textbf{w}_i -! \end{align*} - - -! Initialisation -x = 0d0 - -! Calculation of the step x - -! Normal version -if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - -! Version to use the absolute value of the eigenvalues -else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - -endif - -double precision :: beta, norm_x - -! Test -! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) -! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first -! eigenvectors multiply by a constant to ensure the condition -! ||x(lambda=-e_val(1))|| = delta and escape the saddle point -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif -endif - -! Transformation of x - -! x is a vector of size n, so it can be write as a m by m -! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i 0$ ($\lambda = 0$ is the unconstraint solution). 
But the -Newton method is only defined for a positive definite hessian matrix, -so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -definite. Consequently, in the case where $\textbf{H}$ is not positive -definite, to ensure the positive definiteness, $\lambda$ must be -greater than $- h_1$. -\begin{align*} - \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -\end{align*} - -From that there are five cases: -- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} - \lambda)$ - must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is - similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = - 0)|| \leq \Delta$ - but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point - -Thus to find the solution, we can write: -\begin{align*} - ||\textbf{x}(\lambda)|| = \Delta -\end{align*} -\begin{align*} - ||\textbf{x}(\lambda)|| - \Delta = 0 -\end{align*} - -Taking the square of this equation -\begin{align*} - (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -\end{align*} -we have a function with one minimum for the optimal $\lambda$. 
-Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -\begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -\end{align*} - -But in practice, it is more effective to solve: -\begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -\end{align*} - -To do that, we just use the Newton method with "trust_newton" using -first and second derivative of $(||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\textbf{x}$. -This will give the optimal $\lambda$ to compute the -solution $\textbf{x}$ with the formula seen previously: -\begin{align*} - \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -\end{align*} - -The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Evolution of the trust region - -We initialize the trust region at the first iteration using a radius -\begin{align*} - \Delta = ||\textbf{x}(\lambda=0)|| -\end{align*} - -And for the next iteration the trust region will evolves depending of -the agreement of the energy prediction based on the Taylor series -truncated at the 2nd order and the real energy. If the Taylor series -truncated at the 2nd order represents correctly the energy landscape -the trust region will be extent else it will be reduced. In order to -mesure this agreement we use the ratio rho cf. "rho_model" and -"trust_e_model". From that we use the following values: -- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -In addition, if $\rho < 0.1$ the iteration is cancelled, so it -restarts with a smaller trust region until the energy decreases. 
- -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Summary - -To summarize, knowing the hessian (eigenvectors and eigenvalues), the -gradient and the radius of the trust region we can compute the norm of -the Newton step -\begin{align*} - ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n - \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -\end{align*} - -- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and - $\textbf{x}(\lambda=0)$ is in the trust region and it is not - necessary to put a constraint on $\textbf{x}$, the solution is the - unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in - the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = - -h_1)$, similarly to the previous case. - But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we - have to search $\lambda \in (-h_1, \infty)$ such as - $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method - \begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 - \end{align*} - or - \begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 - \end{align*} - which is numerically more stable. 
And finally compute - \begin{align*} - \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i - \end{align*} -- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we - do exactly the same thing that the previous case but we search - $\lambda \in (0, \infty)$ -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the - sum), again we do exactly the same thing that the previous case - searching $\lambda \in (-h_1, \infty)$. - - -For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -necessary in fact to remove the $j = 1$ in the sum since the term -where $h_i - \lambda < 10^{-6}$ are not computed. - -After that, we take this vector $\textbf{x}^*$, called "x", and we do -the transformation to an antisymmetric matrix $\textbf{X}$, called -m_x. This matrix $\textbf{X}$ will be used to compute a rotation -matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -NB: -An improvement can be done using a elleptical trust region. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Code - -Provided: -| mo_num | integer | number of MOs | - -Cf. 
qp_edit in orbital optimization section, for some constants/thresholds - -Input: -| m | integer | number of MOs | -| n | integer | m*(m-1)/2 | -| H(n, n) | double precision | hessian | -| v_grad(n) | double precision | gradient | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n, n) | double precision | eigenvectors of the hessian | -| rho | double precision | agreement between the model and the reality, | -| | | represents the quality of the energy prediction | -| nb_iter | integer | number of iteration | - -Input/Ouput: -| delta | double precision | radius of the trust region | - -Output: -| x(n) | double precision | vector containing the step | - -Internal: -| accu | double precision | temporary variable to compute the step | -| lambda | double precision | lagrange multiplier | -| trust_radius2 | double precision | square of the radius of the trust region | -| norm2_x | double precision | norm^2 of the vector x | -| norm2_g | double precision | norm^2 of the vector containing the gradient | -| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -| i, j, k | integer | indexes | - -Function: -| dnrm2 | double precision | Blas function computing the norm | -| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! 
Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) -#+END_SRC - - -*** Initialization and norm - -The norm of the step size will be useful for the trust region -algorithm. We start from a first guess and the radius of the trust -region will evolve during the optimization. - -avoid_saddle is actually a test to avoid saddle points - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialization of the Lagrange multiplier - lambda = 0d0 - - ! List of w^T.g, to avoid the recomputation - tmp_wtg = 0d0 - do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo - enddo - - ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue - ! in the case of avoid_saddle - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - - endif - - ! Norm^2 of x, ||x||^2 - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta - ! 
We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm - ! Anyway if the step is too big it will be reduced - print*,'||x||^2 :', norm2_x - - ! Norm^2 of the gradient, ||v_grad||^2 - norm2_g = (dnrm2(n,v_grad,1))**2 - print*,'||grad||^2 :', norm2_g -#+END_SRC - -*** Trust radius initialization - - At the first iteration (nb_iter = 0) we initialize the trust region - with the norm of the step generate by the Newton's method ($\textbf{x}_1 = - (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, - we compute this norm using f_norm_trust_region_omp as explain just - below) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! trust radius - if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) - endif -#+END_SRC - -*** Modification of the trust radius - -In function of rho (which represents the agreement between the model -and the reality, cf. rho_model) the trust region evolves. We update -delta (the radius of the trust region). - -To avoid too big trust region we put a maximum size. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Modification of the trust radius in function of rho - if (rho >= 0.75d0) then - delta = 2d0 * delta - elseif (rho >= 0.5d0) then - delta = delta - elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta - else - delta = 0.25d0 * delta - endif - - ! Maximum size of the trust region - !if (delta > 0.5d0 * n * pi) then - ! delta = 0.5d0 * n * pi - ! 
print*,'Delta > delta_max, delta = 0.5d0 * n * pi' - !endif - - if (delta > 1d10) then - delta = 1d10 - endif - - print*, 'Delta :', delta -#+END_SRC - -*** Calculation of the optimal lambda - -We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant - $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the - unconstrained one, $\lambda = 0$ - -You will find more details at the beginning - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 - ! and not (||x||^2 - delta)^2 = 0 - - ! Research of lambda to solve ||x(lambda)|| = Delta - - ! Display - print*, 'e_val(1) = ', e_val(1) - print*, 'w_1^T.g =', tmp_wtg(1) - - ! H positive definite - if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - ! H indefinite - else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! 
Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - endif - - ! Recomputation of the norm^2 of the step x - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - print*,'' - print*,'Summary after the trust region:' - print*,'lambda:', lambda - print*,'||x||:', dsqrt(norm2_x) - print*,'delta:', delta -#+END_SRC - -*** Calculation of the step x - -x refers to $\textbf{x}^*$ -We compute x in function of lambda using its formula : -\begin{align*} -\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -+ \lambda} \cdot \textbf{w}_i -\end{align*} - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialisation - x = 0d0 - - ! Calculation of the step x - - ! Normal version - if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - - ! Version to use the absolute value of the eigenvalues - else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - - endif - - double precision :: beta, norm_x - - ! Test - ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) - ! is not + infinity. 
So ||x(lambda=-e_val(1))|| < delta, we add the first - ! eigenvectors multiply by a constant to ensure the condition - ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif - endif -#+END_SRC - -*** Transformation of x - -x is a vector of size n, so it can be write as a m by m -antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i lower diagonal matrix (p,q), p > q - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | i | integer | index in the vector | - -! Ouput: -! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | -! | | | p > q, | -! | | | p -> row, | -! | | | q -> column | - - -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! 
Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org deleted file mode 100644 index 0a09fa86..00000000 --- a/src/utils_trust_region/vec_to_mat_index.org +++ /dev/null @@ -1,72 +0,0 @@ -* Vector to matrix indexes - -*Compute the indexes p,q of a matrix element with the vector index i* - -Vector (i) -> lower diagonal matrix (p,q), p > q - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| i | integer | index in the vector | - -Ouput: -| p,q | integer | corresponding indexes in the lower diagonal of a matrix | -| | | p > q, | -| | | p -> row, | -| | | q -> column | - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! 
internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f deleted file mode 100644 index 9140b8d3..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ /dev/null @@ -1,39 +0,0 @@ -! Vect to antisymmetric matrix using mat_to_vec_index - -! Vector to antisymmetric matrix transformation using mat_to_vec_index -! subroutine. - -! Can be done in OMP (for the first part and with omp critical for the second) - - -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org deleted file mode 100644 index 4e358a88..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ /dev/null @@ -1,40 +0,0 @@ -* Vect to antisymmetric matrix using mat_to_vec_index - -Vector to antisymmetric matrix transformation using mat_to_vec_index -subroutine. - -Can be done in OMP (for the first part and with omp critical for the second) - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 
1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end -#+END_SRC From c687569bf488f6c5e93f7c56c2f415a4b22686be Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:01:25 +0200 Subject: [PATCH 53/87] create utils trust region --- src/utils_trust_region/EZFIO.cfg | 89 + src/utils_trust_region/NEED | 1 + src/utils_trust_region/README.md | 11 + src/utils_trust_region/algo_trust.irp.f | 248 +++ .../apply_mo_rotation.irp.f | 85 + src/utils_trust_region/mat_to_vec_index.irp.f | 61 + src/utils_trust_region/org/TANGLE_org_mode.sh | 7 + src/utils_trust_region/org/algo_trust.org | 593 ++++++ .../org/apply_mo_rotation.org | 86 + .../org/mat_to_vec_index.org | 63 + .../org/rotation_matrix.org | 452 +++++ .../org/rotation_matrix_iterative.org | 136 ++ .../org/sub_to_full_rotation_matrix.org | 65 + .../org/trust_region_expected_e.org | 128 ++ .../org/trust_region_optimal_lambda.org | 1661 +++++++++++++++++ .../org/trust_region_rho.org | 122 ++ .../org/trust_region_step.org | 759 ++++++++ .../org/vec_to_mat_index.org | 72 + src/utils_trust_region/org/vec_to_mat_v2.org | 40 + src/utils_trust_region/pi.h | 2 + src/utils_trust_region/rotation_matrix.irp.f | 441 +++++ .../rotation_matrix_iterative.irp.f | 134 ++ .../sub_to_full_rotation_matrix.irp.f | 64 + .../trust_region_expected_e.irp.f | 126 ++ .../trust_region_optimal_lambda.irp.f | 1651 ++++++++++++++++ src/utils_trust_region/trust_region_rho.irp.f | 120 ++ .../trust_region_step.irp.f | 749 ++++++++ src/utils_trust_region/vec_to_mat_index.irp.f | 71 + src/utils_trust_region/vec_to_mat_v2.irp.f | 39 + 29 files changed, 8076 insertions(+) create mode 100644 src/utils_trust_region/EZFIO.cfg create mode 100644 src/utils_trust_region/NEED create mode 100644 src/utils_trust_region/README.md create mode 100644 
src/utils_trust_region/algo_trust.irp.f create mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f create mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f create mode 100755 src/utils_trust_region/org/TANGLE_org_mode.sh create mode 100644 src/utils_trust_region/org/algo_trust.org create mode 100644 src/utils_trust_region/org/apply_mo_rotation.org create mode 100644 src/utils_trust_region/org/mat_to_vec_index.org create mode 100644 src/utils_trust_region/org/rotation_matrix.org create mode 100644 src/utils_trust_region/org/rotation_matrix_iterative.org create mode 100644 src/utils_trust_region/org/sub_to_full_rotation_matrix.org create mode 100644 src/utils_trust_region/org/trust_region_expected_e.org create mode 100644 src/utils_trust_region/org/trust_region_optimal_lambda.org create mode 100644 src/utils_trust_region/org/trust_region_rho.org create mode 100644 src/utils_trust_region/org/trust_region_step.org create mode 100644 src/utils_trust_region/org/vec_to_mat_index.org create mode 100644 src/utils_trust_region/org/vec_to_mat_v2.org create mode 100644 src/utils_trust_region/pi.h create mode 100644 src/utils_trust_region/rotation_matrix.irp.f create mode 100644 src/utils_trust_region/rotation_matrix_iterative.irp.f create mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f create mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f create mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f create mode 100644 src/utils_trust_region/trust_region_rho.irp.f create mode 100644 src/utils_trust_region/trust_region_step.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop 
the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If ABS(criterion - criterion_model) < thresh_model, the program exits the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replaces the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to consider when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several orders of magnitude to avoid numerical problems. If the search for the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to consider when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. 
There is no particular reason to put a different value than thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance during the search for lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the search for the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the search for the optimal lambda in the trust region algorithm, a warning is printed +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Search for the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 
0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the search for the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-search for the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.md b/src/utils_trust_region/README.md new file mode 100644 index 00000000..72bfefef --- /dev/null +++ b/src/utils_trust_region/README.md @@ -0,0 +1,11 @@ +# Utils trust region + +The documentation can be found in the org files. + +# Org files +The org files are stored in the directory org in order to avoid overwriting user changes. +The org files can be modified; to export the changes to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..933d8eff --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically set at iteration 0, and which evolves during the +! process as a function of the evolution of rho). The step is computed by +! constraining its norm with a Lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylor series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! 
This estimation of the gain in energy is used by +! trust_region_is_step_cancelled to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restarts from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call your hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(n,n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if trust_region_step_w_expected_e sets must_exit to true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! 
call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! 
in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! 
| cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..a313769d --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! 
| i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Rotate the MOs: mo_coef <- mo_coef . R, the MOs before the rotation are returned in prev_mos + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, ao_num ! mo_coef has ao_num rows, print all of them + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Change the MO label (save_mos is not called here) + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! 
\end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only works for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Output: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine diff --git a/src/utils_trust_region/org/TANGLE_org_mode.sh b/src/utils_trust_region/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list=$(ls *.org) +for element in $list +do + emacs --batch "$element" -f org-babel-tangle +done diff --git a/src/utils_trust_region/org/algo_trust.org b/src/utils_trust_region/org/algo_trust.org new file mode 100644 index 00000000..01e99c29 --- /dev/null +++ b/src/utils_trust_region/org/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically set at iteration 0, and which evolves during the +process as a function of the evolution of rho). The step is computed by +constraining its norm with a Lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylor series +truncated at the second order (criterion_model). +If (DABS(prev_criterion-criterion_model) < thresh_model) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+ +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! 
!if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! 
in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! 
in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho and decide whether the step should be cancelled (cancelled when rho < thresh_rho) + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho (agreement between the predicted and the real new criterion) + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> set cancel_step to .True. so that the caller cancels the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Nothing, or something to tell whether everything went well + + ! 
Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! 
if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! 
to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends on the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! Stopping tests: convergence, max number of iterations, too small trust radius + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Nothing, or something to tell whether everything went well + + ! 
Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! 
Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/org/apply_mo_rotation.org b/src/utils_trust_region/org/apply_mo_rotation.org new file 
mode 100644 index 00000000..955997e9 --- /dev/null +++ b/src/utils_trust_region/org/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Rotate the MOs: mo_coef <- mo_coef . R, the MOs before the rotation are returned in prev_mos + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, ao_num ! mo_coef has ao_num rows, print all of them + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Change the MO label (save_mos is not called here) + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! 
Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/mat_to_vec_index.org b/src/utils_trust_region/org/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/org/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only works for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Output: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! 
Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/org/rotation_matrix.org b/src/utils_trust_region/org/rotation_matrix.org new file mode 100644 index 00000000..3b2ff437 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix.org @@ -0,0 +1,452 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) 
! (end of the "Internal" table of rotation_matrix)
!   tau_m1(n,n)  : diagonal matrix with the tau^-1 values
!   part_1(n,n)  : W.cos_tau.W^t
!   part_1a(n,n) : cos_tau.W^t
!   part_2(n,n)  : W.tau_m1.sin_tau.W^t.A
!   part_2a(n,n) : W^t.A
!   part_2b(n,n) : sin_tau.W^t.A
!   part_2c(n,n) : tau_m1.sin_tau.W^t.A
!   RR_t(n,n)    : R.R^t - 1, must be 0 if R is a rotation matrix
!   i,j          : indexes
!
! Functions:
!   disnan : logical, Lapack function, checks if an element is NaN

subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)

  implicit none

  !BEGIN_DOC
  ! Rotation matrix to rotate the molecular orbitals.
  ! If the rotation is too large the transformation is not unitary and must be cancelled.
  !END_DOC
  !
  ! Builds R = exp(A) from the antisymmetric matrix A using
  !   R = W.cos(tau).W^t + W.tau^-1.sin(tau).W^t.A
  ! where W are the eigenvectors of B = A.A, x its eigenvalues and
  ! tau = sqrt(-x).
  !
  ! Arguments:
  !   A(LDA,n) : antisymmetric matrix (only read here)
  !   LDA      : leading dimension of A, must be >= n
  !   R(LDR,n) : on exit, the rotation matrix in R(1:n,1:n); rows n+1..LDR
  !              (if any) are set to zero
  !   LDR      : leading dimension of R, must be >= n
  !   n        : order of the matrices, must be >= 1
  !   info     : 0 on success; 3 if n < 1; 25 if LDA < n; 4 if LDR < n;
  !              1 if A contains a NaN (R is not computed in these cases)
  !   enforce_step_cancellation : .True. if max|R.R^t - 1| > 1d-12
  !
  ! The routine aborts if A is not antisymmetric or if R contains a NaN.
  ! A failure of dsyev is only reported by a warning.

  include 'pi.h'

  ! in
  integer, intent(in)             :: n,LDA,LDR
  double precision, intent(inout) :: A(LDA,n)

  ! out
  double precision, intent(out)   :: R(LDR,n)
  integer, intent(out)            :: info
  logical, intent(out)            :: enforce_step_cancellation

  ! internal
  double precision, allocatable :: B(:,:)
  double precision, allocatable :: work(:)
  double precision, allocatable :: W(:,:), e_val(:)
  double precision, allocatable :: cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
  double precision, allocatable :: part_1(:,:),part_1a(:,:)
  double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
  double precision, allocatable :: RR_t(:,:)
  integer                       :: i,j
  integer                       :: info2, lwork ! for dsyev
  double precision              :: max_elem, max_elem_A, t1,t2,t3

  ! function
  logical                       :: disnan

  print*,''
  print*,'---rotation_matrix---'

  call wall_time(t1)

  ! ------------------------------------------------------------------
  ! Pre-conditions (checked before any allocation)
  ! ------------------------------------------------------------------
  info=0
  enforce_step_cancellation = .False.

  ! Size of matrix A must be at least 1 by 1
  if (n<1) then
    info = 3
    print*, 'WARNING: invalid parameter 5'
    print*, 'n<1'
    return
  endif

  ! Leading dimension of A must be >= n
  if (LDA < n) then
    info = 25
    print*, 'WARNING: invalid parameter 2 or 5'
    print*, 'LDA < n'
    return
  endif

  ! Leading dimension of R must be >= n
  if (LDR < n) then
    info = 4
    print*, 'WARNING: invalid parameter 4'
    print*, 'LDR < n'
    return
  endif

  ! Matrix elements of A must be non-NaN
  do j = 1, n
    do i = 1, n
      if (disnan(A(i,j))) then
        info=1
        print*, 'WARNING: invalid parameter 1'
        print*, 'NaN element in A matrix'
        return
      endif
    enddo
  enddo

  ! A must be antisymmetric: zero diagonal ...
  do i = 1, n
    if (A(i,i) /= 0d0) then
      print*, 'WARNING: matrix A is not antisymmetric'
      print*, 'Non 0 element on the diagonal', i, A(i,i)
      call ABORT
    endif
  enddo

  ! ... and A(i,j) = -A(j,i). The absolute value is required: the sum
  ! A(i,j)+A(j,i) is symmetric in (i,j), so without it a negative sum
  ! would never be detected.
  do j = 1, n
    do i = 1, n
      if (dabs(A(i,j)+A(j,i)) > 1d-16) then
        print*, 'WARNING: matrix A is not antisymmetric'
        print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
        print*, 'diff:', A(i,j)+A(j,i)
        call ABORT
      endif
    enddo
  enddo

  ! Too large elements are only reported, the step is cancelled later
  ! if the resulting matrix is not unitary.
  max_elem_A = maxval(dabs(A(1:n,1:n)))

  if (max_elem_A > 2 * pi) then
    print*,''
    print*,'WARNING: ABS(max_elem_A) > 2 pi '
    print*,''
  endif

  ! ------------------------------------------------------------------
  ! Allocation
  ! ------------------------------------------------------------------
  allocate(B(n,n))
  allocate(cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
  allocate(W(n,n),e_val(n))
  allocate(part_1(n,n),part_1a(n,n))
  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
  allocate(RR_t(n,n))

  ! ------------------------------------------------------------------
  ! B = A.A and its diagonalization
  ! ------------------------------------------------------------------
  call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))

  ! Copy B in W, the diagonalization will put the eigenvectors in W
  W=B

  ! Eigenvalues -> e_val, eigenvectors -> W.
  ! dsyev only needs a workspace of lwork >= max(1,3n-1) elements.
  lwork = 3*n-1
  allocate(work(lwork))

  call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)

  deallocate(work)

  if (info2 < 0) then
    print*, 'WARNING: error in the diagonalization'
    print*, 'Illegal value of the ', info2,'-th parameter'
  elseif (info2 >0) then
    print*, "WARNING: Diagonalization failed to converge"
  endif

  ! ------------------------------------------------------------------
  ! tau^2 = -x, cos(tau), sin(tau), tau^-1 (diagonal matrices)
  ! ------------------------------------------------------------------
  ! The eigenvalues of A.A are <= 0; those above -1d-12 are set to 0
  ! to avoid numerical problems. After this loop e_val holds tau^2.
  do j = 1, n
    if (e_val(j) >= -1d-12) then
      e_val(j) = 0.d0
    else
      e_val(j) = - e_val(j)
    endif
  enddo

  cos_tau = 0d0
  sin_tau = 0d0
  tau_m1  = 0d0
  do i = 1, n
    cos_tau(i,i) = dcos(dsqrt(e_val(i)))
    sin_tau(i,i) = dsin(dsqrt(e_val(i)))
    ! Convergence problems can come from here if the threshold is
    ! too big/small
    if (e_val(i) > 1d-16) then
      tau_m1(i,i) = 1d0/(dsqrt(e_val(i)))
    endif
  enddo

  ! ------------------------------------------------------------------
  ! Rotation matrix R = part_1 + part_2
  ! ------------------------------------------------------------------
  ! part_1 = W.cos_tau.W^t
  call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))

  ! part_2 = W.tau_m1.sin_tau.W^t.A
  call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
  call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
  call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))

  ! R is declared R(LDR,n): only the leading n x n block receives the
  ! rotation matrix, so that the assignment is conformable when LDR > n.
  R = 0d0
  R(1:n,1:n) = part_1 + part_2

  ! ------------------------------------------------------------------
  ! Post-conditions
  ! ------------------------------------------------------------------
  ! RR_t = R.R^t - 1, must be the zero matrix
  RR_t = 0d0
  do i = 1, n
    RR_t(i,i) = 1d0
  enddo

  call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))

  ! Element of largest magnitude (the sign is kept for the output)
  max_elem = 0d0
  do j = 1, n
    do i = 1, n
      if (ABS(RR_t(i,j)) > ABS(max_elem)) then
        max_elem = RR_t(i,j)
      endif
    enddo
  enddo

  print*, 'Max error in R.R^T:', max_elem

  if (ABS(max_elem) > 1d-12) then
    print*, 'WARNING: max error in R.R^T > 1d-12'
    print*, 'Enforce the step cancellation'
    enforce_step_cancellation = .True.
  endif

  ! Matrix elements of R must be non-NaN
  do j = 1,n
    do i = 1,LDR
      if (disnan(R(i,j))) then
        info = 666
        print*, 'NaN in rotation matrix'
        call ABORT
      endif
    enddo
  enddo

  ! ------------------------------------------------------------------
  ! Deallocation, end
  ! ------------------------------------------------------------------
  deallocate(B)
  deallocate(cos_tau,sin_tau,tau_m1)
  deallocate(W,e_val)
  deallocate(part_1,part_1a)
  deallocate(part_2,part_2a,part_2b,part_2c)
  deallocate(RR_t)

  call wall_time(t2)
  t3 = t2-t1
  print*,'Time in rotation matrix:', t3

  print*,'---End rotation_matrix---'

end subroutine

! File: src/utils_trust_region/org/rotation_matrix_iterative.org
!
! * Rotation matrix with the iterative method
!
!   R = \sum_{k=0}^{\infty} \frac{1}{k!} X^k
!
! !!! Doesn't work !!!

subroutine rotation_matrix_iterative(m,X,R)

  implicit none

  ! Sums the Taylor series of exp(X) until max|R^t.R - 1| < 1d-12 or
  ! until 1/k! < 1d-15. The routine is flagged as not working and
  ! always ends with a call to abort.
  !
  ! Arguments:
  !   m      : order of the matrices
  !   X(m,m) : matrix to exponentiate
  !   R(m,m) : on exit, the truncated series

  ! in
  integer, intent(in)           :: m
  double precision, intent(in)  :: X(m,m)

  ! out
  double precision, intent(out) :: R(m,m)

  ! internal
  double precision              :: max_elem, pre_factor
  double precision              :: t1,t2,t3
  integer                       :: k,i,j
  logical                       :: not_converged
  double precision, allocatable :: RRT(:,:), A(:,:), B(:,:)

  print*,'---rotation_matrix_iterative---'
  call wall_time(t1)

  allocate(RRT(m,m),A(m,m),B(m,m))

  ! k = 0 and k = 1: R = 1 + X
  R = 0d0
  do i = 1, m
    R(i,i) = 1d0
  enddo
  R = R + X

  ! Running quantities: A = X^(k-1) and pre_factor = 1/(k-1)!.
  ! Updating them at each iteration avoids recomputing X^k from
  ! scratch and avoids the overflow of an integer factorial, which
  ! happens for k = 13, before 1/k! can fall below 1d-15.
  A = X
  pre_factor = 1d0

  k = 2

  not_converged = .True.

  do while (not_converged)

    pre_factor = pre_factor/DBLE(k)
    if (pre_factor < 1d-15) then
      print*,'pre factor=', pre_factor,'< 1d-15, exit'
      exit
    endif

    ! B = X^k = X.X^(k-1)
    call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1))
    A = B

    R = R + pre_factor * B

    k = k + 1

    ! RRT = R^t.R - 1
    call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1))

    do i = 1, m
      RRT(i,i) = RRT(i,i) - 1d0
    enddo

    max_elem = 0d0
    do j = 1, m
      do i = 1, m
        if (dabs(RRT(i,j)) > max_elem) then
          max_elem = dabs(RRT(i,j))
        endif
      enddo
    enddo

    print*, 'Iteration:', k
    print*, 'Max error in R:', max_elem

    if (max_elem < 1d-12) then
      not_converged = .False.
    endif

  enddo

  deallocate(RRT,A,B)

  call wall_time(t2)
  t3 = t2 - t1
  print*,'Time in rotation matrix iterative:', t3
  print*,'---End roration_matrix_iterative---'


print*,'Does not work yet, abort'
call abort

end

! ** Factorial

function factorial(n)

  implicit none

  ! Returns n! as a default integer (1 if n <= 1).
  ! If n! does not fit in a default integer, the result saturates at
  ! huge(factorial) instead of overflowing.

  integer, intent(in) :: n
  integer             :: factorial, k

  factorial = 1

  do k = 2, n
    if (factorial > huge(factorial)/k) then
      factorial = huge(factorial)
      return
    endif
    factorial = factorial * k
  enddo

end

! File: src/utils_trust_region/org/sub_to_full_rotation_matrix.org
!
! * Rotation matrix in a subspace to rotation matrix in the full space
!
! Usually, we are using a list of MOs, for example the active ones. When
! we compute a rotation matrix to rotate the MOs, we just compute a
! rotation matrix for these MOs in order to reduce the size of the
! matrix which has to be computed. Since the computation of a rotation
! matrix scales in $O(N^3)$ with $N$ the number of MOs, it's better to
! reduce the number of MOs involved.
! After that we replace the rotation matrix in the full space by
! building the elements of the rotation matrix in the full space from
! the elements of the rotation matrix in the subspace and adding some 0
! on the extradiagonal elements and some 1 on the diagonal elements,
! for the MOs that are not involved in the rotation.
!
! Provided:
!   mo_num           : integer, number of MOs
! Input:
!   m                : integer, size of tmp_list, m <= mo_num
!   tmp_list(m)      : integer, list of MOs
!   tmp_R(m,m)       : double precision, rotation matrix in the space of
!                      the MOs contained in tmp_list
! Output:
!   R(mo_num,mo_num) : double precision, rotation matrix in the space of
!                      all the MOs

subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)

  !BEGIN_DOC
  ! Compute the full rotation matrix from a smaller one
  !END_DOC

  implicit none

  ! in
  integer, intent(in)           :: m, tmp_list(m)
  double precision, intent(in)  :: tmp_R(m,m)

  ! out
  double precision, intent(out) :: R(mo_num,mo_num)

  ! internal: (row,col) in the full space, (sub_row,sub_col) in the subspace
  integer                       :: row, col, sub_row, sub_col

  ! Start from the identity: an orbital which is not in tmp_list is
  ! left unchanged by the rotation.
  do col = 1, mo_num
    R(:,col)   = 0d0
    R(col,col) = 1d0
  enddo

  ! Scatter the subspace rotation matrix into the full one
  do sub_col = 1, m
    col = tmp_list(sub_col)
    do sub_row = 1, m
      row = tmp_list(sub_row)
      R(row,col) = tmp_R(sub_row,sub_col)
    enddo
  enddo

end

! File: src/utils_trust_region/org/trust_region_expected_e.org
!
! * Predicted energy : e_model
!
! *Compute the energy predicted by the Taylor series*
!
! The energy is predicted using a Taylor expansion truncated at the 2nd
! order :
!
!   E_{k+1} = E_{k} + g_k^T . x_{k+1} + 1/2 . x_{k+1}^T . H_{k} . x_{k+1} + O(x_{k+1}^2)
!
! Input:
!   n           : integer, m*(m-1)/2
!   n2          : integer, m*(m-1)/2 or 1 if the hessian is diagonal
!   v_grad(n)   : double precision, gradient
!   H(n,n2)     : double precision, hessian
!   x(n)        : double precision, step in the trust region
!   prev_energy : double precision, previous energy
! Output:
!   e_model     : double precision, predicted energy after the rotation of the MOs
! Function:
!   ddot        : double precision, dot product (BLAS)

subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model)

  include 'pi.h'

  !BEGIN_DOC
  ! Compute the expected criterion/energy after the application of the step x
  !END_DOC

  implicit none

  ! in
  integer, intent(in)           :: n,n2
  double precision, intent(in)  :: v_grad(n),H(n,n2),x(n)
  double precision, intent(in)  :: prev_energy

  ! out
  double precision, intent(out) :: e_model

  ! internal
  double precision              :: gx, half_xhx      ! g.x and 1/2 . x^T.H.x
  double precision              :: t_start, t_end
  double precision, allocatable :: hx(:)             ! H.x

  ! function
  double precision              :: ddot

  print*,''
  print*,'---Trust_e_model---'

  call wall_time(t_start)

  allocate(hx(n))

  ! First order term: g.x
  gx = ddot(n,v_grad,1,x,1)

  ! H.x, with H either full (n2 == n) or reduced to its diagonal
  ! stored in the first column
  if (n /= n2) then
    hx(1:n) = H(1:n,1) * x(1:n)
  else
    call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,hx,1)
  endif

  ! Second order term: 1/2 . x^T.H.x
  half_xhx = 0.5d0 * ddot(n,x,1,hx,1)

  ! Second order model of the energy
  e_model = prev_energy + gx + half_xhx

  print*, 'prev_energy: ', prev_energy
  print*, 'Predicted energy after the rotation:', e_model
  print*, 'Previous energy - predicted energy: ', prev_energy - e_model

  ! Can be deleted, already in another subroutine
  if (DABS(prev_energy - e_model) < 1d-12 ) then
    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
  endif

  deallocate(hx)

  call wall_time(t_end)
  print*,'Time in trust e model:', t_end - t_start

  print*,'---End trust_e_model---'

end subroutine

! File: src/utils_trust_region/org/trust_region_optimal_lambda.org
!
! * Newton's method to find the optimal lambda
!
! *Compute the lambda value for the trust region*
!
! This subroutine uses the Newton method in order to find the optimal
! lambda. This constant is added on the diagonal of the hessian to shift
! the eigenvalues. It has a double role:
! - ensure that the resulting hessian is positive definite for the
!   Newton method
! - constrain the step in the trust region, i.e.,
!   $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius
!   of the trust region.
! We search $\lambda$ which minimizes
! \begin{align*}
!   f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2
! \end{align*}
! or
! \begin{align*}
!   \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2
! \end{align*}
! and gives obviously 0 in both cases. \newline
!
! There are several cases:
! - If $\textbf{H}$ is positive definite the interval containing the
!   solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$).
! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
!   \textbf{g} \neq 0$ then the interval containing
!   the solution is $\lambda \in (-h_1, \infty)$.
! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
!   \textbf{g} = 0$ then the interval containing the solution is
!   $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| <
!   10^{-12}$ are not computed, so the term where $i = 1$ is
!   automatically removed and this case becomes similar to the previous one.
!
! So to avoid numerical problems (cf.
trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The search must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such that +\begin{align*} + |y| \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly described by the +Taylor series truncated at the second order. Thus, if $|y| > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves as a function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". 
+ +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
\textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . 
! f' = lambda - prev_lambda   (end of the description of y, the Newton step)
!
! (end of the "Internal" table of trust_region_optimal_lambda)
!   prev_lambda : double precision, previous value of lambda
!   t1,t2,t3    : double precision, wall time
!   i           : integer, index
!   epsilon     : double precision, little constant to avoid numerical problem
!   rho_2       : double precision, (prev_f_R - f_R)/(prev_f_R - model),
!                 agreement between model and f_R
!
! Function:
!   d1_norm_trust_region_omp         : first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2
!   d2_norm_trust_region_omp         : second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2
!   d1_norm_inverse_trust_region_omp : first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2
!   d2_norm_inverse_trust_region_omp : second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2
!   f_norm_trust_region_omp          : value of norm(x)^2

subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)

  include 'pi.h'

  !BEGIN_DOC
  ! Research the optimal lambda to constrain the step size in the trust region
  !END_DOC
  !
  ! Arguments:
  !   n          : size of e_val and tmp_wtg
  !   e_val(n)   : eigenvalues of the hessian (only read here)
  !   tmp_wtg(n) : w_i^T.v_grad
  !   delta      : radius of the trust region
  !   lambda     : on exit, the Lagrange multiplier found by the search
  !
  ! Providers read: version_lambda_search, nb_it_max_pre_search,
  ! nb_it_max_lambda, thresh_cc, thresh_rho_2, thresh_model_2.

  implicit none

  ! in
  integer, intent(in)             :: n
  double precision, intent(inout) :: e_val(n)
  double precision, intent(in)    :: delta
  double precision, intent(in)    :: tmp_wtg(n)

  ! out
  double precision, intent(out)   :: lambda

  ! internal
  double precision :: f_N, prev_f_N
  double precision :: prev_f_R, f_R
  double precision :: model
  double precision :: d_1, d_2
  double precision :: t1,t2,t3
  integer          :: i
  double precision :: epsilon
  double precision :: y
  double precision :: prev_lambda
  double precision :: rho_2
  double precision :: alpha

  ! functions
  double precision :: f_norm_trust_region_omp
  double precision :: d1_norm_trust_region_omp
  double precision :: d2_norm_trust_region_omp
  double precision :: d1_norm_inverse_trust_region_omp
  double precision :: d2_norm_inverse_trust_region_omp

  print*,''
  print*,'---Trust_newton---'

  call wall_time(t1)

  ! version_lambda_search
  ! 1 -> ||x||^2 - delta^2 = 0,
  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)

  ! ------------------------------------------------------------------
  ! Pre-search: find epsilon such that ||x(lambda + epsilon)|| ~ delta,
  ! starting from the lower bound of the interval containing the
  ! solution, in order to start the Newton search near the optimum.
  ! ------------------------------------------------------------------
  epsilon = 1d-4
  lambda = max(0d0, -e_val(1))

  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
  i = 1

  if (f_N > delta**2) then
    ! The step is too long: increase lambda
    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)

      prev_f_N = f_N
      epsilon = epsilon * 2d0
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)

      ! Security: the norm must decrease when lambda increases,
      ! otherwise the last change is undone and the loop is left
      if (prev_f_N < f_N) then
        print*,'WARNING, error: prev_f_N < f_N, exit'
        epsilon = epsilon * 0.5d0
        i = nb_it_max_pre_search + 1
      endif

      i = i + 1
    enddo

  else
    ! The step is too short: reduce lambda
    do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)

      prev_f_N = f_N
      epsilon = epsilon * 0.5d0
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)

      ! Security: the norm must increase when lambda decreases,
      ! otherwise the last change is undone and the loop is left
      if (prev_f_N > f_N) then
        print*,'WARNING, error: prev_f_N > f_N, exit'
        epsilon = epsilon * 2d0
        i = nb_it_max_pre_search + 1
      endif

      i = i + 1
    enddo
  endif

  ! Starting point of the Newton search
  lambda = lambda + epsilon

  ! ------------------------------------------------------------------
  ! Newton search with a 1D trust region of length alpha
  ! ------------------------------------------------------------------
  ! thresh_cc, threshold for the research of the optimal lambda:
  ! the loop is left when ABS(1d0-||x||^2/delta^2) <= thresh_cc
  ! thresh_rho_2, threshold to cancel the step in the research
  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2

  i = 1
  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! value of ||x(lambda)||^2
  model = 0d0       ! predicted value of f_R
  prev_f_N = 0d0    ! previous value of ||x||^2
  prev_f_R = 0d0    ! previous value of f_R
  f_R = 0d0         ! (||x||^2 - delta^2)^2 or (1/||x||^2 - 1/delta^2)^2
  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - model)
  y = 0d0           ! step size
  prev_lambda = 0d0 ! previous lambda

  ! Derivatives at the starting point, used for the initial trust length
  if (version_lambda_search == 1) then
    d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
    d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
  else
    d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
    d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
  endif

  ! Trust length
  alpha = DABS((1d0/d_2)*d_1)

  ! The number of iterations is bounded by nb_it_max_lambda, the same
  ! bound as the one used below to report a failure.
  do while (i <= nb_it_max_lambda .and. DABS(1d0-f_N/delta**2) > thresh_cc)

    ! Update of f_N, f_R and the derivatives
    prev_f_N = f_N
    if (version_lambda_search == 1) then
      prev_f_R = (prev_f_N - delta**2)**2
      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
    else
      prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
    endif

    ! Newton's step
    y = -(1d0/DABS(d_2))*d_1

    ! Constraint on y (the newton step)
    if (DABS(y) > alpha) then
      y = alpha * (y/DABS(y)) ! preservation of the sign of y
    endif

    ! Predicted value of f_R, Taylor series
    model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2

    ! Updates lambda
    prev_lambda = lambda
    lambda = prev_lambda + y

    ! Checks if lambda is in (-h_1, \infty)
    if (lambda > MAX(0d0, -e_val(1))) then
      ! New value of ||x(lambda)||^2
      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)

      ! New f_R
      if (version_lambda_search == 1) then
        f_R = (f_N - delta**2)**2
      else
        f_R = (1d0/f_N - 1d0/delta**2)**2
      endif

      ! Check the gain
      if (DABS(prev_f_R - model) < thresh_model_2) then
        print*,''
        print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
        print*,''
      endif

      ! Agreement between the model and the actual value
      rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
    else
      ! in order to reduce the size of the trust region, alpha,
      ! until lambda is in (-h_1, \infty)
      rho_2 = 0d0
    endif

    ! Evolution of the trust length, alpha
    if (rho_2 >= 0.75d0) then
      alpha = 2d0 * alpha
    elseif (rho_2 >= 0.5d0) then
      alpha = alpha
    elseif (rho_2 >= 0.25d0) then
      alpha = 0.5d0 * alpha
    else
      alpha = 0.25d0 * alpha
    endif

    ! Cancellation of the step if rho_2 < thresh_rho_2
    if (rho_2 < thresh_rho_2) then
      lambda = prev_lambda
      f_N = prev_f_N
    endif

    i = i + 1
  enddo

  ! The search has failed only if the iterations are exhausted AND the
  ! convergence criterion is still not satisfied.
  if (i > nb_it_max_lambda .and. DABS(1d0-f_N/delta**2) > thresh_cc) then
    print*,''
    print*,'######################################################'
    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
    print*,'The research of the optimal lambda has failed'
    print*,'######################################################'
    print*,''
  endif

  print*,'Number of iterations:', i
  print*,'Value of lambda:', lambda
  print*,'Convergence criterion:', 1d0-f_N/delta**2

  ! Time
  call wall_time(t2)
  t3 = t2 - t1
  print*,'Time in trust_newton:', t3

  print*,'---End trust_newton---'

end subroutine

! * OMP: First derivative of (||x||^2 - Delta^2)^2
!
! *Function to compute the first derivative of (||x||^2 - Delta^2)^2*
!
! This function computes the first derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
+= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n          | integer          | mo_num*(mo_num-1)/2                    |
+| e_val(n)   | double precision | eigenvalues of the hessian             |
+| tmp_wtg(n) | double precision | W^T.v_grad, precomputed by the caller  |
+| lambda     | double precision | Lagrange multiplier                    |
+| delta      | double precision | Delta of the trust region              |
+
+Internal:
+| accu1     | double precision | first sum of the formula           |
+| accu2     | double precision | second sum of the formula          |
+| tmp_accu1 | double precision | temporary array for the first sum  |
+| tmp_accu2 | double precision | temporary array for the second sum |
+| i         | integer          | index                              |
+
+Function:
+| d1_norm_trust_region_omp | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  !
+  ! Result: -4 * accu2 * (accu1 - delta^2), where
+  !   accu1 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^2
+  !   accu2 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^3
+  ! An index i is skipped in BOTH sums when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision              :: accu1,accu2
+  integer                       :: i
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision :: d1_norm_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP: a single active level of parallelism inside this routine
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                          &
+  !$OMP PRIVATE(i)                                        &
+  !$OMP SHARED(n,lambda, e_val, thresh_eig,               &
+  !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2)       &
+  !$OMP DEFAULT(NONE)
+
+  ! Per-index terms. Both sums use the same guard, so a (near-)singular
+  ! denominator e_val(i)+lambda is never divided by: such an index
+  ! contributes exactly zero to accu1 and to accu2.
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+    else
+      tmp_accu1(i) = 0d0
+      tmp_accu2(i) = 0d0
+    endif
+  enddo
+  !$OMP END DO
+
+  ! The barrier implied by END DO guarantees that every term is written
+  ! before the master thread sums them, in index order.
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end function
+#+END_SRC
+
+* OMP: Second derivative of (||x||^2 - Delta^2)^2
+
+*Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the second derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+\begin{align*}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+  \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n          | integer          | mo_num*(mo_num-1)/2                   |
+| e_val(n)   | double precision | eigenvalues of the hessian            |
+| tmp_wtg(n) | double precision | W^T.v_grad, precomputed by the caller |
+| lambda     | double precision | Lagrange multiplier                   |
+| delta      | double precision | Delta of the trust region             |
+
+Internal:
+| accu1     | double precision | first sum of the formula           |
+| accu2     | double precision | second sum of the formula          |
+| accu3     | double precision | third sum of the formula           |
+| tmp_accu1 | double precision | temporary array for the first sum  |
+| tmp_accu2 | double precision | temporary array for the second sum |
+| tmp_accu3 | double precision | temporary array for the third sum  |
+| i         | integer          | index                              |
+
+Function:
+| d2_norm_trust_region_omp | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  !
+  ! Result: 2 * (6 * accu3 * (accu1 - delta^2) + (-2 * accu2)^2), where
+  !   accuK = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^(K+1),  K = 1,2,3
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region_omp
+
+  ! Internal
+  double precision              :: accu1,accu2,accu3
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  integer                       :: i
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP
+  !$OMP PARALLEL                                          &
+  !$OMP PRIVATE(i)                                        &
+  !$OMP SHARED(n,lambda, e_val, thresh_eig,               &
+  !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,         &
+  !$OMP accu1, accu2, accu3)                              &
+  !$OMP DEFAULT(NONE)
+
+  ! Per-index terms; a skipped index contributes exactly zero
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+      tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4
+    else
+      tmp_accu1(i) = 0d0
+      tmp_accu2(i) = 0d0
+      tmp_accu3(i) = 0d0
+    endif
+  enddo
+  !$OMP END DO
+
+  ! Sums done by the master thread, in index order, after the barrier
+  ! implied by END DO
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+    accu2 = accu2 + tmp_accu2(i)
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  ! Restore the number of active levels, as the other OMP functions
+  ! of this file do after their parallel region
+  call omp_set_max_active_levels(4)
+
+  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)
+
+  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)
+
+end function
+#+END_SRC
+
+* OMP: Function value of ||x||^2
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n          | integer          | mo_num*(mo_num-1)/2                   |
+| e_val(n)   | double precision | eigenvalues of the hessian            |
+| tmp_wtg(n) | double precision | W^T.v_grad, precomputed by the caller |
+| lambda     | double precision | Lagrange multiplier                   |
+
+Internal:
+| tmp_fN | double precision | temporary array for the terms of the sum |
+| i      | integer          | index                                    |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+  use omp_lib
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute ||x(lambda)||^2 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^2
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+
+  ! functions
+  double precision :: f_norm_trust_region_omp
+
+  ! internal
+  double precision, allocatable :: tmp_fN(:)
+  integer                       :: i
+
+  ! Allocation
+  allocate(tmp_fN(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP
+  !$OMP PARALLEL                                          &
+  !$OMP PRIVATE(i)                                        &
+  !$OMP SHARED(n,lambda, e_val, thresh_eig,               &
+  !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp)         &
+  !$OMP DEFAULT(NONE)
+
+  ! Per-index terms; a skipped index contributes exactly zero
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    else
+      tmp_fN(i) = 0d0
+    endif
+  enddo
+  !$OMP END DO
+
+  ! Sum done by the master thread, in index order
+  !$OMP MASTER
+  f_norm_trust_region_omp = 0d0
+  do i = 1, n
+    f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  ! Restore the number of active levels, as the other OMP functions
+  ! of this file do after their parallel region
+  call omp_set_max_active_levels(4)
+
+  deallocate(tmp_fN)
+
+end function
+#+END_SRC
+
+* First derivative of (||x||^2 - Delta^2)^2
+Version without omp
+
+*Function to compute the first derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the first derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
+= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| accu1 | double precision | first sum of the formula |
+| accu2 | double precision | second sum of the formula |
+| wtg | double precision | temporary variable to store W^T.v_grad |
+| i,j | integer | indexes |
+
+Function:
+| d1_norm_trust_region | double precision | first derivative with respect to lambda of
(norm(x)^2 - Delta^2)^2 |
+| ddot | double precision | blas dot product |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  !
+  ! Result: 2 * accu2 * (accu1 - delta^2), where, with wtg_i = w(:,i).v_grad,
+  !   accu1 = sum_i      wtg_i^2 / (e_val(i) + lambda)^2
+  !   accu2 = sum_i -2 * wtg_i^2 / (e_val(i) + lambda)^3
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: wtg, accu1, accu2
+  integer          :: i, j
+
+  ! Functions
+  double precision :: d1_norm_trust_region
+  double precision :: ddot
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+
+  ! Single pass: the projection wtg costs O(n) per index, so it is
+  ! computed once and reused for both sums. Each accumulator still
+  ! receives its terms in the same index order as before.
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)
+
+end function
+#+END_SRC
+
+* Second derivative of (||x||^2 - Delta^2)^2
+Version without OMP
+
+*Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+
+\begin{equation}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{equation}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| accu1 | double precision | first sum of the formula |
+| accu2 | double precision | second sum of the formula |
+| accu3 | double precision | third sum of the formula |
+| wtg | double precision | temporary variable to store W^T.v_grad |
+| i,j | integer | indexes |
+
+Function:
+| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+| ddot | double precision | blas dot product |
+
+#+BEGIN_SRC f90 :comments org
:tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  !
+  ! Result: 2 * (accu3 * (accu1 - delta^2) + accu2^2), where,
+  ! with wtg_i = w(:,i).v_grad,
+  !   accu1 = sum_i      wtg_i^2 / (e_val(i) + lambda)^2
+  !   accu2 = sum_i -2 * wtg_i^2 / (e_val(i) + lambda)^3
+  !   accu3 = sum_i  6 * wtg_i^2 / (e_val(i) + lambda)^4
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region
+  double precision :: ddot
+
+  ! Internal
+  double precision :: wtg,accu1,accu2,accu3
+  integer          :: i, j
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  ! Single pass: the projection wtg costs O(n) per index, so it is
+  ! computed once and reused for the three sums. Each accumulator still
+  ! receives its terms in the same index order as before.
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3
+      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)
+
+end function
+#+END_SRC
+
+* Function value of ||x||^2
+Version without OMP
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n          | integer          | mo_num*(mo_num-1)/2                   |
+| e_val(n)   | double precision | eigenvalues of the hessian            |
+| tmp_wtg(n) | double precision | W^T.v_grad, precomputed by the caller |
+| lambda     | double precision | Lagrange multiplier                   |
+
+Internal:
+| i | integer | index |
+
+Function:
+| f_norm_trust_region | double precision | value of norm(x)^2 |
+
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region(n,e_val,tmp_wtg,lambda)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute ||x(lambda)||^2 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^2
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+
+  ! function
+  double precision :: f_norm_trust_region
+
+  ! internal
+  integer :: i
+
+  ! Initialization
+  f_norm_trust_region = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+
+end function
+#+END_SRC
+
+* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the first derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+  {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+  {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+  \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| wtg | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1 | double precision | temporary array for the first sum |
+| tmp_accu2 | double precision | temporary array for the second sum |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad |
+| i,j | integer |
indexes |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  !
+  ! Result: 4 * accu2 * (1/accu1^3 - 1/(delta^2 * accu1^2)), where
+  !   accu1 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^2
+  !   accu2 = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^3
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Arguments
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Result
+  double precision :: d1_norm_inverse_trust_region_omp
+
+  ! Locals
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+  double precision              :: accu1, accu2
+  integer                       :: i,j
+
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! A single active level of parallelism inside this routine
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                          &
+  !$OMP PRIVATE(i,j)                                      &
+  !$OMP SHARED(n,lambda, e_val, thresh_eig,               &
+  !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2)      &
+  !$OMP DEFAULT(NONE)
+
+  ! Per-index terms; a skipped index contributes exactly zero
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+    else
+      tmp_accu1(i) = 0d0
+      tmp_accu2(i) = 0d0
+    endif
+  enddo
+  !$OMP END DO
+
+  ! Sums done by the master thread, in index order, after the barrier
+  ! implied by END DO
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end
+#+END_SRC
+
+* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the second derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
+  &= 4 \left[ 6 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ 4 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision |
gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| wtg | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1 | double precision | temporary array for the first sum |
+| tmp_accu2 | double precision | temporary array for the second sum |
+| tmp_accu3 | double precision | temporary array for the third sum |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad |
+| i,j | integer | indexes |
+
+Function:
+| d2_norm_inverse_trust_region_omp | double precision | value of the second derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  !
+  ! Result:   4 * (6 * accu2^2/accu1^4 - 3 * accu3/accu1^3)
+  !         - 4/delta^2 * (4 * accu2^2/accu1^3 - 3 * accu3/accu1^2), where
+  !   accuK = sum_i tmp_wtg(i)^2 / (e_val(i) + lambda)^(K+1),  K = 1,2,3
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Arguments
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Result
+  double precision :: d2_norm_inverse_trust_region_omp
+
+  ! Locals
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  double precision              :: accu1, accu2, accu3
+  integer                       :: i,j
+
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  ! A single active level of parallelism inside this routine
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                          &
+  !$OMP PRIVATE(i,j)                                      &
+  !$OMP SHARED(n,lambda, e_val, thresh_eig,               &
+  !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,         &
+  !$OMP accu1, accu2, accu3)                              &
+  !$OMP DEFAULT(NONE)
+
+  ! Per-index terms; a skipped index contributes exactly zero
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+      tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4
+    else
+      tmp_accu1(i) = 0d0
+      tmp_accu2(i) = 0d0
+      tmp_accu3(i) = 0d0
+    endif
+  enddo
+  !$OMP END DO
+
+  ! Sums done by the master thread, in index order, after the barrier
+  ! implied by END DO
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+    accu2 = accu2 + tmp_accu2(i)
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+      - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)
+
+end
+#+END_SRC
+
+* First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the first derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+  {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+  {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+  \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| accu1 | double precision | first sum of the formula |
+| accu2 | double precision | second sum of the formula |
+| wtg | double precision | temporary variable to store W^T.v_grad |
+| i,j | integer | indexes |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  !
+  ! Result: 4 * accu2 * (1/accu1^3 - 1/(delta^2 * accu1^2)), where,
+  ! with wtg_i = w(:,i).v_grad,
+  !   accu1 = sum_i wtg_i^2 / (e_val(i) + lambda)^2
+  !   accu2 = sum_i wtg_i^2 / (e_val(i) + lambda)^3
+  ! An index i is skipped when |e_val(i)| <= thresh_eig
+  ! or |e_val(i) + lambda| <= thresh_eig.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: wtg, accu1, accu2
+  integer          :: i,j
+
+  ! Functions
+  double precision :: d1_norm_inverse_trust_region
+
+  accu1 = 0d0
+  accu2 = 0d0
+
+  ! Single pass: the projection wtg costs O(n) per index, so it is
+  ! computed once and reused for both sums. Each accumulator still
+  ! receives its terms in the same index order as before.
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+end
+#+END_SRC
+
+* Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the second derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
+  &= 4 \left[ 6 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ 4 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n | integer | mo_num*(mo_num-1)/2 |
+| e_val(n) | double precision | eigenvalues of the hessian |
+| W(n,n) | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient |
+| lambda | double precision | Lagrange multiplier |
+| delta | double precision | Delta of the trust region |
+
+Internal:
+| wtg | double
precision | temporary variable to store W^T.v_grad |
+| i,j | integer | indexes |
+
+Function:
+| d2_norm_inverse_trust_region | double precision | value of the second derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute the second derivative, with respect to lambda, of
+  ! (1/||x(lambda)||^2 - 1/Delta^2)^2
+  !
+  ! With accu_k = sum_i (w_i^T.g)^2 / (h_i + lambda)^(k+1), k = 1,2,3 :
+  !   d2 =   4         * (6 accu2^2/accu1^4 - 3 accu3/accu1^3)
+  !        - 4/Delta^2 * (4 accu2^2/accu1^3 - 3 accu3/accu1^2)
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n          ! dimension of the problem
+  double precision, intent(in) :: e_val(n)   ! eigenvalues of the hessian
+  double precision, intent(in) :: w(n,n)     ! eigenvectors of the hessian (one per column)
+  double precision, intent(in) :: v_grad(n)  ! gradient
+  double precision, intent(in) :: lambda     ! Lagrange multiplier
+  double precision, intent(in) :: delta      ! radius of the trust region
+
+  ! Internal
+  double precision :: wtg, accu1, accu2, accu3
+  integer :: i,j
+
+  ! Functions
+  double precision :: d2_norm_inverse_trust_region
+
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  ! Single pass over the eigenpairs: the projection wtg = w_i^T.g is the
+  ! same for the three sums, so it is computed once per eigenvector
+  ! instead of once per sum. Each accumulator still receives the same
+  ! terms in the same order as with three separate loops.
+  ! thresh_eig is not declared here (presumably an IRPF90 provider);
+  ! eigenvalues with |h_i| or |h_i + lambda| below it are skipped.
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+      accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  ! NOTE(review): if every term is skipped, accu1 = 0 and the expression
+  ! below divides by zero; the callers are assumed to avoid that case.
+  d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+end
+#+END_SRC
diff --git a/src/utils_trust_region/org/trust_region_rho.org b/src/utils_trust_region/org/trust_region_rho.org
new file mode 100644
index 00000000..b669da8c
--- /dev/null
+++ b/src/utils_trust_region/org/trust_region_rho.org
@@ -0,0 +1,122 @@
+* Agreement with the model: Rho
+
+*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+Rho represents the agreement between the model (the predicted energy
+by the Taylor expansion truncated at the 2nd order) and the real
+energy :
+
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+With :
+$E^{k}$ the energy at the previous iteration
+$E^{k+1}$ the energy at the actual iteration
+$m^{k+1}$ the predicted energy for the actual iteration
+(cf. trust_e_model)
+
+If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+If $\rho \leq 0$ the previous energy is lower than the actual
+energy. We have to cancel the last step and use a smaller trust
+region.
+Here we cancel the last step if $\rho < 0.1$, because even if
+the energy decreases, the agreement is bad, i.e., the Taylor expansion
+truncated at the second order doesn't represent correctly the energy
+landscape. So it's better to cancel the step and restart with a
+smaller trust region.
+
+Provided in qp_edit:
+| thresh_rho |
+
+Input:
+| prev_energy | double precision | previous energy (energy before the rotation) |
+| energy | double precision | energy (real) after the rotation |
+| e_model | double precision | predicted energy after the rotation |
+
+Output:
+| rho | double precision | the agreement between the model (predicted) and the real energy |
+| prev_energy | double precision | if rho >= thresh_rho the actual energy becomes the previous energy |
+| | | else the previous energy doesn't change |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  !
+  ! rho = (prev_energy - energy) / (prev_energy - e_model)
+  !
+  ! If rho >= thresh_rho the step is accepted and prev_energy is
+  ! overwritten with energy, otherwise prev_energy is left untouched.
+  !END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+
+  ! Out
+  double precision, intent(out)   :: rho
+
+  print*,''
+  print*,'---Rho_model---'
+#+END_SRC
+
+** Rho
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+
+Depending on $\rho$, the step can be accepted or cancelled.
+
+If we cancel the last step (k+1), the previous energy (k) doesn't
+change!
+If the step (k+1) is accepted, then the "previous energy" becomes E(k+1)
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+  ! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  ! NOTE(review): the denominator is not guarded here. A zero denominator
+  ! gives Inf or NaN; a NaN compares false with "<" below, so the step
+  ! would be accepted. Confirm that the caller really performs the check
+  ! mentioned above.
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  !print*, 'previous energy, prev_energy:', prev_energy
+  !print*, 'predicted energy, e_model:', e_model
+  !print*, 'real energy, energy:', energy
+  !print*, 'prev_energy - energy:', prev_energy - energy
+  !print*, 'prev_energy - e_model:', prev_energy - e_model
+  print*, 'Rho:', rho
+  !print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled
+    print*, 'Rho <', thresh_rho,', the previous energy does not change'
+    !print*, 'prev_energy :', prev_energy
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy
+  endif
+
+  print*,'---End rho_model---'
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/org/trust_region_step.org b/src/utils_trust_region/org/trust_region_step.org
new file mode 100644
index 00000000..0e5f090f
--- /dev/null
+++ b/src/utils_trust_region/org/trust_region_step.org
@@ -0,0 +1,759 @@
+* Trust region
+
+*Compute the next step with the trust region algorithm*
+
+The Newton method is an iterative method to find a minimum of a given
+function. It uses a Taylor series truncated at the second order of the
+targeted function and gives its minimizer. The minimizer is taken as
+the new position and the same thing is done. And by doing so
+iteratively the method find a minimum, a local or global one depending
+of the starting point and the convexity/nonconvexity of the targeted
+function.
+
+The goal of the trust region is to constrain the step size of the
+Newton method in a certain area around the actual position, where the
+Taylor series is a good approximation of the targeted function. This
+area is called the "trust region".
+ +In addition, in function of the agreement between the Taylor +development of the energy and the real energy, the size of the trust +region will be updated at each iteration. By doing so, the step sizes +are not too larges. In addition, since we add a criterion to cancel the +step if the energy increases (more precisely if rho < 0.1), so it's +impossible to diverge. \newline + +References: \newline +Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +ISBN: 978-0-387-40065-5 \newline + +By using the first and the second derivatives, the Newton method gives +a step: +\begin{align*} + \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot + \textbf{g}_{(k)} +\end{align*} +which leads to the minimizer of the Taylor series. +!!! Warning: the Newton method gives the minimizer if and only if +$\textbf{H}$ is positive definite, else it leads to a saddle point !!! +But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +\begin{align*} + ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +\end{align*} +which is equivalent to +\begin{align*} + \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +\end{align*} + +with: \newline +$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +size n) \newline +$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +matrix) \newline +$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +size n) \newline +$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +\newline + +Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +hypersphere of radius $\Delta_{(k+1)}$.\newline + +So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +$\textbf{H}$ is positive definite, the +solution is the step given by the Newton method +$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +Else we have to constrain the step size. 
For simplicity we will remove +the index $_{(k)}$ and $_{(k+1)}$. To restrict the step size, we have +to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +Starting from the Taylor series of a function E (here, the energy) +truncated at the 2nd order, we have: +\begin{align*} + E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} + \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \mathcal{O}(\textbf{x}^3) +\end{align*} + +With the constraint on the norm of $\textbf{x}$ we can write the +Lagrangian +\begin{align*} + \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} + + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +\end{align*} +Where: \newline +$\lambda$ is the Lagrange multiplier \newline +$E$ is the energy at the k-th iteration $\Leftrightarrow +E(\textbf{x} = \textbf{0})$ \newline + +To solve this equation, we search a stationary point where the first +derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +\end{align*} + +The derivative is: +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} + = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +\end{align*} + +So, we search $\textbf{x}$ such that: +\begin{align*} +\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} += \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +\end{align*} + +We can rewrite that as: +\begin{align*} + \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} + = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +\end{align*} +where $\textbf{I}$ is the identity matrix.
+ +By doing so, the solution is: +\begin{align*} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +\end{align*} +\begin{align*} + \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} +with $\textbf{x}^T \textbf{x} = \Delta^2$. + +We have to solve this previous equation to find this $\textbf{x}$ in the +trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +just a one dimension problem because we can express $\textbf{x}$ as a +function of $\lambda$: +\begin{align*} + \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} + +We start from the fact that the hessian is diagonalizable. So we have: +\begin{align*} + \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +\end{align*} +with: \newline +$\textbf{H}$, the hessian matrix \newline +$\textbf{W}$, the matrix containing the eigenvectors \newline +$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +$h_i$, the i-th eigenvalue in ascending order \newline + +Now we use the fact that adding a constant on the diagonal just shifts +the eigenvalues: +\begin{align*} + \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} + +\textbf{I} \lambda) \cdot \textbf{W}^T +\end{align*} + +By doing so we can express $\textbf{x}$ as a function of $\lambda$ +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} +with $\lambda \neq - h_i$. + +An interesting thing in our case is the norm of $\textbf{x}$, +because we want $||\textbf{x}|| = \Delta$. 
Due to the orthogonality of +the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +\begin{align*} + ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot + \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +And if we study the properties of this function we see that: +\begin{align*} + \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +\end{align*} +and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +\begin{align*} + \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +\end{align*} + +From these limits and knowing that $h_1$ is the lowest eigenvalue, we +can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +strictly decreasing function on the interval $\lambda \in +(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +solution. + +Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. 
+\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite and $||\textbf{x}(\lambda = 0)|| \leq \Delta$, $\lambda = 0$ (unconstrained solution) +- if $\textbf{H}$ is positive definite and $||\textbf{x}(\lambda = 0)|| > \Delta$, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$. This case is + similar to the case where $\textbf{H}$ is positive definite and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + times a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$.
+Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, and is denoted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolve depending on +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extended, else it will be reduced. In order to +measure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \leq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \leq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases.
+ +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{h_i^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + times a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such that + $||\textbf{x}(\lambda)|| = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable.
And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing as in the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing as in the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the terms +where $|h_i + \lambda| < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be made using an elliptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf.
qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| H(n,n2) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! 
Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo + endif + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! 
We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + !print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + !print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! 
print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + !print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + !print*, 'e_val(1) = ', e_val(1) + !print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! 
Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. 
DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. 
And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0 & -3 & -5 \\
+2 & 3 & 0 & -6 \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only works for the lower diagonal !!!
+
+Input:
+| i | integer | index in the vector |
+
+Output:
+| p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+| | | p > q, |
+| | | p -> row, |
+| | | q -> column |
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  !BEGIN_DOC
+  ! Map the vector index i onto the (row p, column q) position, p > q,
+  ! of the corresponding element in the strict lower triangle of a matrix
+  !END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)  :: i
+
+  ! out
+  integer, intent(out) :: p,q
+
+  ! internal
+  integer :: n_root, n_full_rows
+  double precision :: root
+
+  ! Positive root of a*(a-1)/2 = i, and its integer part
+  root = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  n_root = INT(root)
+
+  ! When i is exactly the triangular number n_root*(n_root-1)/2, it is the
+  ! last element of its row, so one row less is complete before it
+  n_full_rows = n_root
+  if ((n_root*(n_root-1))/2==i) n_full_rows = n_root-1
+
+  ! Matrix element indexes: the row follows the complete ones, the column
+  ! is the offset of i past the elements stored in the complete rows
+  p = n_full_rows + 1
+  q = i - n_full_rows*(n_full_rows-1)/2
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/org/vec_to_mat_v2.org b/src/utils_trust_region/org/vec_to_mat_v2.org
new file mode 100644
index 00000000..4ce5f5e1
--- /dev/null
+++ b/src/utils_trust_region/org/vec_to_mat_v2.org
@@ -0,0 +1,40 @@
+* Vect to antisymmetric matrix using mat_to_vec_index
+
+Vector to antisymmetric matrix transformation using mat_to_vec_index
+subroutine.
+
+Can be done in OMP (for the first part and with omp critical for the second)
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  !BEGIN_DOC
+  ! Build the m by m antisymmetric matrix m_x from the vector v_x of size n.
+  ! The position of each element is given by mat_to_vec_index.
+  !END_DOC
+
+  implicit none
+
+  integer, intent(in)           :: n,m
+  double precision, intent(in)  :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: row, col, idx
+
+  ! Strict lower triangle (row > col) filled from the vector,
+  ! everything else starts at zero
+  m_x = 0d0
+  do col = 1, m - 1
+    do row = col + 1, m
+      call mat_to_vec_index(row,col,idx)
+      m_x(row,col) = v_x(idx)
+    enddo
+  enddo
+
+  ! Strict upper triangle obtained by antisymmetry: m_x(row,col) = - m_x(col,row)
+  do col = 2, m
+    do row = 1, col - 1
+      m_x(row,col) = - m_x(col,row)
+    enddo
+  enddo
+
+end
+#+END_SRC
diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h
new file mode 100644
index 00000000..2c36a9f0
--- /dev/null
+++ b/src/utils_trust_region/pi.h
@@ -0,0 +1,2 @@
+  logical, parameter :: debug=.False.
+  double precision, parameter :: pi = 3.1415926535897932d0
diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f
new file mode 100644
index 00000000..827af8c0
--- /dev/null
+++ b/src/utils_trust_region/rotation_matrix.irp.f
@@ -0,0 +1,441 @@
+! Rotation matrix
+
+! *Build a rotation matrix from an antisymmetric matrix*
+
+! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+! $$
+! \textbf{R}=\exp(\textbf{A})
+! $$
+
+! So :
+! \begin{align*}
+! \textbf{R}=& \exp(\textbf{A}) \\
+! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+! \end{align*}
+
+! With :
+! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+! $\tau$ : $\sqrt{-x}$
+! $x$ : eigenvalues of $\textbf{A}^2$
+
+! Input:
+! | A(n,n) | double precision | antisymmetric matrix |
+! | n | integer | number of columns of the A matrix |
+! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) |
+! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) |
+
+! Output:
+! | R(n,n) | double precision | Rotation matrix |
+! | info | integer | if info = 0, the execution is successful |
+! | | | if info = k, the k-th parameter has an illegal value |
+! | | | if info = -k, the algorithm failed |
+
+! Internal:
+! | B(n,n) | double precision | B = A.A |
+!
| work(lwork,n) | double precision | work matrix for dsyev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity <=> R.R^t-1=0 <=> norm = 0 | +! | norm | double precision | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | BLAS function, Euclidean norm of a vector (applied to the n*n elements of a matrix) | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! 
internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! 
Matrix elements of A must be non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (dabs(A(i,j)+A(j,i)) > 1d-16) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +!print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B (symmetric, since A is antisymmetric) +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +!print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +elseif (info2 >0) then + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! 
- Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +!print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! 
\textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +!print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! 
Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +!print*, 'e_val(1):', e_val(1) +!print*, 'e_val(n):', e_val(n) +!print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f new file mode 100644 index 00000000..f268df04 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -0,0 +1,134 @@ +! Rotation matrix with the iterative method + +! \begin{align*} +! \textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +! \end{align*} + +! !!! Doesn't work !!! + + +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! 
Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + pre_factor = 1d0 ! running value of 1/(k-1)!, kept in double precision to avoid the integer overflow of factorial(k) for k > 12 + not_converged = .True. + + do while (not_converged) + + pre_factor = pre_factor/DBLE(k) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End rotation_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end + +! Factorial of n as a default-kind integer (the product overflows for n > 12 when default integers are 32-bit) + +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..75d04352 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! 
Usually, we are using a list of MOs, for example the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scales in $O(N^3)$ with $N$ the number of MOs, it's better to +! reduce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the off-diagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs contained in tmp_list | + +! Output: +! | R(mo_num,mo_num) | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space: R starts as the identity + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 
1 on the diagonal because it is a rotation matrix, 1 = nothing changes for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..ad5ad2f9 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,126 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at the 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(||\textbf{x}_{k+1}||^3) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(n) | double precision | gradient | +! | H(n,n2) | double precision | hessian (only its diagonal, stored in H(:,1), when n2 /= n) | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (BLAS) | + + +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! 
internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + ! print*,'g.x : ', part_1 + !endif + + ! Product H.x + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + ! print*,'1/2*x^T.H.x : ', part_2 + !endif + + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..b7dcf875 --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1651 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. 
This constant is added on the diagonal of the hessian to shift +! the eigenvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and which obviously gives 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i + \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a small constant. +! The search must be restricted to the interval containing the +! solution. For that reason a small trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! 
$\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! 
&= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! 
| lambda | double precision | Lagrange multiplier to constrain the norm of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | small constant to avoid numerical problems | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! 
Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + +! Initialization + epsilon = 1d-4 + lambda = max(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! 
lambda = lambda + epsilon + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda: ||x||^2 > delta^2, epsilon is doubled until ||x||^2 <= delta^2 or nb_it_max_pre_search iterations are done + if (f_N > delta**2) then + !print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Safety check: the norm is expected to decrease here, otherwise the last doubling is undone and the loop is left + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda: ||x||^2 <= delta^2, epsilon is halved until ||x||^2 >= delta^2 or nb_it_max_pre_search iterations are done + else + !print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Safety check: the norm is expected to increase here, otherwise the last halving is undone and the loop is left + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + !print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) <= thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) + + ! 
Debug + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - model) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length: the initial 1D trust radius is the size of the first Newton step |f'/f''| + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method, bounded by nb_it_max_lambda so that the failure check after the loop (i > nb_it_max_lambda) matches the loop bound + do while (i <= nb_it_max_lambda .and. DABS(1d0-f_N/delta**2) > thresh_cc) + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! 
second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + !write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif + + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! 
exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + !print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + !print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'---End trust_newton---' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! 
! This function computes the first derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.

! \begin{align*}
! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

! Provided:
! | mo_num     | integer | number of MOs |
! | thresh_eig | not declared in this function | threshold used to skip terms of the sums |

! Input:
! | n          | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)   | double precision | eigenvalues of the hessian |
! | tmp_wtg(n) | double precision | W^t.v_grad, already formed |
! | lambda     | double precision | Lagrange multiplier        |
! | delta      | double precision | Delta of the trust region  |

! Internal:
! | accu1      | double precision | first sum of the formula           |
! | accu2      | double precision | second sum of the formula          |
! | tmp_accu1  | double precision | temporary array for the first sum  |
! | tmp_accu2  | double precision | temporary array for the second sum |
! | i          | integer          | index                              |

! Function:
! | d1_norm_trust_region_omp | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  !BEGIN_DOC
  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  !END_DOC

  implicit none

  ! in
  integer, intent(in)           :: n
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: tmp_wtg(n)
  double precision, intent(in)  :: lambda
  double precision, intent(in)  :: delta

  ! Internal
  double precision              :: accu1, accu2
  integer                       :: i
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)

  ! Functions
  double precision :: d1_norm_trust_region_omp

  ! Allocation
  allocate(tmp_accu1(n), tmp_accu2(n))

  ! Same setting as the other OMP functions of this file around their
  ! parallel region.
  call omp_set_max_active_levels(1)

  !$OMP PARALLEL &
  !$OMP PRIVATE(i) &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,&
  !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) &
  !$OMP DEFAULT(NONE)

  !$OMP MASTER
  accu1 = 0d0
  accu2 = 0d0
  !$OMP END MASTER

  ! Element-wise terms of the two sums. Both sums use the same guard:
  ! a term is skipped (left at zero) when the eigenvalue or the shifted
  ! eigenvalue is below thresh_eig in absolute value. The original code
  ! tested only |e_val(i)| for the second sum, which allowed a division
  ! by a nearly zero (e_val(i) + lambda)**3.
  !$OMP DO
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
    else
      tmp_accu1(i) = 0d0
      tmp_accu2(i) = 0d0
    endif
  enddo
  !$OMP END DO

  ! Sequential reductions, done after the barrier that ends the loop above
  !$OMP MASTER
  do i = 1, n
    accu1 = accu1 + tmp_accu1(i)
  enddo
  do i = 1, n
    accu2 = accu2 + tmp_accu2(i)
  enddo
  !$OMP END MASTER

  !$OMP END PARALLEL

  call omp_set_max_active_levels(4)

  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)

  deallocate(tmp_accu1, tmp_accu2)

end function

! OMP: Second derivative of (||x||^2 - Delta^2)^2

! *Function to compute the second derivative of (||x||^2 - Delta^2)^2*

! This function computes the second derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.
! \begin{align*}
! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
!
! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
! \end{align*}

! Provided:
! | m_num      | integer | number of MOs |
! | thresh_eig | not declared in this function | threshold used to skip terms of the sums |

! Input:
! | n          | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)   | double precision | eigenvalues of the hessian |
! | tmp_wtg(n) | double precision | W^t.v_grad, already formed |
! | lambda     | double precision | Lagrange multiplier        |
! | delta      | double precision | Delta of the trust region  |

! Internal:
! | accu1      | double precision | first sum of the formula           |
! | accu2      | double precision | second sum of the formula          |
! | accu3      | double precision | third sum of the formula           |
! | tmp_accu1  | double precision | temporary array for the first sum  |
! | tmp_accu2  | double precision | temporary array for the second sum |
! | tmp_accu3  | double precision | temporary array for the third sum  |
! | i          | integer          | index                              |

! Function:
! | d2_norm_trust_region_omp | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  !BEGIN_DOC
  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  !END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)           :: n
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: tmp_wtg(n)
  double precision, intent(in)  :: lambda
  double precision, intent(in)  :: delta

  ! Functions
  double precision :: d2_norm_trust_region_omp

  ! Internal
  double precision              :: accu1,accu2,accu3
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
  integer                       :: i

  ! Allocation
  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))

  call omp_set_max_active_levels(1)

  ! OMP
  !$OMP PARALLEL &
  !$OMP PRIVATE(i) &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,&
  !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, &
  !$OMP accu1, accu2, accu3) &
  !$OMP DEFAULT(NONE)

  ! Initialization
  !$OMP MASTER
  accu1 = 0d0
  accu2 = 0d0
  accu3 = 0d0
  !$OMP END MASTER

  ! Element-wise terms of the three sums; a term stays at zero when the
  ! eigenvalue or the shifted eigenvalue is below thresh_eig in absolute value
  !$OMP DO
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
      tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4
    else
      tmp_accu1(i) = 0d0
      tmp_accu2(i) = 0d0
      tmp_accu3(i) = 0d0
    endif
  enddo
  !$OMP END DO

  ! Sequential reductions, done after the barrier that ends the loop above
  !$OMP MASTER
  ! accu1
  do i = 1, n
    accu1 = accu1 + tmp_accu1(i)
  enddo
  ! accu2
  do i = 1, n
    accu2 = accu2 + tmp_accu2(i)
  enddo
  ! accu3
  do i = 1, n
    accu3 = accu3 + tmp_accu3(i)
  enddo
  !$OMP END MASTER

  !$OMP END PARALLEL

  ! Put the number of active levels back to 4, as the other OMP functions
  ! of this file do after their parallel region (it was left at 1 here).
  call omp_set_max_active_levels(4)

  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)

  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)

end function

! OMP: Function value of ||x||^2

! *Compute the value of ||x||^2*

! This function computes the value of ||x(lambda)||^2

! \begin{align*}
! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
! \end{align*}

! Provided:
! | m_num      | integer | number of MOs |
! | thresh_eig | not declared in this function | threshold used to skip terms of the sum |

! Input:
! | n          | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)   | double precision | eigenvalues of the hessian |
! | tmp_wtg(n) | double precision | W^T.v_grad, already formed |
! | lambda     | double precision | Lagrange multiplier        |

! Internal:
! | tmp_fN     | double precision | temporary array for the terms of the sum |
! | i          | integer          | index                                    |


function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)

  use omp_lib

  include 'pi.h'

  !BEGIN_DOC
  ! Compute ||x(lambda)||^2
  !END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)           :: n
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: tmp_wtg(n)
  double precision, intent(in)  :: lambda

  ! functions
  double precision :: f_norm_trust_region_omp

  ! internal
  double precision, allocatable :: tmp_fN(:)
  integer                       :: i

  ! Allocation
  allocate(tmp_fN(n))

  call omp_set_max_active_levels(1)

  ! OMP
  !$OMP PARALLEL &
  !$OMP PRIVATE(i) &
  !$OMP SHARED(n,lambda, e_val, thresh_eig,&
  !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) &
  !$OMP DEFAULT(NONE)

  ! Initialization
  !$OMP MASTER
  f_norm_trust_region_omp = 0d0
  !$OMP END MASTER

  ! Calculations: a term stays at zero when the eigenvalue or the shifted
  ! eigenvalue is below thresh_eig in absolute value
  !$OMP DO
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
    else
      tmp_fN(i) = 0d0
    endif
  enddo
  !$OMP END DO

  ! Sequential reduction, done after the barrier that ends the loop above
  !$OMP MASTER
  do i = 1, n
    f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i)
  enddo
  !$OMP END MASTER

  !$OMP END PARALLEL

  ! Put the number of active levels back to 4, as the other OMP functions
  ! of this file do after their parallel region (it was left at 1 here).
  call omp_set_max_active_levels(4)

  deallocate(tmp_fN)

end function

! First derivative of (||x||^2 - Delta^2)^2
! Version without omp

! *Function to compute the first derivative of ||x||^2 - Delta*

! This function computes the first derivative of (||x||^2 - Delta^2)^2
! with respect to lambda.

! \begin{align*}
! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

! Provided:
! | m_num | integer | number of MOs |

! Input:
! | n | integer | mo_num*(mo_num-1)/2 |
! | e_val(n) | double precision | eigenvalues of the hessian |
! | W(n,n) | double precision | eigenvectors of the hessian |
! | v_grad(n) | double precision | gradient |
! | lambda | double precision | Lagrange multiplier |
! | delta | double precision | Delta of the trust region |

! Internal:
! | accu1 | double precision | first sum of the formula |
! | accu2 | double precision | second sum of the formula |
! | wtg | double precision | temporary variable to store W^T.v_grad |
! | i,j | integer | indexes |

! Function:
!
! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)

  include 'pi.h'

  !BEGIN_DOC
  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  !
  ! n       : size of e_val and v_grad, order of w
  ! e_val   : eigenvalues of the hessian
  ! w       : eigenvectors of the hessian, one per column
  ! v_grad  : gradient
  ! lambda  : Lagrange multiplier
  ! delta   : Delta of the trust region
  !
  ! thresh_eig is not declared here; it is the threshold used to skip
  ! the terms with a (shifted) eigenvalue too close to zero.
  !END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: w(n,n)
  double precision, intent(in) :: v_grad(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Internal
  double precision :: wtg, accu1, accu2
  integer          :: i

  ! Functions
  double precision :: d1_norm_trust_region

  ! Initialization
  accu1 = 0d0   !      sum_i (w_i^T.g)^2 / (h_i + lambda)^2
  accu2 = 0d0   ! -2 * sum_i (w_i^T.g)^2 / (h_i + lambda)^3

  ! Single pass: the projection w_i^T.g is formed once per eigenvector and
  ! feeds both sums (it was recomputed in two separate loops before).
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      wtg = dot_product(w(:,i), v_grad)
      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3
    endif
  enddo

  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)

end function

! Second derivative of (||x||^2 - Delta^2)^2
! Version without OMP

! *Function to compute the second derivative of ||x||^2 - Delta*


! \begin{equation}
! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2
!
! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
! \end{equation}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
! \end{align*}
! In the code below accu2 and accu3 already carry the factors -2 and 6
! of the formula.

! Provided:
! | m_num | integer | number of MOs |

! Input:
! | n | integer | mo_num*(mo_num-1)/2 |
! | e_val(n) | double precision | eigenvalues of the hessian |
! | W(n,n) | double precision | eigenvectors of the hessian |
! | v_grad(n) | double precision | gradient |
! | lambda | double precision | Lagrange multiplier |
! | delta | double precision | Delta of the trust region |

! Internal:
! | accu1 | double precision | first sum of the formula |
! | accu2 | double precision | second sum of the formula |
! | accu3 | double precision | third sum of the formula |
! | wtg | double precision | temporary variable to store W^T.v_grad |
! | i | integer | index |

! Function:
! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |


function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)

  include 'pi.h'

  !BEGIN_DOC
  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
  !END_DOC

  implicit none

  ! Variables

  ! in
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: w(n,n)
  double precision, intent(in) :: v_grad(n)
  double precision, intent(in) :: lambda
  double precision, intent(in) :: delta

  ! Functions
  double precision :: d2_norm_trust_region

  ! Internal
  double precision :: wtg,accu1,accu2,accu3
  integer          :: i

  ! Initialization
  accu1 = 0d0
  accu2 = 0d0
  accu3 = 0d0

  ! Single pass: the projection w_i^T.g is formed once per eigenvector and
  ! feeds the three sums (it was recomputed in three separate loops before).
  ! Terms with a (shifted) eigenvalue below thresh_eig are skipped.
  do i = 1, n
    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
      wtg = dot_product(w(:,i), v_grad)
      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3
      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4
    endif
  enddo

  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)

end function

! Function value of ||x||^2
! Version without OMP

! *Compute the value of ||x||^2*

! This function computes the value of ||x(lambda)||^2

! \begin{align*}
! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
! \end{align*}

! Provided:
! | m_num | integer | number of MOs |

! Input:
! | n | integer | mo_num*(mo_num-1)/2 |
! | e_val(n) | double precision | eigenvalues of the hessian |
! | W(n,n) | double precision | eigenvectors of the hessian |
! | v_grad(n) | double precision | gradient |
! | lambda | double precision | Lagrange multiplier |
! | delta | double precision | Delta of the trust region |

! Internal:
! | wtg | double precision | temporary variable to store W^T.v_grad |
! | i,j | integer | indexes |

! Function:
!
! | f_norm_trust_region | double precision | value of norm(x)^2 |



function f_norm_trust_region(n,e_val,tmp_wtg,lambda)

  include 'pi.h'

  !BEGIN_DOC
  ! Compute ||x(lambda)||^2
  !
  ! n        : size of e_val and tmp_wtg
  ! e_val    : eigenvalues of the hessian
  ! tmp_wtg  : W^T.v_grad, already formed
  ! lambda   : Lagrange multiplier
  !
  ! thresh_eig is not declared here; terms whose eigenvalue or shifted
  ! eigenvalue does not exceed it in absolute value are left out of the sum.
  !END_DOC

  implicit none

  ! Arguments
  integer, intent(in)          :: n
  double precision, intent(in) :: e_val(n)
  double precision, intent(in) :: tmp_wtg(n)
  double precision, intent(in) :: lambda

  ! Result
  double precision :: f_norm_trust_region

  ! Locals
  integer          :: k
  double precision :: shifted, sq_norm

  sq_norm = 0d0

  do k = 1, n
    ! Guard clauses: skip the terms that would divide by (almost) zero
    if (.not. (DABS(e_val(k)) > thresh_eig)) cycle
    shifted = e_val(k) + lambda
    if (.not. (DABS(shifted) > thresh_eig)) cycle

    sq_norm = sq_norm + tmp_wtg(k)**2 / shifted**2
  enddo

  f_norm_trust_region = sq_norm

end function

! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
! Version with OMP

! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*

! This function computes the first derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2

! \begin{align*}
! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
! \end{align*}

! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
! \end{align*}

!
! Provided:
! | m_num      | integer | number of MOs |
! | thresh_eig | not declared in this function | threshold used to skip terms of the sums |

! Input:
! | n          | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)   | double precision | eigenvalues of the hessian |
! | tmp_wtg(n) | double precision | W^t.v_grad, already formed |
! | lambda     | double precision | Lagrange multiplier        |
! | delta      | double precision | Delta of the trust region  |

! Internal:
! | accu1      | double precision | first sum of the formula           |
! | accu2      | double precision | second sum of the formula          |
! | tmp_accu1  | double precision | temporary array for the first sum  |
! | tmp_accu2  | double precision | temporary array for the second sum |
! | shifted    | double precision | e_val(k) + lambda                  |
! | k          | integer          | index                              |

! Function:
! | d1_norm_inverse_trust_region_omp | double precision | value of the first derivative |


function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  !BEGIN_DOC
  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
  !END_DOC

  implicit none

  ! Arguments
  integer, intent(in)           :: n
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: tmp_wtg(n)
  double precision, intent(in)  :: lambda
  double precision, intent(in)  :: delta

  ! Result
  double precision :: d1_norm_inverse_trust_region_omp

  ! Locals
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
  double precision              :: accu1, accu2, shifted
  integer                       :: k

  allocate(tmp_accu1(n), tmp_accu2(n))

  call omp_set_max_active_levels(1)

  !$OMP PARALLEL DEFAULT(NONE) &
  !$OMP PRIVATE(k, shifted) &
  !$OMP SHARED(n, lambda, e_val, thresh_eig, tmp_wtg, &
  !$OMP tmp_accu1, tmp_accu2, accu1, accu2)

  !$OMP MASTER
  accu1 = 0d0
  accu2 = 0d0
  !$OMP END MASTER

  ! Terms of both sums; an entry stays at zero when the eigenvalue or the
  ! shifted eigenvalue does not exceed thresh_eig in absolute value
  !$OMP DO
  do k = 1, n
    tmp_accu1(k) = 0d0
    tmp_accu2(k) = 0d0
    if (.not. (DABS(e_val(k)) > thresh_eig)) cycle
    shifted = e_val(k) + lambda
    if (.not. (DABS(shifted) > thresh_eig)) cycle
    tmp_accu1(k) = tmp_wtg(k)**2 / shifted**2
    tmp_accu2(k) = tmp_wtg(k)**2 / shifted**3
  enddo
  !$OMP END DO

  ! Sequential reductions, in index order, after the barrier of the loop
  !$OMP MASTER
  do k = 1, n
    accu1 = accu1 + tmp_accu1(k)
  enddo
  do k = 1, n
    accu2 = accu2 + tmp_accu2(k)
  enddo
  !$OMP END MASTER

  !$OMP END PARALLEL

  call omp_set_max_active_levels(4)

  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))

  deallocate(tmp_accu1, tmp_accu2)

end function

! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
! Version with OMP

! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*

! This function computes the second derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2

! \begin{align*}
! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2
! &= 4 \left[ 6 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4}
! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
! &- \frac{4}{\Delta^2} \left[ 4 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
!
! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
! \end{align*}


! \begin{align*}
! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
! \end{align*}

! Provided:
! | m_num      | integer | number of MOs |
! | thresh_eig | not declared in this function | threshold used to skip terms of the sums |

! Input:
! | n          | integer          | mo_num*(mo_num-1)/2        |
! | e_val(n)   | double precision | eigenvalues of the hessian |
! | tmp_wtg(n) | double precision | W^t.v_grad, already formed |
! | lambda     | double precision | Lagrange multiplier        |
! | delta      | double precision | Delta of the trust region  |

! Internal:
! | accu1, accu2, accu3 | double precision | the three sums of the formula       |
! | tmp_accu1           | double precision | temporary array for the first sum   |
! | tmp_accu2           | double precision | temporary array for the second sum  |
! | tmp_accu3           | double precision | temporary array for the third sum   |
! | shifted             | double precision | e_val(k) + lambda                   |
! | k                   | integer          | index                               |

! Function:
! | d2_norm_inverse_trust_region_omp | double precision | value of the second derivative |


function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)

  use omp_lib
  include 'pi.h'

  !BEGIN_DOC
  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
  !END_DOC

  implicit none

  ! Arguments
  integer, intent(in)           :: n
  double precision, intent(in)  :: e_val(n)
  double precision, intent(in)  :: tmp_wtg(n)
  double precision, intent(in)  :: lambda
  double precision, intent(in)  :: delta

  ! Result
  double precision :: d2_norm_inverse_trust_region_omp

  ! Locals
  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
  double precision              :: accu1, accu2, accu3, shifted
  integer                       :: k

  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))

  call omp_set_max_active_levels(1)

  !$OMP PARALLEL DEFAULT(NONE) &
  !$OMP PRIVATE(k, shifted) &
  !$OMP SHARED(n, lambda, e_val, thresh_eig, tmp_wtg, &
  !$OMP tmp_accu1, tmp_accu2, tmp_accu3, &
  !$OMP accu1, accu2, accu3)

  !$OMP MASTER
  accu1 = 0d0
  accu2 = 0d0
  accu3 = 0d0
  !$OMP END MASTER

  ! Terms of the three sums; an entry stays at zero when the eigenvalue or
  ! the shifted eigenvalue does not exceed thresh_eig in absolute value
  !$OMP DO
  do k = 1, n
    tmp_accu1(k) = 0d0
    tmp_accu2(k) = 0d0
    tmp_accu3(k) = 0d0
    if (.not. (DABS(e_val(k)) > thresh_eig)) cycle
    shifted = e_val(k) + lambda
    if (.not. (DABS(shifted) > thresh_eig)) cycle
    tmp_accu1(k) = tmp_wtg(k)**2 / shifted**2
    tmp_accu2(k) = tmp_wtg(k)**2 / shifted**3
    tmp_accu3(k) = tmp_wtg(k)**2 / shifted**4
  enddo
  !$OMP END DO

  ! Sequential reductions, in index order, after the barrier of the loop
  !$OMP MASTER
  do k = 1, n
    accu1 = accu1 + tmp_accu1(k)
  enddo
  do k = 1, n
    accu2 = accu2 + tmp_accu2(k)
  enddo
  do k = 1, n
    accu3 = accu3 + tmp_accu3(k)
  enddo
  !$OMP END MASTER

  !$OMP END PARALLEL

  call omp_set_max_active_levels(4)

  d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
      - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)

  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)

end function

! First derivative of (1/||x||^2 - 1/Delta^2)^2
! Version without OMP

! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*

! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2

! \begin{align*}
!
\frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 with respect to lambda + !END_DOC + + implicit none + + ! Variables + + !
in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + ! accu1: sum over i of (w_i^T.g)^2 / (e_val(i)+lambda)^2, skipping every i where |e_val(i)| or |e_val(i)+lambda| is not above thresh_eig + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + ! accu2: same filtered sum with the denominator (e_val(i)+lambda)^3, wtg is recomputed for every i + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + ! Closed form of the first derivative, accu1 is used as a divisor without any guard against zero + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the second derivative of (1/||x(lambda)||^2 - 1/Delta^2)^2 with respect to lambda + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ 6 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ 4 \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +!
\begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the second derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 with respect to lambda + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + ! accu1: sum over i of (w_i^T.g)^2 / (e_val(i)+lambda)^2, skipping every i where |e_val(i)| or |e_val(i)+lambda| is not above thresh_eig + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + ! accu2: same filtered sum with the denominator (e_val(i)+lambda)^3, wtg is recomputed for every i + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and.
DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..11ab11e9 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,120 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the current iteration +! $m^{k+1}$ the predicted energy for the current iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the current +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order does not correctly represent the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +!
| prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= thresh_rho the current energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation (passed as an input argument) | +! | i | integer | index (declared but unused) | +! | t* | double precision | time (only used by the commented-out timing code) | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + !call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! Depending on $\rho$ the step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in another subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !'
+ !endif + ! NOTE: the division below is not guarded here against prev_energy = e_model + rho = (prev_energy - energy) / (prev_energy - e_model) + + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho + + ! Update of prev_energy depending on rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + !print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy + endif + + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..54161a1c --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,749 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method finds a minimum, a local or global one depending +! on the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the current position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, depending on the agreement between the Taylor +!
development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! do not become too large. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), it is +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (Euclidean) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k+1)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size.
For simplicity we will remove +! the indices $_{(k)}$ and $_{(k+1)}$. To restrict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(||\textbf{x}||^3) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such that: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! where $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +!
(\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one-dimensional problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w}_i \right\}_{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +!
So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1,\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstrained solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are four cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite, $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite, $\textbf{w}_1^T \cdot +!
\textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$. This case is +! similar to the case where $\textbf{H}$ is positive definite and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{w}_1$ +! times a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point. + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, and is denoted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +!
And for the next iteration the trust region will evolve depending on +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extended, else it will be reduced. In order to +! measure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \leq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \leq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{h_i^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| \leq \Delta$, then +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$; the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{w}_1$ +! times a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +!
- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such that +! $||\textbf{x}(\lambda)|| = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing as in the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing as in the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the terms +! where $|h_i + \lambda|$ is below the threshold thresh_eig are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement could be made using an elliptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | H(n,n2) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +!
| e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! 
Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo +else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo +endif + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +!print*,'||x||^2 :', norm2_x + +! 
Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +!print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +!print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! 
Display +!print*, 'e_val(1) = ', e_val(1) +!print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! 
Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif +else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. 
e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! 
internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f new file mode 100644 index 00000000..e184d3ba --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -0,0 +1,39 @@ +! Vect to antisymmetric matrix using mat_to_vec_index + +! Vector to antisymmetric matrix transformation using mat_to_vec_index +! subroutine. + +! Can be done in OMP (for the first part and with omp critical for the second) + + +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! 
Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end From d6f7ec60f8e140982a24b282a36d027397d68d93 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:22:46 +0200 Subject: [PATCH 54/87] add mo localization --- src/mo_localization/84.mo_localization.bats | 97 + src/mo_localization/EZFIO.cfg | 54 + src/mo_localization/NEED | 3 + src/mo_localization/README.md | 113 + src/mo_localization/break_spatial_sym.irp.f | 27 + src/mo_localization/debug_gradient_loc.irp.f | 65 + src/mo_localization/debug_hessian_loc.irp.f | 65 + src/mo_localization/kick_the_mos.irp.f | 16 + src/mo_localization/localization.irp.f | 520 +++ src/mo_localization/localization_sub.irp.f | 2008 ++++++++++++ src/mo_localization/org/TANGLE_org_mode.sh | 7 + src/mo_localization/org/break_spatial_sym.org | 28 + .../org/debug_gradient_loc.org | 67 + src/mo_localization/org/debug_hessian_loc.org | 67 + src/mo_localization/org/kick_the_mos.org | 18 + src/mo_localization/org/localization.org | 2899 +++++++++++++++++ 16 files changed, 6054 insertions(+) create mode 100644 src/mo_localization/84.mo_localization.bats create mode 100644 src/mo_localization/EZFIO.cfg create mode 100644 src/mo_localization/NEED create mode 100644 src/mo_localization/README.md create mode 100644 src/mo_localization/break_spatial_sym.irp.f create mode 100644 src/mo_localization/debug_gradient_loc.irp.f create mode 100644 src/mo_localization/debug_hessian_loc.irp.f create mode 100644 src/mo_localization/kick_the_mos.irp.f create mode 100644 src/mo_localization/localization.irp.f create mode 100644 src/mo_localization/localization_sub.irp.f create mode 100755 src/mo_localization/org/TANGLE_org_mode.sh create mode 100644 src/mo_localization/org/break_spatial_sym.org create mode 100644 src/mo_localization/org/debug_gradient_loc.org create mode 100644 src/mo_localization/org/debug_hessian_loc.org create mode 100644 src/mo_localization/org/kick_the_mos.org create mode 100644 
src/mo_localization/org/localization.org diff --git a/src/mo_localization/84.mo_localization.bats b/src/mo_localization/84.mo_localization.bats new file mode 100644 index 00000000..b34c0bd5 --- /dev/null +++ b/src/mo_localization/84.mo_localization.bats @@ -0,0 +1,97 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + +zero () { + if [ -z "$1" ]; then echo 0.0; else echo $1; fi +} + +function run() { + thresh1=1e-10 + thresh2=1e-12 + thresh3=1e-4 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -d + qp set_frozen_core + qp set localization localization_method boys + file="$(echo $1 | sed 's/.ezfio//g')" + energy="$(cat $1/hartree_fock/energy)" + fb_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + fb_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + fb_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + fb_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + fb_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + fb_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + fb_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + qp reset -a + qp run scf + qp set_frozen_core + qp set localization localization_method pipek + pm_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + pm_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + pm_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + pm_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + pm_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + pm_v="$(cat $file.loc.out | grep 
'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + pm_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + qp set localization localization_method boys + qp reset -a + qp run scf + qp set_frozen_core + eq $energy $fb_energy $thresh1 + eq $fb_err1 0.0 $thresh2 + eq $fb_err2 0.0 $thresh2 + eq $energy $pm_energy $thresh1 + eq $pm_err1 0.0 $thresh2 + eq $pm_err2 0.0 $thresh2 + fb_c=$(zero $fb_c) + fb_i=$(zero $fb_i) + fb_a=$(zero $fb_a) + fb_v=$(zero $fb_v) + pm_c=$(zero $pm_c) + pm_i=$(zero $pm_i) + pm_a=$(zero $pm_a) + pm_v=$(zero $pm_v) + eq $fb_c $2 $thresh3 + eq $fb_i $3 $thresh3 + eq $fb_a $4 $thresh3 + eq $fb_v $5 $thresh3 + eq $pm_c $6 $thresh3 + eq $pm_i $7 $thresh3 + eq $pm_a $8 $thresh3 + eq $pm_v $9 $thresh3 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -32.1357551678876 -47.0041982094667 0.0 -223.470015856259 -1.99990778964451 -2.51376723927071 0.0 -12.8490602539275 +} + +@test "clo" { +run clo.ezfio -44.1624001765291 -32.4386660941387 0.0 -103.666309287187 -5.99985418946811 -5.46871580225222 0.0 -20.2480064922275 +} + +@test "clf" { +run clf.ezfio -47.5143398826967 -35.7206886315104 0.0 -107.043029033468 -5.99994222062230 -6.63916513458470 0.0 -19.7035159913484 +} + +@test "h2o2" { +run h2o2.ezfio -7.76848143170524 -30.9694344369829 0.0 -175.898343829453 -1.99990497554575 -5.62980322957485 0.0 -33.5699813186666 +} + +@test "h2o" { +run h2o.ezfio 0.0 -2.52317434969591 0.0 -45.3136377925359 0.0 -3.01248365356981 0.0 -22.4470831240924 +} + +@test "h3coh" { +run h3coh.ezfio -3.66763692804590 -24.0463089480870 0.0 -111.485948435075 -1.99714061342078 -4.89242181322988 0.0 -23.6405412057679 +} + +@test "n2h4" { +run n2h4.ezfio -7.46608163002070 -35.7632174051822 0.0 -305.913449004632 -1.99989326143356 -4.62496615892268 0.0 -51.5171904685553 +} + diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..d1b844a5 --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ 
-0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys: Foster-Boys, pipek: Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[auto_mo_class] +type: logical +doc: If true, set automatically the classes. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. 
+interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..b438f39d --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,3 @@ +hartree_fock +utils_trust_region +determinants diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..c28a5ee1 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,113 @@ +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +By default, the different orbital classes are automatically set by splitting +the orbitals in the following classes: +- Core -> Core +- Active, doubly occupied -> Inactive +- Active, singly occupied -> Active +- Active, empty -> Virtual +- Deleted -> Deleted +The orbitals will be localized among each class, except the deleted ones. +If you want to choose another splitting, you can set +``` +qp set mo_localization auto_mo_class false +``` +and define the classes with +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +for more information +``` +qp set_mo_class -q +``` +We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. +If you are using the last option don't forget to reset the initial mo classes +after the localization. + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. 
+This feature can be removed by setting: +``` +qp set localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +This program works exactly like the localization. +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set localization localization_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set localization localization_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it is not possible to converge with Pipek-Mezey +localization with this approach. + +# Parameters +Some other parameters are available for the localization (qp edit for more details). + +# Tests +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. 
+The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..2048aca6 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,27 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..d935e782 --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,65 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! 
Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = dabs(v_grad(i)) ! keep the magnitude so the running maximum stays comparable with dabs(...) above + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(list,v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..3ee4f0fa --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,65 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! 
Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = dabs(H(i)) ! keep the magnitude so the running maximum stays comparable with dabs(...) above + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(list,H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..b6c77c9e --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,16 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..7ccb2f5a --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,520 @@ +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end + + + + +! Variables: +! 
| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rotation matrix in a subspace | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radius of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subspace) | +! | tmp_list(:) | integer | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! 
| | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! 
Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +! Loc + +! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! 
Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! 
Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! 
Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! 
tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..f5afed07 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2008 @@ +! 
Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! 
Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! 
| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! 
Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! 
Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! 
| max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! 
Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! 
Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! 
\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \langle s | P_A | t \rangle \left[ \langle s | P_A | s \rangle - \langle t | P_A | t \rangle \right] +! \end{align*} + +! \begin{align*} +! \langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belong to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending on the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending on the nucleus | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending on the nucleus | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belong to the nucleus a | +! | mu | | index to refer to an AO which belongs to the nucleus a | +! 
| rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! 
enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! 
Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( \langle s | P_A | t \rangle^2 - \frac{1}{4} \left[ \langle s | P_A | s \rangle - \langle t | P_A | t \rangle \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! \langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belong to atom A +! 
$c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! 
enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! 
loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \langle i | P_A | i \rangle \right]^2 +! \end{align*} +! with +! \begin{align*} +! \langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! 
(mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! 
!+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (\langle i | x | i \rangle^2 + \langle i | y | i \rangle^2 + \langle i | z | i \rangle^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i \langle i | r | i \rangle^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! 
Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! 
+ (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... 
+ END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + ! Aij and Bij are sums over the nuclei: they must start from zero + Aij = 0d0 + Bij = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A (contribution of the nucleus a) + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B (contribution of the nucleus a) + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max (largest angle in absolute value, taken in the lower triangle) + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sqrt{\sum_{\lambda=x,y,z} \left( \langle i|\lambda^2|i \rangle - \langle i|\lambda|i \rangle^2 \right)} +! \end{align*} + +! 
From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + 
print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! 
Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! 
Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! 
Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! 
Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + ! NOTE(review): assumes dsort returns fock_energies_tmp sorted and iorder(i) = position in tmp_list of the MO with the i-th energy + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(i) + do j = 1, ao_num + ! The i-th column receives the MO with the i-th energy + tmp_mo_coef(j,i) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. 
+ exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search in the list of the deleted MOs (not in the list of the core MOs) + do j = 1, dim_list_del_orb + if (list_del(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end diff --git a/src/mo_localization/org/TANGLE_org_mode.sh b/src/mo_localization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Tangle every org file of the current directory +for element in *.org +do + emacs --batch "$element" -f org-babel-tangle +done diff --git a/src/mo_localization/org/break_spatial_sym.org b/src/mo_localization/org/break_spatial_sym.org new file mode 100644 index 00000000..d82f1c60 --- /dev/null +++ b/src/mo_localization/org/break_spatial_sym.org @@ -0,0 +1,28 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to define your MO classes or set security_mo_class to false +! with: +! 
qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_gradient_loc.org b/src/mo_localization/org/debug_gradient_loc.org new file mode 100644 index 00000000..6d147dd0 --- /dev/null +++ b/src/mo_localization/org/debug_gradient_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + 
print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = dabs(v_grad(i)) ! store the magnitude, the test above compares magnitudes + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_hessian_loc.org b/src/mo_localization/org/debug_hessian_loc.org new file mode 100644 index 00000000..e47cf38d --- /dev/null +++ b/src/mo_localization/org/debug_hessian_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = dabs(H(i)) ! store the magnitude, the test above compares magnitudes + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/org/kick_the_mos.org b/src/mo_localization/org/kick_the_mos.org new file mode 100644 index 00000000..c0c6c02d --- /dev/null +++ 
b/src/mo_localization/org/kick_the_mos.org @@ -0,0 +1,18 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/localization.org b/src/mo_localization/org/localization.org new file mode 100644 index 00000000..aaf9f18d --- /dev/null +++ b/src/mo_localization/org/localization.org @@ -0,0 +1,2899 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +Foster-Boys localization: +- cite Foster +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. 
+DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. 
+In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \right]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Noting +$$A_{ii} = < \phi_i | r^2 | \phi_i > $$ +$$B_{ii} = < \phi_i | r | \phi_i > $$ + +$$ \beta = (B_{pp} - B_{qq})^2 - 4 B_{pq}^2 $$ +$$ \gamma = 4 B_{pq} (B_{pp} - B_{qq}) $$ + +\begin{align*} +C_{FB}(\theta) &= \sum_{i=1}^N \left[ A_{ii} - B_{ii}^2 \right] \\ +&- \left[ A_{pp} - B_{pp}^2 + A_{qq} - B_{qq}^2 \right] \\ +&+ \left[ A_{pp} + A_{qq} - B_{pp}^2 - B_{qq}^2 ++ \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] \right] \\ +&= C_{FB}(\theta=0) + \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_{FB}(\theta)}{\partial \theta} = \beta \sin(4\theta) + \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2} = 4 \beta \cos(4\theta) - 4 \gamma \sin(4 \theta) +\end{align*} + +Similarly: +\begin{align*} +C_3(\theta) &= \sum_{i=1}^N [B_{ii}^2] \\ +&- B_{pp}^2 - B_{qq}^2 \\ +&+ B_{pp}^2 + B_{qq}^2 - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] \\ +&= C_3(\theta=0) - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_3(\theta)}{\partial \theta} = - \beta \sin(4\theta) - \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_3(\theta)}{\partial \theta^2} = - 4 \beta \cos(4\theta) + 4 \gamma \sin(4 \theta) +\end{align*} + +And since we compute the derivatives around $\theta = 0$ (around the +current position) we have: +\begin{align*} +\left. \frac{\partial{C_{FB}(\theta)}}{\partial \theta}\right|_{\theta=0} = \gamma +\end{align*} + +\begin{align*} +\left. 
\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta +\end{align*} + +Locality of the orbitals: +- cite Hoyvik +As the Foster-Boys method tries to minimize the sum of the second +moment MO spread, the locality of each MO can be expressed as the +second moment of the MO spread. For the MO i, the locality criterion is +\begin{align*} +\sigma_i &= \sqrt{\langle i|r^2|i \rangle - \langle i|r|i \rangle^2} \\ +&= \sqrt{\langle i|x^2|i \rangle - \langle i|x|i \rangle^2 + \langle i|y^2|i \rangle - \langle i|y|i \rangle^2 + \langle i|z^2|i \rangle - \langle i|z|i \rangle^2} +\end{align*} + + +*** Pipek-Mezey localization +-cite pipek mezey 1989 +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +The optimum functional $\mathcal{P}$ is obtained for the maximum of +$D^{-1}$ +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \langle i|P_A|i \rangle \right]^2 +\end{align*} + +As for the Foster Boys localization, the change in the functional for +the rotation of two MOs can be obtained using very similar terms +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( \langle s|P_A|t \rangle^2 - \frac{1}{4} \left[ \langle s|P_A|s \rangle - \langle t|P_A|t \rangle \right]^2 \right) +\end{align*} +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \langle s|P_A|t \rangle \left[ \langle s|P_A|s \rangle - \langle t|P_A|t \rangle \right] +\end{align*} +The matrix elements of the operator $P_A$ are obtained using +\begin{align*} +<\rho | \tilde{\mu}> = \delta_{\rho \mu} +\end{align*} +which leads to +\begin{align*} +\langle s|P_A|t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belong to atom A +$c^t$ -> expansion coefficients of orbital |t> + +So similarly the first and second derivatives are + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} + +\begin{align*} +\left. 
\frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. 
+ +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... 
+ +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | 
mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization: the MOs are localized class by class (core, active, inactive, virtual) with the method selected by localization_method (boys or pipek) + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation (pre_rot is allocated here but not referenced anywhere else in this subroutine) + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! 
Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! 
Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there are at least 2 MOs in the class + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Skip this mo_class if there is nothing to localize (fewer than 2 MOs) + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! 
Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary: on failure delta is halved and the step is retried, otherwise delta is doubled (capped at 1) + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit when the maximal number of iterations is reached or max_elem is below the threshold + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! 
Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hand: the eigenvalues are the diagonal elements + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors: only the sorted indexes (key) are stored in W, as double precision + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! 
Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depends on the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unnecessary + TOUCH mo_coef + + ! 
To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! 
Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n) | double precision | Diagonal of the hessian in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! 
Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the MO dipoles (mo_dipole_x/y/z) from the AO dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required for the Pipek-Mezey method + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! 
Compute the rotation angles between the MOs for the chosen localization method (dispatch to theta_FB or theta_PM, abort otherwise) + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! 
Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D, vec_to_mat_index gives the (tmp_i,tmp_j) pair associated to tmp_k + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element (in absolute value) in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization (OpenMP version of gradient_FB) + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! 
Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation of the gradient matrix from the MO dipoles + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D, vec_to_mat_index gives the (tmp_i,tmp_j) pair associated to tmp_k + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) ! NOTE(review): reset to a hard-coded 4, not to the value in use before this call + + ! Maximum element (in absolute value) in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! 
Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n) | double precision | Diagonal of the hessian in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_list_size,tmp_list_size) | double precision | beta in the documentation below to compute the hessian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! 
Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization (OpenMP version of hessian_FB) + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation of beta from the MO dipoles + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonal of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! 
Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! 
Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \langle s | P_A | t \rangle \left[ \langle s | P_A | s \rangle - \langle t | P_A | t \rangle \right] +\end{align*} + +\begin{align*} +\langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belong to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| 
tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization (same quantities as grad_pipek, built with dgemm products) + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! Submatrix of mo_coef restricted to the MOs listed in tmp_list + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! 
tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D, vec_to_mat_index gives the (tmp_i,tmp_j) pair associated to tmp_k + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element (in absolute value) in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! 
Deallocation (tmp_accu is not listed in this deallocate) + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation of beta, accumulated over the nuclei + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. 
\frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( \langle s | P_A | t \rangle^2 - \frac{1}{4} \left[ \langle s | P_A | s \rangle - \langle t | P_A | t \rangle \right]^2 \right) +\end{align*} + +with +\begin{align*} +\langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belong to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + !
tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! 
Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \langle i | P_A | i \rangle \right]^2 +\end{align*} +with +\begin{align*} +\langle s | P_A | t \rangle = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \mu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) !
loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! 
loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! 
Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A_{ij}}{\sqrt{A_{ij}^2 + B_{ij}^2}} $$ +$$\sin(4 \theta) = \frac{B_{ij}}{\sqrt{A_{ij}^2 + B_{ij}^2}} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = \langle i | r | j \rangle^2 - \frac{1}{4} ( \langle i | r | i \rangle - \langle j | r | j \rangle )^2$$ +$$B_{ij} = \langle i | r | j \rangle ( \langle i | r | i \rangle - \langle j | r | j \rangle )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A \left[ \langle i | P_A | j \rangle^2 - \frac{1}{4} ( \langle i | P_A | i \rangle - \langle j | P_A | j \rangle )^2 \right]$$ +$$B_{ij} = \sum_A \langle i | P_A | j \rangle ( \langle i | P_A | i \rangle - \langle j | P_A | j \rangle )$$ +with +$$\langle i | P_A | j \rangle = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC
+ ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + ! Zero the diagonal: A is n x n and indexed by the position in l, not by the MO label j = l(tmp_j) + A(tmp_j,tmp_j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + !
do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Pipek-Mezey criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + ! Aij and Bij are accumulated over the nuclei below, so they must start from zero + Aij = 0d0 + Bij = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) !
loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! 
Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + 
print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! 
Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! 
Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! 
Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + else + ! Curvature too small to define a step: give a zero step so that tmp_x (intent(out)) is never read undefined below + tmp_x(tmp_k) = 0d0 + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator.
+ END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + ! NOTE(review): assumes dsort sorts fock_energies_tmp in place (increasing) and permutes iorder alongside, + ! so that iorder(i) is the pre-sort position of the i-th smallest energy -- confirm against the utils module + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + ! The energy is read at the sorted position i; the MO that owned it is tmp_k + print*, tmp_k, fock_energies_tmp(i) + do j = 1, ao_num + ! Store at the sorted position i (storing at k would make the write-back below an identity copy) + tmp_mo_coef(j,i) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + !
True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search in the list of deleted MOs (the core list is handled by is_core) + ! NOTE(review): list_del / dim_list_del_orb are assumed to follow the same naming as the other MO-class lists -- confirm + do j = 1, dim_list_del_orb + if (list_del(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not.
ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end +#+END_SRC From b71888f459d7407d7589f001a4d25418dde9df63 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:56:30 +0200 Subject: [PATCH 55/87] add mo optimization --- src/mo_optimization/83.mo_optimization.bats | 62 + src/mo_optimization/EZFIO.cfg | 29 + src/mo_optimization/NEED | 7 + src/mo_optimization/README.md | 74 + src/mo_optimization/class.irp.f | 12 + src/mo_optimization/constants.h | 1 + .../debug_gradient_list_opt.irp.f | 78 + src/mo_optimization/debug_gradient_opt.irp.f | 76 + .../debug_hessian_list_opt.irp.f | 147 ++ src/mo_optimization/debug_hessian_opt.irp.f | 171 ++ .../diagonal_hessian_list_opt.irp.f | 1556 ++++++++++++++++ .../diagonal_hessian_opt.irp.f | 1511 ++++++++++++++++ .../diagonalization_hessian.irp.f | 136 ++ .../first_diagonal_hessian_list_opt.irp.f | 372 ++++ .../first_diagonal_hessian_opt.irp.f | 344 ++++ .../first_gradient_list_opt.irp.f | 125 ++ src/mo_optimization/first_gradient_opt.irp.f | 128 ++ .../first_hessian_list_opt.irp.f | 365 ++++ src/mo_optimization/first_hessian_opt.irp.f | 360 ++++ src/mo_optimization/gradient_list_opt.irp.f | 381 ++++ src/mo_optimization/gradient_opt.irp.f | 346 ++++ src/mo_optimization/hessian_list_opt.irp.f | 1129 ++++++++++++ src/mo_optimization/hessian_opt.irp.f | 1043 +++++++++++ src/mo_optimization/my_providers.irp.f | 141 ++ src/mo_optimization/orb_opt.irp.f | 22 + src/mo_optimization/org/TANGLE_org_mode.sh | 7 + src/mo_optimization/org/TODO.org | 17 + .../org/debug_gradient_list_opt.org | 79 + .../org/debug_gradient_opt.org | 77 + 
.../org/debug_hessian_list_opt.org | 148 ++ src/mo_optimization/org/debug_hessian_opt.org | 172 ++ .../org/diagonal_hessian_list_opt.org | 1561 +++++++++++++++++ .../org/diagonal_hessian_opt.org | 1516 ++++++++++++++++ .../org/diagonalization_hessian.org | 138 ++ .../org/first_diagonal_hessian_list_opt.org | 376 ++++ .../org/first_diagonal_hessian_opt.org | 348 ++++ .../org/first_gradient_list_opt.org | 127 ++ .../org/first_gradient_opt.org | 130 ++ .../org/first_hessian_list_opt.org | 370 ++++ src/mo_optimization/org/first_hessian_opt.org | 365 ++++ src/mo_optimization/org/gradient_list_opt.org | 393 +++++ src/mo_optimization/org/gradient_opt.org | 358 ++++ src/mo_optimization/org/hessian_list_opt.org | 1141 ++++++++++++ src/mo_optimization/org/hessian_opt.org | 1056 +++++++++++ src/mo_optimization/org/my_providers.org | 308 ++++ src/mo_optimization/org/optimization.org | 91 + src/mo_optimization/org/orb_opt_trust_v2.org | 349 ++++ .../org/state_average_energy.org | 73 + .../org/state_weight_normalization.org | 31 + src/mo_optimization/org/update_parameters.org | 16 + .../org/update_st_av_ci_energy.org | 26 + .../run_orb_opt_trust_v2.irp.f | 317 ++++ src/mo_optimization/save_energy.irp.f | 9 + .../state_average_energy.irp.f | 72 + .../state_weight_normalization.irp.f | 29 + src/mo_optimization/update_parameters.irp.f | 15 + .../update_st_av_ci_energy.irp.f | 25 + 57 files changed, 18356 insertions(+) create mode 100644 src/mo_optimization/83.mo_optimization.bats create mode 100644 src/mo_optimization/EZFIO.cfg create mode 100644 src/mo_optimization/NEED create mode 100644 src/mo_optimization/README.md create mode 100644 src/mo_optimization/class.irp.f create mode 100644 src/mo_optimization/constants.h create mode 100644 src/mo_optimization/debug_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/debug_gradient_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_opt.irp.f 
create mode 100644 src/mo_optimization/diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/diagonalization_hessian.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_opt.irp.f create mode 100644 src/mo_optimization/gradient_list_opt.irp.f create mode 100644 src/mo_optimization/gradient_opt.irp.f create mode 100644 src/mo_optimization/hessian_list_opt.irp.f create mode 100644 src/mo_optimization/hessian_opt.irp.f create mode 100644 src/mo_optimization/my_providers.irp.f create mode 100644 src/mo_optimization/orb_opt.irp.f create mode 100755 src/mo_optimization/org/TANGLE_org_mode.sh create mode 100644 src/mo_optimization/org/TODO.org create mode 100644 src/mo_optimization/org/debug_gradient_list_opt.org create mode 100644 src/mo_optimization/org/debug_gradient_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_list_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonalization_hessian.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/first_gradient_list_opt.org create mode 100644 src/mo_optimization/org/first_gradient_opt.org create mode 100644 src/mo_optimization/org/first_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_hessian_opt.org create 
mode 100644 src/mo_optimization/org/gradient_list_opt.org create mode 100644 src/mo_optimization/org/gradient_opt.org create mode 100644 src/mo_optimization/org/hessian_list_opt.org create mode 100644 src/mo_optimization/org/hessian_opt.org create mode 100644 src/mo_optimization/org/my_providers.org create mode 100644 src/mo_optimization/org/optimization.org create mode 100644 src/mo_optimization/org/orb_opt_trust_v2.org create mode 100644 src/mo_optimization/org/state_average_energy.org create mode 100644 src/mo_optimization/org/state_weight_normalization.org create mode 100644 src/mo_optimization/org/update_parameters.org create mode 100644 src/mo_optimization/org/update_st_av_ci_energy.org create mode 100644 src/mo_optimization/run_orb_opt_trust_v2.irp.f create mode 100644 src/mo_optimization/save_energy.irp.f create mode 100644 src/mo_optimization/state_average_energy.irp.f create mode 100644 src/mo_optimization/state_weight_normalization.irp.f create mode 100644 src/mo_optimization/update_parameters.irp.f create mode 100644 src/mo_optimization/update_st_av_ci_energy.irp.f diff --git a/src/mo_optimization/83.mo_optimization.bats b/src/mo_optimization/83.mo_optimization.bats new file mode 100644 index 00000000..5bc3d313 --- /dev/null +++ b/src/mo_optimization/83.mo_optimization.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh=2e-3 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -a + qp run scf + qp set_frozen_core + qp set determinants n_states 2 + qp set determinants read_wf true + qp set mo_two_e_ints io_mo_two_e_integrals None + file="$(echo $1 | sed 's/.ezfio//g')" + qp run cis + qp run debug_gradient_list_opt > $file.debug_g.out + err3="$(grep 'Max error:' $file.debug_g.out | awk '{print $3}')" + qp run debug_hessian_list_opt > $file.debug_h1.out + err1="$(grep 'Max error:' $file.debug_h1.out | awk '{print $3}')" + qp run orb_opt > 
$file.opt1.out + energy1="$(grep 'State average energy:' $file.opt1.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method diag + qp reset -d + qp run scf + qp run cis + qp run debug_hessian_list_opt > $file.debug_h2.out + err2="$(grep 'Max error_H:' $file.debug_h2.out | awk '{print $3}')" + qp run orb_opt > $file.opt2.out + energy2="$(grep 'State average energy:' $file.opt2.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method full + qp reset -d + qp run scf + eq $energy1 $2 $thresh + eq $energy2 $3 $thresh + eq $err1 0.0 1e-12 + eq $err2 0.0 1e-12 + eq $err3 0.0 1e-12 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -48.9852901484277 -48.9852937541510 +} + +@test "h2o" { +run h2o.ezfio -75.9025622449206 -75.8691844585879 +} + +@test "h2s" { +run h2s.ezfio -398.576255809878 -398.574145943928 +} + +@test "hbo" { +run hbo.ezfio -99.9234823022109 -99.9234763597840 +} + +@test "hco" { +run hco.ezfio -113.204915552241 -113.204905207050 +} diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg new file mode 100644 index 00000000..8944e507 --- /dev/null +++ b/src/mo_optimization/EZFIO.cfg @@ -0,0 +1,29 @@ +[optimization_method] +type: character*(32) +doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian +interface: ezfio,provider,ocaml +default: full + +[n_det_start] +type: integer +doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. 
The algorithm does a cipsi until n_det > n_det_start and the optimization starts after +interface: ezfio,provider,ocaml +default: 5 + +[n_det_max_opt] +type: integer +doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) +interface: ezfio,provider,ocaml +default: 200000 + +[optimization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital optimization +interface: ezfio,provider,ocaml +default: 20 + +[thresh_opt_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the optimization exits when the biggest element in the gradient is smaller than thresh_opt_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-5 diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED new file mode 100644 index 00000000..91f41ee3 --- /dev/null +++ b/src/mo_optimization/NEED @@ -0,0 +1,7 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +selectors_full +generators_full +utils_trust_region diff --git a/src/mo_optimization/README.md b/src/mo_optimization/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization, the ezfio contains the optimized orbitals. + +## For a fixed number of determinants +To optimize the MOs for the current determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e., with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: 
+(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cipsi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process is the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cipsi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cipsi calculation + +### End of the optimization +You can choose the number of determinants after which the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the different states directly in qp edit. +It will affect the weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting user changes. +The org files can be modified; to export the changes to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization/class.irp.f new file mode 100644 index 00000000..b4a68ac2 --- /dev/null +++ b/src/mo_optimization/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the FCI case, all those are always false + END_DOC + do_only_1h1p = .False. + do_only_cas = .False. + do_ddci = .False. 
+END_PROVIDER + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization/constants.h new file mode 100644 index 00000000..1cd00bda --- /dev/null +++ b/src/mo_optimization/constants.h @@ -0,0 +1 @@ + logical, parameter :: debug=.False. diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f new file mode 100644 index 00000000..867e0105 --- /dev/null +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -0,0 +1,78 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f new file mode 100644 index 00000000..8aeec18f --- /dev/null +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -0,0 +1,76 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! 
Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f new file mode 100644 index 00000000..d1aa79c4 --- /dev/null +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -0,0 +1,147 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | +! | optimization_method | string | Method for the orbital optimization: | +! | | | - 'full' -> full hessian | +! | | | - 'diag' -> diagonal hessian | +! | dim_list_act_orb | integer | number of active MOs | +! | list_act(dim_list_act_orb) | integer | list of the actives MOs | +! | | | | + +! Internal: +! | m | integer | number of MOs in the list | +! | | | (active MOs) | +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! 
Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f new file mode 100644 index 00000000..6d22cc01 --- /dev/null +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -0,0 +1,171 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! 
| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! 
Deallocation + deallocate(H,H2,h_f,h_f2) + +end program diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization/diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..fe54fa7a --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_list_opt.irp.f @@ -0,0 +1,1556 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! 
Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! 
| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! 
internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) + +! Initialization of the arrays + +!!$OMP DO +!do tmp_s = 1,m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 +! enddo +! enddo +! 
enddo +!enddo +!!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 
0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! 
mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! 
- mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 
0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! 
Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! 
+ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. 
+ + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 
2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. 
+ + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! 
- get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + 
+deallocate(tmp_bi_int_3, tmp_2rdm_3) + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! 
- get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! 
- get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do tmp_p = 1, m +! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_s = 1, m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m + +! 
h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & +! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do tmp_pq = 1, n +! write(*,'(100(F10.5))') H(tmp_pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization/diagonal_hessian_opt.irp.f new file mode 100644 index 00000000..7688ec37 --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_opt.irp.f @@ -0,0 +1,1511 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! 
With $$\mathcal{P}_{pq}$$ a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrix (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It is not enough to compute the diagonal terms of the +! hessian, because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and another for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the Kronecker delta. + +! *Compute the diagonal hessian of energy with respect to orbital +! rotations* +! By diagonal hessian we mean the diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! 
| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n) | double precision | Diagonal of the Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the transformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute each term | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the Hessian elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the Hessian elements hessian(p,q,p,q) | +! 
| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! 
Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) + +! Initialization of the arrays + +!!$OMP DO +!do s = 1,mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,r,s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! 
mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! 
mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! 
enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! 
hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. 
+ + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! 
+ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. 
+ + +!$OMP DO +do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! 
Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. 
+ + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! 
- get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! 
- 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! 
- get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! 
- get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_2rdm_3,tmp_bi_int_3) +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do p = 1, mo_num +! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do s = 1, mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num + +! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! 
"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do pq = 1, n +! write(*,'(100(F10.5))') H(pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization/diagonalization_hessian.irp.f new file mode 100644 index 00000000..e25879d9 --- /dev/null +++ b/src/mo_optimization/diagonalization_hessian.irp.f @@ -0,0 +1,136 @@ +! Diagonalization of the hessian + +! Just a matrix diagonalization using Lapack + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | H(n,n) | double precision | hessian | + +! Output: +! | e_val(n) | double precision | eigenvalues of the hessian | +! | w(n,n) | double precision | eigenvectors of the hessian | + +! Internal: +! | nb_negative_nv | integer | number of negative eigenvalues | +! | lwork | integer | for Lapack | +! | work(lwork,n) | double precision | temporary array for Lapack | +! | info | integer | if 0 -> ok, else problem in the diagonalization | +! | i,j | integer | dummy indexes | + + +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! 
internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! 
Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..58536993 --- /dev/null +++ b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f @@ -0,0 +1,372 @@ +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! 
get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! 
Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! 
H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo +endif + +! 
Deallocation of shared arrays and end + +deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization/gradient_opt.irp.f new file mode 100644 index 00000000..25be6b5a --- /dev/null +++ b/src/mo_optimization/gradient_opt.irp.f @@ -0,0 +1,346 @@ +! Gradient + +! The gradient of the CI energy with respects to the orbital rotation +! is: +! (C-c C-x C-l) +! $$ +! G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] +! $$ + + +! $$ +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! $$ + +! $$ +! G(p,q) = \left[ +! \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] - +! \left[ +! \sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +! \sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +! \Gamma_{qt}^{rs}) +! \right] +! $$ + +! Where p,q,r,s,t are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +! E. Scuseria + +! *Compute the gradient of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +! 
| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +! | two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | v_grad(n) | double precision | the gradient | +! | max_elem | double precision | maximum element of the gradient | + +! Internal: +! | grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +! | A((mo_num,mo_num) | doubre precision | gradient after the permutations | +! | norm | double precision | norm of the gradient | +! | p, q | integer | indexes of the element in the matrix grad | +! | i | integer | index for the tranformation in a vector | +! | r, s, t | integer | indexes dor the sums | +! | t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +! | t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | +! | dnrm2 | double precision | (Lapack) norm | + + +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! 
Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) + +! Initialization + +!$OMP DO +do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +! Term 1 + +! Without optimization the term 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! grad(p,q) = grad(p,q) & +! + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) +! enddo +! enddo +! enddo + +! Since the matrix multiplication A.B is defined like : +! \begin{equation} +! c_{ij} = \sum_k a_{ik}.b_{kj} +! \end{equation} +! The previous equation can be rewritten as a matrix multplication + + +!**************** +! Opt first term +!**************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& +mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient, first term (s) :', t6 +!$OMP END MASTER + +! Term 2 + +! Without optimization the second term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num +! do t= 1, mo_num + +! grad(p,q) = grad(p,q) & +! + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & +! - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) +! enddo +! enddo +! enddo +! enddo +! enddo + +! 
Using the bielectronic integral properties : +! get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +! Using the two body matrix properties : +! two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +! t is one the right, we can put it on the external loop and create 3 +! indexes temporary array +! r,s can be seen as one index + +! By doing so, a matrix multiplication appears + + +!***************** +! Opt second term +!***************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient second term (s) : ', t6 +!$OMP END MASTER + +! Deallocation of private arrays + +deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + +!$OMP END PARALLEL + +call omp_set_max_active_levels(4) + +! Permutation, 2D matrix -> vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! 
Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization/hessian_list_opt.irp.f new file mode 100644 index 00000000..31af769a --- /dev/null +++ b/src/mo_optimization/hessian_list_opt.irp.f @@ -0,0 +1,1129 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! 
- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! 
| | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! 
Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! 
$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo +enddo +!$OMP END DO + + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! 
$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! 
+ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! 
do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + + + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + 
+deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! 
get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! 
get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! 
- get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! 
Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! 
Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! 
- get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! 
"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization/hessian_opt.irp.f new file mode 100644 index 00000000..0b4312c6 --- /dev/null +++ b/src/mo_optimization/hessian_opt.irp.f @@ -0,0 +1,1043 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! 
Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! 
| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! 
Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER +!$OMP SINGLE +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) ! one thread only: the output is shared, END SINGLE is a barrier +!$OMP END SINGLE +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! 
hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER +!$OMP SINGLE +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) ! one thread only: the output is shared, END SINGLE is a barrier +!$OMP END SINGLE +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! 
hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! 
do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! 
do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! 
get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! 
- get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! 
Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! 
+ +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f new file mode 100644 index 00000000..7469ffd5 --- /dev/null +++ b/src/mo_optimization/my_providers.irp.f @@ -0,0 +1,141 @@ +! Dimensions of MOs + + +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! 
with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER + +! Energies/criterions + +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER + +! With all the MOs + +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! 
- Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_opt(n_mo_dim, my_hessian_opt, h_f) ! full-MO routine: hessian_opt(n,H,h_tmpr) + +END_PROVIDER + +! With the list of active MOs +! Can be generalized to any mo_class by changing the list/dimension + +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the energy with respect to the MO rotations, only for the active MOs ! + ! - 2D matrix of size n_mo_dim_act by n_mo_dim_act + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER diff --git a/src/mo_optimization/orb_opt.irp.f b/src/mo_optimization/orb_opt.irp.f new file mode 100644 index 00000000..71ff9262 --- /dev/null +++ b/src/mo_optimization/orb_opt.irp.f @@ -0,0 +1,22 @@ +! Orbital optimization program + +! This is an optimization program for molecular orbitals. It produces +! orbital rotations in order to lower the energy of a truncated wave +! function. +! This program just optimizes the orbitals for a fixed number of +! determinants. 
This optimization process must be repeated for different +! number of determinants. + + + + +! Main program : orb_opt_trust + + +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end diff --git a/src/mo_optimization/org/TANGLE_org_mode.sh b/src/mo_optimization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_optimization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization/org/TODO.org new file mode 100644 index 00000000..960b9ba6 --- /dev/null +++ b/src/mo_optimization/org/TODO.org @@ -0,0 +1,17 @@ +TODO: +** TODO Keep under surveillance the performance of rotation matrix +- is the fix ok ? +** DONE Provider state_average_weight +** DONE Diagonal hessian for orbital optimization with a list of MOs +** DONE Something to force the step cancellation if R.R^T > treshold +** TODO Iterative method to compute the rotation matrix +- doesn't work actually +** DONE Test trust region with polynomial functions +** DONE Optimization/Localization program using the template +** DONE Correction OMP hessian shared/private arrays +** DONE State average energy +** DONE Correction of Rho +** TODO Check the PROVIDE/FREE/TOUCH +** TODO research of lambda without the power 2 +** DONE Clean the OMP sections + diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization/org/debug_gradient_list_opt.org new file mode 100644 index 00000000..3c6f98c0 --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_list_opt.org @@ -0,0 +1,79 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. 
+ +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > ABS(max_error)) then ! compare magnitudes: max_error keeps its sign + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization/org/debug_gradient_opt.org new file mode 100644 index 00000000..101e1e8c --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_opt.org @@ -0,0 +1,77 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > ABS(max_error)) then ! compare magnitudes: max_error keeps its sign + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization/org/debug_hessian_list_opt.org new file mode 100644 index 00000000..76e8b337 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_list_opt.org @@ -0,0 +1,148 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. 
+ +Provided: +| mo_num | integer | number of MOs | +| optimization_method | string | Method for the orbital optimization: | +| | | - 'full' -> full hessian | +| | | - 'diag' -> diagonal hessian | +| dim_list_act_orb | integer | number of active MOs | +| list_act(dim_list_act_orb) | integer | list of the actives MOs | +| | | | + +Internal: +| m | integer | number of MOs in the list | +| | | (active MOs) | +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization/org/debug_hessian_opt.org new file mode 100644 index 00000000..40f84c82 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_opt.org @@ -0,0 +1,172 @@ +* Debug the hessian 
+ +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! 
Deallocation + deallocate(H,H2,h_f,h_f2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization/org/diagonal_hessian_list_opt.org new file mode 100644 index 00000000..a12ca981 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_list_opt.org @@ -0,0 +1,1561 @@ +* Diagonal hessian + +The hessian of the CI energy with respect to the orbital rotations is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq} \partial x_{rs}} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{ts} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) ] +\end{align*} +With $\mathcal{P}_{pq}$ the permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrix (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E.
Scuseria + +Here, for the diagonal of the hessian, it is a little more complicated +than for the full hessian. It is not just a matter of computing the diagonal terms of the +hessian, because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and another for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the Kronecker delta. + +*Compute the diagonal hessian of energy with respect to orbital +rotations* +By diagonal hessian we mean the diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n) | double precision | Diagonal elements of the Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the transformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute each term | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals
(private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! 
internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_s = 1,m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + ! enddo + ! enddo + ! 
enddo + !enddo + !!$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * 
one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + 
enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,q,p) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) & + - mo_one_e_integrals(q,q) * one_e_dm_mo(p,p) +NOTE(review): the code below accumulates only the first product, doubled; this equals the expression above only once the (p,q) and (q,p) elements are combined - to be confirmed against the final assembly of H : + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER
+#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. 
+ +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. 
+ +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + 
get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + c(p,q) + c(q,p), with c(p,q) = sum_{uv} get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transformed into 3D arrays (u,v,p). +Using u,v as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) *
two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! 
- 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3, tmp_2rdm_3) +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! 
- 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - 
get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! 
- get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_p = 1, m + ! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) + ! 
enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_s = 1, m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + + ! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + ! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do tmp_pq = 1, n + ! write(*,'(100(F10.5))') H(tmp_pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization/org/diagonal_hessian_opt.org new file mode 100644 index 00000000..efd75065 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_opt.org @@ -0,0 +1,1516 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} 
(v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts})] +\end{align*} +With $$\mathcal{P}_{pq}$$ a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrix (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +For the diagonal of the hessian, things are a little more complicated +than for the full hessian: because of the permutations, it is not enough +to compute only the diagonal terms of the unpermuted hessian. + +The hessian is indexed by (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : (p <-> q), (r <-> s), (p <-> q and r <-> s), we have +a contribution to a diagonal term if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and another for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the Kronecker delta. 
+ +*Compute the diagonal hessian of energy with respect to orbital +rotations* +By diagonal hessian we mean the diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n) | double precision | Diagonal elements of the Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the transformation | +| | | into an n by n matrix (currently commented out in the code) | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute each individual term | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | 
temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! 
OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do s = 1,mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,r,s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 
+ print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + + 
tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * 
one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. 
+ +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. 
+ +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ 
get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. 
(p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * 
two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! 
- 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! 
- 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * 
two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! 
- get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + deallocate(tmp_2rdm_3,tmp_bi_int_3) + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do p = 1, mo_num + ! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do s = 1, mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + + ! 
h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do pq = 1, n + ! write(*,'(100(F10.5))') H(pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization/org/diagonalization_hessian.org new file mode 100644 index 00000000..5eed7dd5 --- /dev/null +++ b/src/mo_optimization/org/diagonalization_hessian.org @@ -0,0 +1,138 @@ +* Diagonalization of the hessian + +Just a matrix diagonalization using Lapack + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| H(n,n) | double precision | hessian | + +Output: +| e_val(n) | double precision | eigenvalues of the hessian | +| w(n,n) | double precision | eigenvectors of the hessian | + +Internal: +| nb_negative_nv | integer | number of negative eigenvalues | +| lwork | integer | for Lapack | +| work(lwork,n) | double precision | temporary array for Lapack | +| info | integer | if 0 -> ok, else problem in the diagonalization | +| i,j | integer | dummy indexes | + +#+BEGIN_SRC f90 :comments org :tangle diagonalization_hessian.irp.f +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! 
in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! 
Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org new file mode 100644 index 00000000..391c6033 --- /dev/null +++ b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org @@ -0,0 +1,376 @@ +* First diagonal hessian + +#+BEGIN_SRC f90 :comments :tangle first_diagonal_hessian_list_opt.irp.f +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! 
t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. 
(q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! 
Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! 
H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! 
Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization/org/gradient_opt.org new file mode 100644 index 00000000..45c761e9 --- /dev/null +++ b/src/mo_optimization/org/gradient_opt.org @@ -0,0 +1,358 @@ +* Gradient + +The gradient of the CI energy with respects to the orbital rotation +is: +(C-c C-x C-l) +$$ +G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] +$$ + + +$$ +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +$$ + +$$ +G(p,q) = \left[ +\sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] - +\left[ +\sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +\sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +\Gamma_{qt}^{rs}) +\right] +$$ + +Where p,q,r,s,t are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +E. 
Scuseria + +*Compute the gradient of energy with respect to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| v_grad(n) | double precision | the gradient | +| max_elem | double precision | element of the gradient with the largest absolute value (sign kept) | + +Internal: +| grad(mo_num,mo_num) | double precision | gradient before the transformation into a vector | +| A(mo_num,mo_num) | double precision | gradient after the permutations | +| norm | double precision | norm of the gradient | +| p, q | integer | indexes of the element in the matrix grad | +| i | integer | index for the transformation into a vector | +| r, s, t | integer | indexes for the sums | +| t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +| t4, t5, t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | +| dnrm2 | double precision | (BLAS) Euclidean norm | + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! 
internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) +#+END_SRC + +** Calculation +*** Initialization +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo + enddo + !$OMP END DO +#+END_SRC + +*** Term 1 + +Without optimization the term 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + grad(p,q) = grad(p,q) & + + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) + enddo + enddo +enddo + +Since the matrix multiplication A.B is defined like : +\begin{equation} +c_{ij} = \sum_k a_{ik}.b_{kj} +\end{equation} +The previous equation can be rewritten as a matrix multplication + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !**************** + ! 
Opt first term + !**************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& + mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient, first term (s) :', t6 + !$OMP END MASTER +#+END_SRC + +*** Term 2 + +Without optimization the second term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + do t= 1, mo_num + + grad(p,q) = grad(p,q) & + + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & + - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) + enddo + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +Using the two body matrix properties : +two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +t is the rightmost index, so we can put it in the outer loop and create +3-index temporary arrays for each value of t. +r,s can be seen as one index + +By doing so, a matrix multiplication appears + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !***************** + ! 
Opt second term + !***************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient second term (s) : ', t6 + !$OMP END MASTER +#+END_SRC + +*** Deallocation of private arrays +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) +#+END_SRC + +*** Permutation, 2D matrix -> vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. 
+#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(grad,A) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization/org/hessian_list_opt.org new file mode 100644 index 00000000..3df461cf --- /dev/null +++ b/src/mo_optimization/org/hessian_list_opt.org @@ -0,0 +1,1141 @@ +* Hessian + +The hessian of the CI energy with respect to the orbital rotations is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq} \partial x_{rs}} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{ts} 
\Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) ] +\end{align*} +With $$\mathcal{P}_{pq}$$ a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrix (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num included). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. 
Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! 
out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! 
Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. 
+$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo + enddo + !$OMP END DO + + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. 
+$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = 
hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! 
it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multiplication appears. +$$c_{q,r} = \sum_{uv} a_{q,uv} b_{uv,r}$$ + +There is a Kronecker delta $$\delta_{ps}$$, so we just compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !****************************** + ! 
Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multiplication +appears. 
$$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! part 2 + ! 
get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,t as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org 
:tangle hessian_list_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,t as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! 
Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,t as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! 
Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,t as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 4 + ! 
- get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] 
+=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! 
It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization/org/hessian_opt.org new file mode 100644 index 00000000..5b0642e3 --- /dev/null +++ b/src/mo_optimization/org/hessian_opt.org @@ -0,0 +1,1056 @@ +* Hessian + +The hessian of the CI energy with respect to the orbital rotations is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq} \partial x_{rs}} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{ts} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) ] +\end{align*} +With $$\mathcal{P}_{pq}$$ a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrix (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num included). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 
141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! 
Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! 
Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. 
+$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. 
+$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + 
get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multiplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a Kronecker delta $$\delta_{qr}$$, so we just compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! 
it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multiplication appears. +$$c_{q,r} = \sum_{uv} a_{q,uv} b_{uv,r}$$ + +There is a Kronecker delta $$\delta_{ps}$$, so we just compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !****************************** + ! 
Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! 
get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! part 2 + ! 
get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 1 + ! 
- get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! 
Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! 
Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 4 + ! 
- get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. 
Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization/org/my_providers.org new file mode 100644 index 00000000..b88cbd62 --- /dev/null +++ b/src/mo_optimization/org/my_providers.org @@ -0,0 +1,308 @@ +* Providers +** Dimensions of MOs + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! 
with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER +#+END_SRC + +** Energies/criterions +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER +#+END_SRC + +** Gradient/hessian +*** Orbital optimization +**** With all the MOs +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! 
- Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER +#+END_SRC + +**** With the list of active MOs +Can be generalized to any mo_class by changing the list/dimension +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER +#+END_SRC + +*** Orbital localization +**** Gradient +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_core, (n_mo_dim_core) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_core ] + implicit none + BEGIN_DOC + ! 
- Gradient of the MO localization with respect to the MO rotations for the core MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_gradient_loc_core, my_CC_loc_core , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_act, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_act ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the active MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_loc_act, my_CC_loc_act , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org ! +:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_inact, (n_mo_dim_inact) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_inact ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the inactive MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_gradient_loc_inact, my_CC_loc_inact , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_virt, (n_mo_dim_virt) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_virt ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the virtual MOs + ! 
- Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_gradient_loc_virt, my_CC_loc_virt , norm_grad) + +END_PROVIDER +#+END_SRC + +**** Hessian +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_core, (n_mo_dim_core) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the core MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_hessian_loc_core) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_act, (n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the active MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_loc_act) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_inact, (n_mo_dim_inact) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the inactive MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_hessian_loc_inact) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_virt, (n_mo_dim_virt) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the virtual MOs + END_DOC + + !PROVIDE something ? 
+ + call hessian_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_hessian_loc_virt) + +END_PROVIDER +#+END_SRC + diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization/org/optimization.org new file mode 100644 index 00000000..cbb7b752 --- /dev/null +++ b/src/mo_optimization/org/optimization.org @@ -0,0 +1,91 @@ +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! 
Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end + +#+END_SRC diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization/org/orb_opt_trust_v2.org new file mode 100644 index 00000000..dbcd3c19 --- /dev/null +++ b/src/mo_optimization/org/orb_opt_trust_v2.org @@ -0,0 +1,349 @@ +* Orbital optimization program + +This is an optimization program for molecular orbitals. It produces +orbital rotations in order to lower the energy of a truncated wave +function. +This program just optimize the orbitals for a fixed number of +determinants. This optimization process must be repeated for different +number of determinants. + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +#+END_SRC + +* Main program : orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! 
+ TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end +#+END_SRC + +* Subroutine : run_orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + +#+END_SRC + +Subroutine to optimize the MOs using a trust region algorithm: +- choice of the method +- initialization +- optimization until convergence + +The optimization use the trust region algorithm, the different parts +are explained in the corresponding subroutine files. + +qp_edit: +| thresh_opt_max_elem_grad | +| optimization_max_nb_iter | +| optimization_method | + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| N_states | integer | number of states | +| ci_energy(N_states) | double precision | CI energies | +| state_average_weight(N_states) | double precision | Weight of the different states | + +Variables: +| m | integer | number of active MOs | +| tmp_n | integer | m*(m-1)/2, number of MO parameters | +| tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(tmp_n) | double precision | gradient | +| H(tmp_n,tmp_n) | double precision | hessian (2D) | +| h_f(m,m,m,m) | double precision | hessian (4D) | +| e_val(m) | double precision | eigenvalues of the hessian | +| w(m,m) | double precision | eigenvectors of the hessian | +| x(m) | double precision | step given by the trust region | +| m_x(m,m) | double precision | step given by the trust region after | +| tmp_R(m,m) | double precision | rotation matrix for active MOs | +| R(mo_num,mo_num) | double precision | full rotation matrix | +| prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +| new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +| delta | double precision | radius of the trust region | +| rho | double precision | agreement between the model and the exact function | +| max_elem | double precision | maximum element in the gradient | +| i | integer | index | 
+| tmp_i,tmp_j | integer | indexes in the subspace containing only | +| | | the active MOs | +| converged | logical | convergence of the algorithm | +| cancel_step | logical | if the step must be cancelled | +| nb_iter | integer | number of iterations (accepted) | +| nb_diag | integer | number of diagonalizations of the CI matrix | +| nb_cancel | integer | number of cancelled steps for the actual iteration | +| nb_cancel_tot | integer | total number of cancel steps | +| info | integer | if 0 ok, else problem in the diagonalization of | +| | | the hessian with the Lapack routine | +| criterion | double precision | energy at a given step | +| prev_criterion | double precision | energy before the rotation | +| criterion_model | double precision | estimated energy after the rotation using | +| | | a Taylor series | +| must_exit | logical | To exit the trust region algorithm when | +| | | criterion - criterion_model is too small | +| enforce_step_cancellation | logical | To force the cancellation of the step if the | +| | | error in the rotation matrix is too large | + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! 
Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef +#+END_SRC + +** Allocation + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + allocate(R(mo_num,mo_num)) ! rotation matrix + allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + + ! 
Definition of m and tmp_n + m = dim_list_act_orb + tmp_n = m*(m-1)/2 + + allocate(tmp_list(m)) + allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) + allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +#+END_SRC + +** Method + There are three different methods : + - the "full" hessian, which uses all the elements of the hessian + matrix" + - the "diagonal" hessian, which uses only the diagonal elements of the + hessian + - without the hessian (hessian = identity matrix) + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + !Display the method + print*, 'Method :', optimization_method + if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n + elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + else + print*,'Unknown optimization_method, please select full, diag or none' + call abort + endif + print*, 'Absolute value of the hessian:', absolute_eig +#+END_SRC + +** Calculations +*** Algorithm + +Here is the main algorithm of the optimization: +- First of all we initialize some parameters and we compute the + criterion (the ci energy) before doing any MO rotations +- We compute the gradient and the hessian for the active MOs +- We diagonalize the hessian +- We compute a step and loop to reduce the radius of the + trust region (and the size of the step by the way) until the step is + accepted +- We repeat the process until the convergence + NB: the convergence criterion can be changed + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + ! Loop until the convergence of the optimization + ! call diagonalize_ci + + !### Initialization ### + nb_iter = 0 + rho = 0.5d0 + not_converged = .True. + tmp_list = list_act ! Optimization of the active MOs + nb_cancel_tot = 0 + + ! 
Renormalization of the weights of the states + call state_weight_normalization + + ! Compute the criterion before the loop + call state_average_energy(prev_criterion) + + do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! 
Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. 
not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif + enddo + +#+END_SRC + +** Deallocation, end + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end +#+END_SRC + diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization/org/state_average_energy.org new file mode 100644 index 00000000..b779845a --- /dev/null +++ b/src/mo_optimization/org/state_average_energy.org @@ -0,0 +1,73 @@ +* State average energy + +Calculation of the state average energy from the integrals and the +density matrices. + +\begin{align*} +E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +\end{align*} +$h_{ij}$: mono-electronic integral +$\gamma_{ij}$: one electron density matrix +$v_{ij}^{kl}$: bi-electronic integral +$\Gamma_{ij}^{kl}$: two electrons density matrix + +TODO: OMP version + +PROVIDED: +| mo_one_e_integrals | double precision | mono-electronic integrals | +| get_two_e_integral | double precision | bi-electronic integrals | +| one_e_dm_mo | double precision | one electron density matrix | +| two_e_dm_mo | double precision | two electrons density matrix | +| nuclear_repulsion | double precision | nuclear repulsion | +| mo_num | integer | number of MOs | + +Output: +| energy | double precision | state average energy | + +Internal: +| mono_e | double precision | mono-electronic energy | +| bi_e | double precision | bi-electronic energy | +| i,j,k,l | integer | indexes to loop over the MOs | + +#+BEGIN_SRC f90 :comments org :tangle state_average_energy.irp.f +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! 
mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end +#+END_SRC diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization/org/state_weight_normalization.org new file mode 100644 index 00000000..492ad3d4 --- /dev/null +++ b/src/mo_optimization/org/state_weight_normalization.org @@ -0,0 +1,31 @@ +#+BEGIN_SRC f90 :comments org :tangle state_weight_normalization.irp.f +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end +#+END_SRC diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization/org/update_parameters.org new file mode 100644 index 00000000..cd9b9595 --- /dev/null +++ b/src/mo_optimization/org/update_parameters.org @@ -0,0 +1,16 @@ +Subroutine toupdate the parameters. +Ex: TOUCH mo_coef ... 
+ +#+BEGIN_SRC f90 :comments org :tangle update_parameters.irp.f +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch your parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end +#+END_SRC diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization/org/update_st_av_ci_energy.org new file mode 100644 index 00000000..2dc7f3ee --- /dev/null +++ b/src/mo_optimization/org/update_st_av_ci_energy.org @@ -0,0 +1,26 @@ +* Update the CI state average energy + +Computes the state average energy +\begin{align*} +E =\sum_{i=1}^{N_{states}} E_i . w_i +\end{align*} + +$E_i$: energy of state i +$w_i$: weight of state i + +#+BEGIN_SRC f90 :comments org :tangle update_st_av_ci_energy.irp.f +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end +#+END_SRC diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization/run_orb_opt_trust_v2.irp.f new file mode 100644 index 00000000..e1431255 --- /dev/null +++ b/src/mo_optimization/run_orb_opt_trust_v2.irp.f @@ -0,0 +1,317 @@ +! Subroutine : run_orb_opt_trust_v2 + + + + + + +! Subroutine to optimize the MOs using a trust region algorithm: +! - choice of the method +! - initialization +! - optimization until convergence + +! The optimization uses the trust region algorithm, the different parts +! are explained in the corresponding subroutine files. + +! qp_edit: +! | thresh_opt_max_elem_grad | +! | optimization_max_nb_iter | +! | optimization_method | + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | N_states | integer | number of states | +! | ci_energy(N_states) | double precision | CI energies | +! 
| state_average_weight(N_states) | double precision | Weight of the different states | + +! Variables: +! | m | integer | number of active MOs | +! | tmp_n | integer | m*(m-1)/2, number of MO parameters | +! | tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(tmp_n) | double precision | gradient | +! | H(tmp_n,tmp_n2) | double precision | hessian (2D) | +! | h_f(m,m,m,m) | double precision | hessian (4D) | +! | e_val(tmp_n) | double precision | eigenvalues of the hessian | +! | W(tmp_n,tmp_n2) | double precision | eigenvectors of the hessian | +! | tmp_x(tmp_n) | double precision | step given by the trust region | +! | tmp_m_x(m,m) | double precision | step given by the trust region after conversion to a matrix | +! | tmp_R(m,m) | double precision | rotation matrix for active MOs | +! | R(mo_num,mo_num) | double precision | full rotation matrix | +! | prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +! | new_mos(ao_num,mo_num) | double precision | new MOs (after the rotation) | +! | delta | double precision | radius of the trust region | +! | rho | double precision | agreement between the model and the exact function | +! | max_elem_grad | double precision | maximum element in the gradient | +! | i | integer | index | +! | tmp_i,tmp_j | integer | indexes in the subspace containing only | +! | | | the active MOs | +! | not_converged | logical | .True. while the optimization must continue | +! | cancel_step | logical | if the step must be cancelled | +! | nb_iter | integer | number of iterations (accepted) | +! | nb_diag | integer | number of diagonalizations of the CI matrix | +! | nb_cancel | integer | number of cancelled steps for the current iteration | +! | nb_cancel_tot | integer | total number of cancelled steps | +! | info | integer | if 0 ok, else problem in the diagonalization of | +! | | | the hessian with the Lapack routine | +! | criterion | double precision | energy at a given step | +! | prev_criterion | double precision | energy before the rotation | +! 
| criterion_model | double precision | estimated energy after the rotation using | +! | | | a Taylor series | +! | must_exit | logical | To exit the trust region algorithm when | +! | | | criterion - criterion_model is too small | +! | enforce_step_cancellation | logical | To force the cancellation of the step if the | +! | | | error in the rotation matrix is too large | + + +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef + +! Allocation + + +allocate(R(mo_num,mo_num)) ! rotation matrix +allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + +! Definition of m and tmp_n +m = dim_list_act_orb +tmp_n = m*(m-1)/2 + +allocate(tmp_list(m)) +allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) +allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +! Method +! There are three different methods : +! - the "full" hessian, which uses all the elements of the hessian +! matrix" +! - the "diagonal" hessian, which uses only the diagonal elements of the +! hessian +! 
- without the hessian (hessian = identity matrix) + + +!Display the method + print*, 'Method :', optimization_method +if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n +elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +else + print*,'Unknown optimization_method, please select full, diag or none' + call abort +endif +print*, 'Absolute value of the hessian:', absolute_eig + +! Algorithm + +! Here is the main algorithm of the optimization: +! - First of all we initialize some parameters and we compute the +! criterion (the ci energy) before doing any MO rotations +! - We compute the gradient and the hessian for the active MOs +! - We diagonalize the hessian +! - We compute a step and loop to reduce the radius of the +! trust region (and the size of the step by the way) until the step is +! accepted +! - We repeat the process until the convergence +! NB: the convergence criterion can be changed + + +! Loop until the convergence of the optimization +! call diagonalize_ci + +!### Initialization ### +nb_iter = 0 +rho = 0.5d0 +not_converged = .True. +tmp_list = list_act ! Optimization of the active MOs +nb_cancel_tot = 0 + +! Renormalization of the weights of the states +call state_weight_normalization + +! Compute the criterion before the loop +call state_average_energy(prev_criterion) + +do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! 
Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! 
Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif +enddo + +! Deallocation, end + + +deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization/save_energy.irp.f new file mode 100644 index 00000000..5dac8da9 --- /dev/null +++ b/src/mo_optimization/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. 
+ END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_fci_energy(E(1:N_states)) + call ezfio_set_fci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f new file mode 100644 index 00000000..2cd063da --- /dev/null +++ b/src/mo_optimization/state_average_energy.irp.f @@ -0,0 +1,72 @@ +! State average energy + +! Calculation of the state average energy from the integrals and the +! density matrices. + +! \begin{align*} +! E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +! \end{align*} +! $h_{ij}$: mono-electronic integral +! $\gamma_{ij}$: one electron density matrix +! $v_{ij}^{kl}$: bi-electronic integral +! $\Gamma_{ij}^{kl}$: two electrons density matrix + +! TODO: OMP version + +! PROVIDED: +! | mo_one_e_integrals | double precision | mono-electronic integrals | +! | get_two_e_integral | double precision | bi-electronic integrals | +! | one_e_dm_mo | double precision | one electron density matrix | +! | two_e_dm_mo | double precision | two electrons density matrix | +! | nuclear_repulsion | double precision | nuclear repulsion | +! | mo_num | integer | number of MOs | + +! Output: +! | energy | double precision | state average energy | + +! Internal: +! | mono_e | double precision | mono-electronic energy | +! | bi_e | double precision | bi-electronic energy | +! | i,j,k,l | integer | indexes to loop over the MOs | + + +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! 
bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization/state_weight_normalization.irp.f new file mode 100644 index 00000000..27d30af7 --- /dev/null +++ b/src/mo_optimization/state_weight_normalization.irp.f @@ -0,0 +1,29 @@ +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization/update_parameters.irp.f new file mode 100644 index 00000000..88e8fc34 --- /dev/null +++ b/src/mo_optimization/update_parameters.irp.f @@ -0,0 +1,15 @@ +! Subroutine toupdate the parameters. +! Ex: TOUCH mo_coef ... + + +subroutine update_parameters() + + implicit none + + !### TODO + ! 
Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization/update_st_av_ci_energy.irp.f new file mode 100644 index 00000000..18b72502 --- /dev/null +++ b/src/mo_optimization/update_st_av_ci_energy.irp.f @@ -0,0 +1,25 @@ +! Update the CI state average energy + +! Computes the state average energy +! \begin{align*} +! E =\sum_{i=1}^{N_{states}} E_i . w_i +! \end{align*} + +! $E_i$: energy of state i +! $w_i$: weight of state i + + +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end From 6b4bf5b601fb759772be99af301b4959d17c18e6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Apr 2023 14:41:08 +0200 Subject: [PATCH 56/87] Raise error --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- scripts/compilation/qp_create_ninja | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..9e5b27ce 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit 9e5b27ce5a174901765cec9db9e7b2aa6170a5de 
diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index aad85778..27b34901 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -25,7 +25,7 @@ except ImportError: "quantum_package.rc")) print("\n".join(["", "Error:", "source %s" % f, ""])) - sys.exit(1) + raise # Compress path def comp_path(path): From 750ec9ca00559eced5f08ece573bd583f5aa8c2b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Apr 2023 09:31:36 +0200 Subject: [PATCH 57/87] Fix f77zmq --- configure | 2 +- external/qp2-dependencies | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 5b50d0d7..d3377093 100755 --- a/configure +++ b/configure @@ -232,7 +232,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external - tar --gunzip --extract --file qp2-dependencies/f77-zmq-4.3.2.tar.gz + tar --gunzip --extract --file qp2-dependencies/f77-zmq-4.3.?.tar.gz cd f77-zmq-* ./configure --prefix=\$QP_ROOT export ZMQ_H="\$QP_ROOT"/include/zmq.h diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..fd43778e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit fd43778e12bb5858c4c780c34346be0f158b8cc7 From f228b0a3a477fa10d7b8194f43ae1615d1200bbc Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:43:49 +0200 Subject: [PATCH 58/87] missing program --- src/mo_optimization/optimization.irp.f | 86 ++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/mo_optimization/optimization.irp.f diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/optimization.irp.f new file mode 100644 index 00000000..9892b3e3 --- /dev/null +++ b/src/mo_optimization/optimization.irp.f @@ -0,0 +1,86 @@ +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! 
+ TOUCH read_wf + call run_optimization + +end + +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! 
To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end From 20d057b7d1c046ef6872d6713e57511e4adc311e Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:44:51 +0200 Subject: [PATCH 59/87] unecessary parameter --- src/mo_optimization/EZFIO.cfg | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index 8944e507..e6aa2d67 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -4,12 +4,6 @@ doc: Define the kind of hessian for the orbital optimization full : full hessian interface: ezfio,provider,ocaml default: full -[n_det_start] -type: integer -doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. The algorithm does a cipsi until n_det > n_det_start and the optimization starts after -interface: ezfio,provider,ocaml -default: 5 - [n_det_max_opt] type: integer doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) From 94662d3da070a8f4602578a62b3734fcce709593 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 15:11:50 +0200 Subject: [PATCH 60/87] Introduced JSON --- src/hartree_fock/NEED | 1 + src/hartree_fock/scf.irp.f | 6 +++++ src/json/EZFIO.cfg | 5 ++++ src/json/NEED | 1 + src/json/README.rst | 5 ++++ src/json/json.irp.f | 39 +++++++++++++++++++++++++++ src/json/json_formats.irp.f | 26 ++++++++++++++++++ src/scf_utils/roothaan_hall_scf.irp.f | 39 ++++++++++++++++++++++++--- src/two_body_rdm/io_two_rdm.irp.f | 8 +++--- 9 files changed, 122 insertions(+), 8 deletions(-) create mode 100644 src/json/EZFIO.cfg create mode 100644 src/json/NEED create mode 100644 src/json/README.rst create mode 100644 src/json/json.irp.f create mode 100644 src/json/json_formats.irp.f diff --git a/src/hartree_fock/NEED b/src/hartree_fock/NEED index 
2b3fa238..e168bd80 100644 --- a/src/hartree_fock/NEED +++ b/src/hartree_fock/NEED @@ -1,3 +1,4 @@ ao_one_e_ints ao_two_e_ints scf_utils +json diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index a7ac9fe4..d22b11ab 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -80,9 +80,15 @@ subroutine run mo_label = 'Orthonormalized' + write(json_unit,*) '"scf" : [' + call Roothaan_Hall_SCF call ezfio_set_hartree_fock_energy(SCF_energy) + write(json_unit,*) ']' + + call json_close + end diff --git a/src/json/EZFIO.cfg b/src/json/EZFIO.cfg new file mode 100644 index 00000000..7dc8d796 --- /dev/null +++ b/src/json/EZFIO.cfg @@ -0,0 +1,5 @@ +[empty] +type: logical +doc: Needed to create the json directory +interface: ezfio + diff --git a/src/json/NEED b/src/json/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/json/NEED @@ -0,0 +1 @@ +ezfio_files diff --git a/src/json/README.rst b/src/json/README.rst new file mode 100644 index 00000000..3dd9ffbb --- /dev/null +++ b/src/json/README.rst @@ -0,0 +1,5 @@ +==== +json +==== + +JSON files to simplify getting output information from QP. diff --git a/src/json/json.irp.f b/src/json/json.irp.f new file mode 100644 index 00000000..5a92f22f --- /dev/null +++ b/src/json/json.irp.f @@ -0,0 +1,39 @@ +BEGIN_PROVIDER [ character*(128), json_filename ] + implicit none + BEGIN_DOC + ! Fortran unit of the JSON file + END_DOC + integer, external :: getUnitAndOpen + integer :: counter + character*(128) :: prefix + logical :: exists + + prefix = trim(ezfio_filename)//'/json/' + + exists = .True. + counter = 0 + do while (exists) + counter += 1 + write(json_filename, '(A,I5.5,A)') trim(prefix), counter, '.json' + INQUIRE(FILE=trim(json_filename), EXIST=exists) + enddo + +END_PROVIDER + +BEGIN_PROVIDER [ integer, json_unit] + implicit none + BEGIN_DOC + ! Unit file for JSON output + END_DOC + integer, external :: getUnitAndOpen + call ezfio_set_json_empty(.False.) 
+ json_unit = getUnitAndOpen(json_filename, 'w') + write(json_unit, '(A)') '{' +END_PROVIDER + +subroutine json_close + write(json_unit, '(A)') '}' + close(json_unit) + FREE json_unit +end + diff --git a/src/json/json_formats.irp.f b/src/json/json_formats.irp.f new file mode 100644 index 00000000..14a8f014 --- /dev/null +++ b/src/json/json_formats.irp.f @@ -0,0 +1,26 @@ + BEGIN_PROVIDER [ character*(64), json_int_fmt ] +&BEGIN_PROVIDER [ character*(64), json_int_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_real_fmt ] +&BEGIN_PROVIDER [ character*(64), json_real_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_str_fmt ] +&BEGIN_PROVIDER [ character*(64), json_str_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_true_fmt ] +&BEGIN_PROVIDER [ character*(64), json_true_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_false_fmt ] +&BEGIN_PROVIDER [ character*(64), json_false_fmtx ] + implicit none + BEGIN_DOC + ! Formats for JSON output. + ! x: used to mark the last write (no comma) + END_DOC + json_int_fmt = '('' "'',A,''": '',I10,'','')' + json_int_fmtx = '('' "'',A,''": '',I10)' + json_real_fmt = '('' "'',A,''": '',E22.15,'','')' + json_real_fmtx = '('' "'',A,''": '',E22.15)' + json_str_fmt = '('' "'',A,''": "'',A,''",'')' + json_str_fmtx = '('' "'',A,''": "'',A,''"'')' + json_true_fmt = '('' "'',A,''": true,'')' + json_true_fmtx = '('' "'',A,''": true'')' + json_false_fmt = '('' "'',A,''": false,'')' + json_false_fmtx = '('' "'',A,''": false'')' +END_PROVIDER diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3b9eaeb4..449afdc8 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -12,6 +12,7 @@ END_DOC integer :: iteration_SCF,dim_DIIS,index_dim_DIIS + logical :: converged integer :: i,j logical, external :: qp_stop double precision, allocatable :: mo_coef_save(:,:) @@ -50,10 +51,8 @@ END_DOC ! 
PROVIDE FPS_SPF_matrix_AO Fock_matrix_AO - do while ( & - ( (max_error_DIIS > threshold_DIIS_nonzero) .or. & - (dabs(Delta_energy_SCF) > thresh_SCF) & - ) .and. (iteration_SCF < n_it_SCF_max) ) + converged = .False. + do while ( .not.converged .and. (iteration_SCF < n_it_SCF_max) ) ! Increment cycle number @@ -144,17 +143,45 @@ END_DOC SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF + converged = ( (max_error_DIIS <= threshold_DIIS_nonzero) .and. & + (dabs(Delta_energy_SCF) <= thresh_SCF) ) + ! Print results at the end of each iteration write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)') & iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS +! Write data in JSON file + + if (iteration_SCF == 1) then + write(json_unit, *) '{' + else + write(json_unit, *) '}, {' + endif + write(json_unit, json_int_fmt) 'iteration', iteration_SCF + write(json_unit, json_real_fmt) 'energy', energy_SCF + write(json_unit, json_real_fmt) 'delta_energy_SCF', Delta_energy_SCF + write(json_unit, json_real_fmt) 'max_error_DIIS', max_error_DIIS + write(json_unit, json_real_fmt) 'level_shift', level_shift + write(json_unit, json_int_fmt) 'dim_DIIS', dim_DIIS + if (Delta_energy_SCF < 0.d0) then call save_mos + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + if (qp_stop()) exit enddo + write(json_unit, *) '}' if (iteration_SCF < n_it_SCF_max) then mo_label = 'Canonical' @@ -166,6 +193,10 @@ END_DOC write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & '====','================','================','================','================' write(6,*) + if (converged) then + write(6,*) 'SCF converged' + endif + if(.not.frozen_orb_scf)then call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), & diff --git a/src/two_body_rdm/io_two_rdm.irp.f 
b/src/two_body_rdm/io_two_rdm.irp.f index f7008ca9..67837682 100644 --- a/src/two_body_rdm/io_two_rdm.irp.f +++ b/src/two_body_rdm/io_two_rdm.irp.f @@ -1,12 +1,12 @@ subroutine write_array_two_rdm(n_orb,nstates,array_tmp,name_file) implicit none integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,nstates) character*(128) :: output integer :: i_unit_output,getUnitAndOpen - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'W') write(i_unit_output)array_tmp @@ -18,9 +18,9 @@ subroutine read_array_two_rdm(n_orb,nstates,array_tmp,name_file) character*(128) :: output integer :: i_unit_output,getUnitAndOpen integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,N_states) - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'R') read(i_unit_output)array_tmp From 5039bb674d6d2433de182011bbb8973748588c25 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 18:06:37 +0200 Subject: [PATCH 61/87] Fixed need for JSON --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- src/json/json.irp.f | 6 ++++++ src/kohn_sham/ks_scf.irp.f | 4 ++++ src/kohn_sham_rs/rs_ks_scf.irp.f | 3 +++ src/scf_utils/NEED | 1 + src/scf_utils/roothaan_hall_scf.irp.f | 4 ++++ 8 files changed, 21 insertions(+), 3 deletions(-) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 
index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index fd43778e..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit fd43778e12bb5858c4c780c34346be0f158b8cc7 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/json/json.irp.f b/src/json/json.irp.f index 5a92f22f..1fc24eb6 100644 --- a/src/json/json.irp.f +++ b/src/json/json.irp.f @@ -10,6 +10,7 @@ BEGIN_PROVIDER [ character*(128), json_filename ] prefix = trim(ezfio_filename)//'/json/' + call lock_io exists = .True. counter = 0 do while (exists) @@ -17,6 +18,7 @@ BEGIN_PROVIDER [ character*(128), json_filename ] write(json_filename, '(A,I5.5,A)') trim(prefix), counter, '.json' INQUIRE(FILE=trim(json_filename), EXIST=exists) enddo + call unlock_io END_PROVIDER @@ -27,13 +29,17 @@ BEGIN_PROVIDER [ integer, json_unit] END_DOC integer, external :: getUnitAndOpen call ezfio_set_json_empty(.False.) + call lock_io json_unit = getUnitAndOpen(json_filename, 'w') write(json_unit, '(A)') '{' + call unlock_io END_PROVIDER subroutine json_close + call lock_io write(json_unit, '(A)') '}' close(json_unit) + call unlock_io FREE json_unit end diff --git a/src/kohn_sham/ks_scf.irp.f b/src/kohn_sham/ks_scf.irp.f index aa6efd52..85bfc333 100644 --- a/src/kohn_sham/ks_scf.irp.f +++ b/src/kohn_sham/ks_scf.irp.f @@ -90,7 +90,11 @@ subroutine run ! 
Choose SCF algorithm + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + + call json_close end diff --git a/src/kohn_sham_rs/rs_ks_scf.irp.f b/src/kohn_sham_rs/rs_ks_scf.irp.f index 84b85136..f28fd861 100644 --- a/src/kohn_sham_rs/rs_ks_scf.irp.f +++ b/src/kohn_sham_rs/rs_ks_scf.irp.f @@ -93,7 +93,10 @@ subroutine run level_shift += 1.d0 touch level_shift + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + call json_close call ezfio_set_kohn_sham_rs_energy(SCF_energy) write(*, '(A22,X,F16.10)') 'one_e_energy = ',one_e_energy diff --git a/src/scf_utils/NEED b/src/scf_utils/NEED index b89695da..292d343a 100644 --- a/src/scf_utils/NEED +++ b/src/scf_utils/NEED @@ -1,2 +1,3 @@ mo_guess bitmask +json diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 449afdc8..08fe7acf 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -153,6 +153,7 @@ END_DOC ! 
Write data in JSON file + call lock_io if (iteration_SCF == 1) then write(json_unit, *) '{' else @@ -164,6 +165,7 @@ END_DOC write(json_unit, json_real_fmt) 'max_error_DIIS', max_error_DIIS write(json_unit, json_real_fmt) 'level_shift', level_shift write(json_unit, json_int_fmt) 'dim_DIIS', dim_DIIS + call unlock_io if (Delta_energy_SCF < 0.d0) then call save_mos @@ -172,11 +174,13 @@ END_DOC write(json_unit, json_false_fmt) 'saved' endif + call lock_io if (converged) then write(json_unit, json_true_fmtx) 'converged' else write(json_unit, json_false_fmtx) 'converged' endif + call unlock_io if (qp_stop()) exit From 528bf20e1e00e2677f695bba8b61c203cc053777 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 18:10:08 +0200 Subject: [PATCH 62/87] Cleaning useless function --- src/kohn_sham/print_mos.irp.f | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f index 5e728444..7105c989 100644 --- a/src/kohn_sham/print_mos.irp.f +++ b/src/kohn_sham/print_mos.irp.f @@ -21,10 +21,3 @@ program print_mos end -double precision function f_mu(x) - implicit none - double precision, intent(in) :: x - - - -end From 5fb6ed0180090e1d7d5cd009ce0e3588815670a3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 00:50:07 +0200 Subject: [PATCH 63/87] Added JSON in FCI --- src/cipsi/NEED | 1 + src/cipsi/cipsi.irp.f | 10 +-- src/cipsi/stochastic_cipsi.irp.f | 8 +-- src/cipsi/write_cipsi_json.irp.f | 53 ++++++++++++++ src/fci/fci.irp.f | 7 ++ src/hartree_fock/scf.irp.f | 7 +- src/iterations/EZFIO.cfg | 24 ------- src/iterations/io.irp.f | 37 ---------- src/iterations/iterations.irp.f | 91 +++++++++++++++--------- src/iterations/print_extrapolation.irp.f | 14 ++-- src/json/json_formats.irp.f | 20 ++++++ src/scf_utils/roothaan_hall_scf.irp.f | 6 +- 12 files changed, 162 insertions(+), 116 deletions(-) create mode 100644 src/cipsi/write_cipsi_json.irp.f delete mode 100644 src/iterations/EZFIO.cfg 
delete mode 100644 src/iterations/io.irp.f diff --git a/src/cipsi/NEED b/src/cipsi/NEED index 5bd742bc..89c128ec 100644 --- a/src/cipsi/NEED +++ b/src/cipsi/NEED @@ -1,3 +1,4 @@ +json perturbation zmq mpi diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 5225c6df..88aaeae0 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -16,7 +16,6 @@ subroutine run_cipsi double precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -76,7 +75,6 @@ subroutine run_cipsi ) write(*,'(A)') '--------------------------------------------------------------------------------' - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) if (do_pt2) then @@ -106,10 +104,10 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() - N_iter += 1 + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -155,11 +153,13 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep(1:N_states), & pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) end + diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 35e80eb8..b83e658a 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -15,7 +15,6 @@ subroutine run_stochastic_cipsi double 
precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -96,10 +95,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() - N_iter += 1 + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -135,9 +134,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep, & pt2_data , pt2_data_err, N_det, N_configuration, N_states, psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/write_cipsi_json.irp.f b/src/cipsi/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! 
Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index 9d9c0b7d..bb2a93f8 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -39,12 +39,19 @@ program fci if (.not.is_zmq_slave) then PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + write(json_unit,json_array_open_fmt) 'fci' + if (do_pt2) then 
call run_stochastic_cipsi else call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_two_e_integrals_in_map pt2_min_parallel_tasks diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index d22b11ab..a361c04f 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -80,15 +80,14 @@ subroutine run mo_label = 'Orthonormalized' - write(json_unit,*) '"scf" : [' + write(json_unit,json_array_open_fmt) 'scf' call Roothaan_Hall_SCF - call ezfio_set_hartree_fock_energy(SCF_energy) - - write(json_unit,*) ']' + write(json_unit,json_array_close_fmtx) call json_close + call ezfio_set_hartree_fock_energy(SCF_energy) end diff --git a/src/iterations/EZFIO.cfg b/src/iterations/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations/io.irp.f b/src/iterations/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations/io.irp.f +++ /dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! 
number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations/iterations.irp.f b/src/iterations/iterations.irp.f index 2c9faaf8..d06d1b6e 100644 --- a/src/iterations/iterations.irp.f +++ b/src/iterations/iterations.irp.f @@ -1,42 +1,65 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC - integer :: i - do i=1,min(N_states,N_det) - call extrapolate_data(N_iter, & - energy_iterations(i,1:N_iter), & - pt2_iterations(i,1:N_iter), & - extrapolated_energy(1:N_iter,i)) - enddo -END_PROVIDER - - -subroutine save_iterations(e_, pt2_,n_) +BEGIN_PROVIDER [ integer, N_iter ] implicit none BEGIN_DOC -! Update the energy in the EZFIO file. +! Number of CIPSI iterations END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) + N_iter = 0 +END_PROVIDER + +BEGIN_PROVIDER [ integer, N_iter_max ] + implicit none + BEGIN_DOC + ! 
Max number of iterations for extrapolations + END_DOC + N_iter_max = 8 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, energy_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, pt2_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter_max,N_states) ] + implicit none + BEGIN_DOC +! The energy at each iteration for the extrapolations + END_DOC + + energy_iterations = 0.d0 + pt2_iterations = 0.d0 + extrapolated_energy = 0.d0 +END_PROVIDER + +subroutine increment_n_iter(e, pt2_data) + use selection_types + implicit none + BEGIN_DOC +! Does what is necessary to increment n_iter + END_DOC + double precision, intent(in) :: e(*) + type(pt2_type), intent(in) :: pt2_data + integer :: k, i + + if (N_det < N_states) return + + if (N_iter < N_iter_max) then + N_iter += 1 + else + do k=2,N_iter + energy_iterations(1:N_states,k-1) = energy_iterations(1:N_states,k) + pt2_iterations(1:N_states,k-1) = pt2_iterations(1:N_states,k) enddo - N_iter = N_iter-1 - TOUCH N_iter endif + energy_iterations(1:N_states,N_iter) = e(1:N_states) + pt2_iterations(1:N_states,N_iter) = pt2_data % rpt2(1:N_states) - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) + if (N_iter < 2) then + extrapolated_energy(1,:) = energy_iterations(:,1) + pt2_iterations(:,1) + extrapolated_energy(2,:) = energy_iterations(:,2) + pt2_iterations(:,2) + else + do i=1,N_states + call extrapolate_data(N_iter, & + energy_iterations(i,1:N_iter), & + pt2_iterations(i,1:N_iter), & + extrapolated_energy(1:N_iter,i)) + enddo + endif end - diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f 
index cb46fb67..111429bf 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -5,10 +5,14 @@ subroutine print_extrapolated_energy END_DOC integer :: i,k + integer :: N_states_p, N_iter_p if (N_iter< 2) then return endif + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter, 8) + write(*,'(A)') '' write(*,'(A)') 'Extrapolated energies' write(*,'(A)') '------------------------' @@ -20,20 +24,20 @@ subroutine print_extrapolated_energy write(*,*) '=========== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) + do k=2,N_iter_p + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' - do i=2, min(N_states,N_det) + do i=2, N_states_p print *, '' print *, 'State ', i print *, '' write(*,*) '=========== ', '=================== ', '=================== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & + do k=2,N_iter_p + write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,k), extrapolated_energy(k,i), & extrapolated_energy(k,i) - extrapolated_energy(k,1), & (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 enddo diff --git a/src/json/json_formats.irp.f b/src/json/json_formats.irp.f index 14a8f014..773114ba 100644 --- a/src/json/json_formats.irp.f +++ b/src/json/json_formats.irp.f @@ -8,6 +8,16 @@ &BEGIN_PROVIDER [ character*(64), json_true_fmtx ] &BEGIN_PROVIDER [ character*(64), json_false_fmt ] &BEGIN_PROVIDER [ character*(64), json_false_fmtx ] +&BEGIN_PROVIDER [ 
character*(64), json_array_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_dict_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmtx ] implicit none BEGIN_DOC ! Formats for JSON output. @@ -23,4 +33,14 @@ json_true_fmtx = '('' "'',A,''": true'')' json_false_fmt = '('' "'',A,''": false,'')' json_false_fmtx = '('' "'',A,''": false'')' + json_array_open_fmt = '('' "'',A,''": ['')' + json_array_uopen_fmt = '('' ['')' + json_array_close_fmt = '('' ],'')' + json_array_close_uopen_fmt = '('' ], ['')' + json_array_close_fmtx = '('' ]'')' + json_dict_open_fmt = '('' "'',A,''": {'')' + json_dict_uopen_fmt = '('' {'')' + json_dict_close_fmt = '('' },'')' + json_dict_close_uopen_fmt = '('' }, {'')' + json_dict_close_fmtx = '('' }'')' END_PROVIDER diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 08fe7acf..cf006035 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -155,9 +155,9 @@ END_DOC call lock_io if (iteration_SCF == 1) then - write(json_unit, *) '{' + write(json_unit, json_dict_uopen_fmt) else - write(json_unit, *) '}, {' + write(json_unit, json_dict_close_uopen_fmt) endif write(json_unit, json_int_fmt) 'iteration', iteration_SCF write(json_unit, json_real_fmt) 'energy', energy_SCF @@ -185,7 +185,7 @@ END_DOC if (qp_stop()) exit enddo - write(json_unit, *) '}' + write(json_unit, json_dict_close_fmtx) if (iteration_SCF < n_it_SCF_max) then mo_label = 'Canonical' From dd5291d90dca05f50976b10d42d9da5657641058 Mon Sep 17 00:00:00 2001 From: Anthony Scemama 
Date: Mon, 24 Apr 2023 01:01:31 +0200 Subject: [PATCH 64/87] Added exc_energy_error.py --- scripts/exc_energy_error.py | 186 ++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100755 scripts/exc_energy_error.py diff --git a/scripts/exc_energy_error.py b/scripts/exc_energy_error.py new file mode 100755 index 00000000..ba9d7917 --- /dev/null +++ b/scripts/exc_energy_error.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# Computes the error on the excitation energy of a CIPSI run. + +def student(p,df): + import scipy + from scipy.stats import t + return t.ppf(p, df) + + +def chi2cdf(x,k): + import scipy + import scipy.stats + return scipy.stats.chi2.cdf(x,k) + + +def jarque_bera(data): + + n = max(len(data), 2) + norm = 1./ sum( [ w for (_,w) in data ] ) + + mu = sum( [ w* x for (x,w) in data ] ) * norm + sigma2 = sum( [ w*(x-mu)**2 for (x,w) in data ] ) * norm + if sigma2 > 0.: + S = ( sum( [ w*(x-mu)**3 for (x,w) in data ] ) * norm ) / sigma2**(3./2.) + K = ( sum( [ w*(x-mu)**4 for (x,w) in data ] ) * norm ) / sigma2**2 + else: + S = 0. + K = 0. + + # Value of the Jarque-Bera test + JB = n/6. * (S**2 + 1./4. * (K-3.)**2) + + # Probability that the data comes from a Gaussian distribution + p = 1. 
- chi2cdf(JB,2) + + return JB, mu, sqrt(sigma2/(n-1)), p + + + +to_eV = 27.2107362681 +import sys, os +import scipy +import scipy.stats +from math import sqrt, gamma, exp +import json + + +def read_data(filename,state): + """ Read energies and PT2 from input file """ + with open(filename,'r') as f: + lines = json.load(f)['fci'] + + print(f"State: {state}") + + gs = [] + es = [] + + for l in lines: + try: + pt2_0 = l['states'][0]['pt2'] + e_0 = l['states'][0]['energy'] + pt2_1 = l['states'][state]['pt2'] + e_1 = l['states'][state]['energy'] + gs.append( (e_0, pt2_0) ) + es.append( (e_1, pt2_1) ) + except: pass + + def f(p_1, p0, p1): + e, pt2 = p0 + y0, x0 = p_1 + y1, x1 = p1 + try: + alpha = (y1-y0)/(x0-x1) + except ZeroDivisionError: + alpha = 1. + return [e, pt2, alpha] + + for l in (gs, es): + p_1, p0, p1 = l[0], l[0], l[1] + l[0] = f(p_1, p0, p1) + + for i in range(1,len(l)-1): + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[i+1] + l[i] = f(p_1, p0, p1) + + i = len(l)-1 + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[-1] + l[i] = f(p_1, p0, p1) + + return [ x+y for x,y in zip(gs,es) ] + + +def compute(data): + + d = [] + for e0, p0, a0, e1, p1, a1 in data: + x = (e1+p1)-(e0+p0) + w = 1./sqrt(p0**2 + p1**2) + bias = (a1-1.)*p1 - (a0-1.)*p0 + d.append( (x,w,bias) ) + + x = [] + target = (scipy.stats.norm.cdf(1.)-0.5)*2 # = 0.6827 + + print("| %2s | %8s | %8s | %8s | %8s | %8s |"%( "N", "DE", "+/-", "bias", "P(G)", "J")) + print("|----+----------+----------+----------+----------+----------|") + xmax = (0.,0.,0.,0.,0.,0,0.) 
+ for i in range(len(data)-1): + jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] ) + bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] ) + mu = (mu+0.5*bias) * to_eV + sigma = sigma * to_eV + bias = bias * to_eV + n = len(data[i:]) + beta = student(0.5*(1.+target/p) ,n) + err = sigma * beta + 0.5*abs(bias) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |"%( n, mu, err, bias, p, jb)) + if n < 3 : + continue + y = (err, p, mu, err, jb,n,bias) + if p > xmax[1]: xmax = y + if p < 0.8: + continue + x.append(y) + + x = sorted(x) + + print("|----+----------+----------+----------+----------+----------|") + if x != []: + xmax = x[0] + _, p, mu, err, jb, n, bias = xmax + beta = student(0.5*(1.+target/p),n) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |\n"%(n, mu, err, bias, p, jb)) + + return mu, err, bias, p + +filename = sys.argv[1] +print(filename) +if len(sys.argv) > 2: + state = int(sys.argv[2]) +else: + state = 1 +data = read_data(filename,state) +mu, err, bias, _ = compute(data) +print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err)) + +import numpy as np +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ] ] ) +E0 = np.linalg.solve(A,B)[1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ] ] ) +E1 = np.linalg.solve(A,B)[1] +average_2 = (E1-E0)*to_eV + +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ], + [ data[-3][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ], + [ data[-3][0] ] ] ) +E0 = np.linalg.lstsq(A,B,rcond=None)[0][1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ], + [ data[-3][4], 1. 
] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ], + [ data[-3][3] ] ] ) +E1 = np.linalg.lstsq(A,B,rcond=None)[0][1] +average_3 = (E1-E0)*to_eV + +exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV +error_2 = abs(average_2 - average_3) +error_3 = abs(average_3 - exc) +print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2)) +print(" largest wf: %.3f +/- %.3f "%(average_3, error_3)) + + From 918839fbf6636c36ae9752b2d029ff5f31d157d3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:22:24 +0200 Subject: [PATCH 65/87] Added JSON in FCI_TC --- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 10 +++++++++- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 6 ++++++ src/tc_bi_ortho/h_tc_s2_u0.irp.f | 20 ++++++++++---------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 1f4fe849..284a1e2e 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -94,7 +94,15 @@ subroutine run_stochastic_cipsi call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop - N_iter += 1 + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) + + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) + call print_extrapolated_energy() + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index 84ac8166..ed75c882 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -62,6 +62,7 @@ subroutine run_cipsi_tc endif endif ! 
--- + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then call run_stochastic_cipsi @@ -69,6 +70,11 @@ subroutine run_cipsi_tc call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks if(elec_alpha_num+elec_beta_num.ge.3)then diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 30b0f273..b9b85a96 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -12,9 +12,9 @@ subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC integer, intent(in) :: N_st,sze - double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(inout) :: l_0(sze,N_st), r_0(sze,N_st) double precision, intent(out) :: energies(N_st), s2(N_st) - logical :: do_right + logical :: do_right integer :: istate double precision, allocatable :: s_0(:,:), v_0(:,:) double precision :: u_dot_v, norm @@ -40,7 +40,7 @@ subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + logical :: do_right do_right = .True. call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end @@ -57,7 +57,7 @@ subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + logical :: do_right do_right = .False. 
call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end @@ -77,7 +77,7 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical, intent(in) :: do_right + logical, intent(in) :: do_right integer :: k double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t @@ -124,7 +124,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishi use bitmasks implicit none BEGIN_DOC - ! Computes $v_t = H | u_t\rangle$ + ! Computes $v_t = H | u_t\rangle$ ! ! Default should be 1,N_det,0,1 ! @@ -132,7 +132,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishi END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - logical, intent(in) :: do_right + logical, intent(in) :: do_right double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) @@ -165,7 +165,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,ie END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - logical, intent(in) :: do_right + logical, intent(in) :: do_right double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) double precision :: hij, sij @@ -542,7 +542,7 @@ compute_singles=.True. lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) - tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) ! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) if(do_right)then @@ -693,7 +693,7 @@ compute_singles=.True. 
lcol = psi_bilinear_matrix_transp_columns(l_b) ASSERT (lcol <= N_det_beta_unique) - tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) ! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) if(do_right)then From 54a88fe4caf88dc8be032d922fe87c0b250deab5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:32:05 +0200 Subject: [PATCH 66/87] Added JSON to fci_tc_bi --- src/cipsi_tc_bi_ortho/NEED | 3 +- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 +- src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f | 53 ++++++++++ src/fci_tc_bi/NEED | 1 + src/fci_tc_bi/save_energy.irp.f | 4 +- src/iterations_tc/EZFIO.cfg | 24 ----- src/iterations_tc/NEED | 0 src/iterations_tc/io.irp.f | 37 ------- src/iterations_tc/iterations.irp.f | 43 -------- src/iterations_tc/print_extrapolation.irp.f | 46 -------- src/iterations_tc/print_summary.irp.f | 104 ------------------- 11 files changed, 59 insertions(+), 258 deletions(-) create mode 100644 src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f delete mode 100644 src/iterations_tc/EZFIO.cfg delete mode 100644 src/iterations_tc/NEED delete mode 100644 src/iterations_tc/io.irp.f delete mode 100644 src/iterations_tc/iterations.irp.f delete mode 100644 src/iterations_tc/print_extrapolation.irp.f delete mode 100644 src/iterations_tc/print_summary.irp.f diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED index 4dd1af36..8f05be69 100644 --- a/src/cipsi_tc_bi_ortho/NEED +++ b/src/cipsi_tc_bi_ortho/NEED @@ -1,6 +1,7 @@ +json mpi perturbation zmq -iterations_tc +iterations csf tc_bi_ortho diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 284a1e2e..a06f28e9 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -101,7 +101,7 @@ subroutine run_stochastic_cipsi call 
increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() - call print_mol_properties() +! call print_mol_properties() call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit diff --git a/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < 
N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/fci_tc_bi/NEED b/src/fci_tc_bi/NEED index 000b0deb..3bb9515a 100644 --- a/src/fci_tc_bi/NEED +++ b/src/fci_tc_bi/NEED @@ -1,3 +1,4 @@ +json tc_bi_ortho davidson_undressed cipsi_tc_bi_ortho diff --git a/src/fci_tc_bi/save_energy.irp.f b/src/fci_tc_bi/save_energy.irp.f index 7c41d00f..421ae5f8 100644 --- a/src/fci_tc_bi/save_energy.irp.f +++ b/src/fci_tc_bi/save_energy.irp.f @@ -4,6 +4,6 @@ subroutine save_energy(E,pt2) ! Saves the energy in |EZFIO|. END_DOC double precision, intent(in) :: E(N_states), pt2(N_states) - call ezfio_set_fci_tc_energy(E(1:N_states)) - call ezfio_set_fci_tc_energy_pt2(E(1:N_states)+pt2(1:N_states)) + call ezfio_set_fci_tc_bi_energy(E(1:N_states)) + call ezfio_set_fci_tc_bi_energy_pt2(E(1:N_states)+pt2(1:N_states)) end diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations_tc/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED deleted file mode 100644 index e69de29b..00000000 diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations_tc/io.irp.f +++ 
/dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f deleted file mode 100644 index 2f1cf0c1..00000000 --- a/src/iterations_tc/iterations.irp.f +++ /dev/null @@ -1,43 +0,0 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC -! integer :: i - extrapolated_energy = 0.D0 -END_PROVIDER - - subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy) - implicit none - integer, intent(in) :: Niter - double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter) - call extrapolate_data(Niter,ept2,pt1,extrap_energy) - end - -subroutine save_iterations(e_, pt2_,n_) - implicit none - BEGIN_DOC -! Update the energy in the EZFIO file. 
- END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) - enddo - N_iter = N_iter-1 - TOUCH N_iter - endif - - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) -end - diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f deleted file mode 100644 index cb46fb67..00000000 --- a/src/iterations_tc/print_extrapolation.irp.f +++ /dev/null @@ -1,46 +0,0 @@ -subroutine print_extrapolated_energy - implicit none - BEGIN_DOC -! 
Print the extrapolated energy in the output - END_DOC - - integer :: i,k - - if (N_iter< 2) then - return - endif - write(*,'(A)') '' - write(*,'(A)') 'Extrapolated energies' - write(*,'(A)') '------------------------' - write(*,'(A)') '' - - print *, '' - print *, 'State ', 1 - print *, '' - write(*,*) '=========== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy' - write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) - enddo - write(*,*) '=========== ', '===================' - - do i=2, min(N_states,N_det) - print *, '' - print *, 'State ', i - print *, '' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & - extrapolated_energy(k,i) - extrapolated_energy(k,1), & - (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 - enddo - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - enddo - - print *, '' - -end subroutine - diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f deleted file mode 100644 index 8e6285e2..00000000 --- a/src/iterations_tc/print_summary.irp.f +++ /dev/null @@ -1,104 +0,0 @@ -subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_) - use selection_types - implicit none - BEGIN_DOC -! 
Print the extrapolated energy in the output - END_DOC - - integer, intent(in) :: n_det_, n_configuration_, n_st - double precision, intent(in) :: e_(n_st), s2_(n_st) - type(pt2_type) , intent(in) :: pt2_data, pt2_data_err - integer :: i, k - integer :: N_states_p - character*(9) :: pt2_string - character*(512) :: fmt - - if (do_pt2) then - pt2_string = ' ' - else - pt2_string = '(approx)' - endif - - N_states_p = min(N_det_,n_st) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det_ - print '(A)', '-----------------------------------' - print *, '' - - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', e_(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1) - write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 - endif - write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) - write(*,fmt) '# Excit. 
(eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - print *, 'N_det = ', N_det_ - print *, 'N_states = ', n_st - if (s2_eig) then - print *, 'N_cfg = ', N_configuration_ - if (only_expected_s2) then - print *, 'N_csf = ', N_csf - endif - endif - print *, '' - - do k=1, N_states_p - print*,'* State ',k - print *, '< S^2 > = ', s2_(k) - print *, 'E = ', e_(k) - print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) - print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) - print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, '' - enddo - - print *, '-----' - if(n_st.gt.1)then - print *, 'Variational Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i) - e_(1)), & - (e_(i) - e_(1)) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & - (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + renormalized perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & - (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % 
rpt2(1))) * 27.211396641308d0 - enddo - endif - -! call print_energy_components() - -end subroutine - From 67902d437776d5caeb4f9292796baafaa9ca66fe Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:36:49 +0200 Subject: [PATCH 67/87] Removed print_e_conv: should be replaced by python script --- src/tools/print_e_conv.irp.f | 80 ------------------------------------ 1 file changed, 80 deletions(-) delete mode 100644 src/tools/print_e_conv.irp.f diff --git a/src/tools/print_e_conv.irp.f b/src/tools/print_e_conv.irp.f deleted file mode 100644 index e358ebc1..00000000 --- a/src/tools/print_e_conv.irp.f +++ /dev/null @@ -1,80 +0,0 @@ -program print_e_conv - implicit none - BEGIN_DOC -! program that prints in a human readable format the convergence of the CIPSI algorithm. -! -! for all istate, this program produces -! -! * a file "EZFIO.istate.conv" containing the variational and var+PT2 energies as a function of N_det -! -! * for istate > 1, a file EZFIO.istate.delta_e.conv containing the energy difference (both var and var+PT2) with the ground state as a function of N_det - END_DOC - - provide ezfio_filename - call routine_e_conv - end - -subroutine routine_e_conv - implicit none - BEGIN_DOC -! 
routine called by :c:func:`print_e_conv` - END_DOC - integer :: N_iter_tmp - integer :: i,istate - character*(128) :: output - integer :: i_unit_output,getUnitAndOpen - character*(128) :: filename - - integer, allocatable :: n_det_tmp(:) - call ezfio_get_iterations_N_iter(N_iter_tmp) - print*,'N_iter_tmp = ',N_iter_tmp - double precision, allocatable :: e(:,:),pt2(:,:) - allocate(e(N_states, 100),pt2(N_states, 100),n_det_tmp(100)) - call ezfio_get_iterations_energy_iterations(e) - call ezfio_get_iterations_pt2_iterations(pt2) - call ezfio_get_iterations_n_det_iterations(n_det_tmp) - - - do istate = 1, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - print*,filename - output=trim(ezfio_filename)//'.'//trim(filename)//'.conv' - output=trim(output) - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det E_var E_var + PT2' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,3(F16.10,X))')n_det_tmp(i),e(istate,i),e(istate,i) + pt2(istate,i) - enddo - enddo - - if(N_states.gt.1)then - double precision, allocatable :: deltae(:,:),deltae_pt2(:,:) - allocate(deltae(N_states,100),deltae_pt2(N_states,100)) - do i = 1, N_iter_tmp - do istate = 1, N_states - deltae(istate,i) = e(istate,i) - e(1,i) - deltae_pt2(istate,i) = e(istate,i) + pt2(istate,i) - (e(1,i) + pt2(1,i)) - enddo - enddo - do istate = 2, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - output=trim(ezfio_filename)//'.'//trim(filename)//'.delta_e.conv' - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det Delta E_var Delta (E_var + PT2)' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,100(F16.10,X))')n_det_tmp(i),deltae(istate,i),deltae_pt2(istate,i) - enddo - enddo - endif - -end From 64bfddbb00e50d46bbae11c2cc895435d298bcd0 Mon Sep 17 00:00:00 2001 From: Anthony 
Scemama Date: Mon, 24 Apr 2023 10:48:21 +0200 Subject: [PATCH 68/87] Renamed scripts/exc_energy_error.py scripts/qp_exc_energy.py --- scripts/{exc_energy_error.py => qp_exc_energy.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{exc_energy_error.py => qp_exc_energy.py} (100%) diff --git a/scripts/exc_energy_error.py b/scripts/qp_exc_energy.py similarity index 100% rename from scripts/exc_energy_error.py rename to scripts/qp_exc_energy.py From d3b76e5957aadc11f2090084491e8b90bb6a50ff Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 14 Apr 2023 10:56:07 +0200 Subject: [PATCH 69/87] changed h_p to h --- external/qp2-dependencies | 2 +- src/davidson/diagonalization_hs2_dressed.irp.f | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index fd43778e..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit fd43778e12bb5858c4c780c34346be0f158b8cc7 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..d37b7386 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! 
y = h - y = h_p + y = h lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From c80ebe27b8d2d8592570aecb46e19fac6ca65064 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Apr 2023 10:31:24 +0200 Subject: [PATCH 70/87] Introducing Cholesky-decomposed SCF --- src/ao_two_e_ints/cholesky.irp.f | 100 +++++++++ src/ao_two_e_ints/map_integrals.irp.f | 2 +- src/hartree_fock/fock_matrix_hf.irp.f | 311 ++++++++++++++++---------- src/utils/linear_algebra.irp.f | 2 +- 4 files changed, 298 insertions(+), 117 deletions(-) create mode 100644 src/ao_two_e_ints/cholesky.irp.f diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..d4c201aa --- /dev/null +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -0,0 +1,100 @@ +BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] + implicit none + BEGIN_DOC + ! Number of Cholesky vectors in AO basis + END_DOC + + integer :: i,j,k,l + double precision :: xnorm0, x, integral + double precision, external :: ao_two_e_integral + + cholesky_ao_num_guess = 0 + xnorm0 = 0.d0 + x = 0.d0 + do j=1,ao_num + do i=1,ao_num + integral = ao_two_e_integral(i,i,j,j) + if (integral > ao_integrals_threshold) then + cholesky_ao_num_guess += 1 + else + x += integral + endif + enddo + enddo + print *, 'Cholesky decomposition of AO integrals' + print *, '--------------------------------------' + print *, '' + print *, 'Estimated Error: ', x + print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' + +END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] + use mmap_module + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! 
= (ik|jl) = sum_a (ik|a).(a|jl) + END_DOC + + type(c_ptr) :: ptr + integer :: fd, i,j,k,l, rank + double precision, pointer :: ao_integrals(:,:,:,:) + double precision, external :: ao_two_e_integral + + ! Store AO integrals in a memory mapped file + call mmap(trim(ezfio_work_dir)//'ao_integrals', & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, .False., ptr) + call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) + + double precision :: integral + logical, external :: ao_two_e_integral_zero + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) + do l=1,ao_num + do j=1,l + do k=1,ao_num + do i=1,k + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Call Lapack + cholesky_ao_num = cholesky_ao_num_guess + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + + ! Remove mmap + double precision, external :: getUnitAndOpen + call munmap( & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, ptr) + open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') + close(99, status='delete') + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] + implicit none + BEGIN_DOC +! 
Transposed of the Cholesky vectors in AO basis set + END_DOC + integer :: i,j,k + do j=1,ao_num + do i=1,ao_num + do k=1,ao_num + cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) + enddo + enddo + enddo +END_PROVIDER + diff --git a/src/ao_two_e_ints/map_integrals.irp.f b/src/ao_two_e_ints/map_integrals.irp.f index fa7c29cc..7d6a7da4 100644 --- a/src/ao_two_e_ints/map_integrals.irp.f +++ b/src/ao_two_e_ints/map_integrals.irp.f @@ -486,7 +486,7 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val) PROVIDE ao_two_e_integrals_in_map ao_integrals_map if (ao_one_e_integral_zero(j,l)) then - out_val = 0.d0 + out_val(1:sze) = 0.d0 return endif diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index d7d8fa7d..12641516 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -15,115 +15,59 @@ double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:) double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:) - ao_two_e_integral_alpha = 0.d0 - ao_two_e_integral_beta = 0.d0 - if (do_direct_integrals) then + if (.True.) then ! Use Cholesky-decomposed integrals + ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:) + ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:) - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0, & - !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2, & - !$OMP local_threshold)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz, & - !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) + else ! 
Use integrals in AO basis set - allocate(keys(1), values(1)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 + ao_two_e_integral_alpha = 0.d0 + ao_two_e_integral_beta = 0.d0 + if (do_direct_integrals) then - q = ao_num*ao_num*ao_num*ao_num - !$OMP DO SCHEDULE(static,64) - do p=1_8,q - call two_e_integrals_index_reverse(kk,ii,ll,jj,p) - if ( (kk(1)>ao_num).or. & - (ii(1)>ao_num).or. & - (jj(1)>ao_num).or. & - (ll(1)>ao_num) ) then - cycle - endif - k = kk(1) - i = ii(1) - l = ll(1) - j = jj(1) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0,& + !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2,& + !$OMP local_threshold) & + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz,& + !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) - logical, external :: ao_two_e_integral_zero - if (ao_two_e_integral_zero(i,k,j,l)) then - cycle - endif - local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) - if (local_threshold < ao_integrals_threshold) then - cycle - endif - i0 = i - j0 = j - k0 = k - l0 = l - values(1) = 0.d0 - local_threshold = ao_integrals_threshold/local_threshold - do k2=1,8 - if (kk(k2)==0) then - cycle - endif - i = ii(k2) - j = jj(k2) - k = kk(k2) - l = ll(k2) - c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) - c1 = SCF_density_matrix_ao_alpha(k,i) - c2 = SCF_density_matrix_ao_beta(k,i) - if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then - cycle - endif - if (values(1) == 0.d0) then - values(1) = ao_two_e_integral(k0,l0,i0,j0) - endif - integral = c0 * values(1) - ao_two_e_integral_alpha_tmp(i,j) += integral - ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(1) - 
ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral - ao_two_e_integral_beta_tmp (l,j) -= c2 * integral - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL - else - PROVIDE ao_two_e_integrals_in_map + allocate(keys(1), values(1)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 - integer(omp_lock_kind) :: lck(ao_num) - integer(map_size_kind) :: i8 - integer :: ii(8), jj(8), kk(8), ll(8), k2 - integer(cache_map_size_kind) :: n_elements_max, n_elements - integer(key_kind), allocatable :: keys(:) - double precision, allocatable :: values(:) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, & - !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) - - call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) - allocate(keys(n_elements_max), values(n_elements_max)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 - - !$OMP DO SCHEDULE(static,1) - do i8=0_8,ao_integrals_map%map_size - n_elements = n_elements_max - call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) - do k1=1,n_elements - call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + q = ao_num*ao_num*ao_num*ao_num + !$OMP DO SCHEDULE(static,64) + do p=1_8,q + call two_e_integrals_index_reverse(kk,ii,ll,jj,p) + if ( (kk(1)>ao_num).or. & + (ii(1)>ao_num).or. 
& + (jj(1)>ao_num).or. & + (ll(1)>ao_num) ) then + cycle + endif + k = kk(1) + i = ii(1) + l = ll(1) + j = jj(1) + logical, external :: ao_two_e_integral_zero + if (ao_two_e_integral_zero(i,k,j,l)) then + cycle + endif + local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) + if (local_threshold < ao_integrals_threshold) then + cycle + endif + i0 = i + j0 = j + k0 = k + l0 = l + values(1) = 0.d0 + local_threshold = ao_integrals_threshold/local_threshold do k2=1,8 if (kk(k2)==0) then cycle @@ -132,25 +76,162 @@ j = jj(k2) k = kk(k2) l = ll(k2) - integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) + c1 = SCF_density_matrix_ao_alpha(k,i) + c2 = SCF_density_matrix_ao_beta(k,i) + if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then + cycle + endif + if (values(1) == 0.d0) then + values(1) = ao_two_e_integral(k0,l0,i0,j0) + endif + integral = c0 * values(1) ao_two_e_integral_alpha_tmp(i,j) += integral ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(k1) - ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral - ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + integral = values(1) + ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral + ao_two_e_integral_beta_tmp (l,j) -= c2 * integral enddo enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + else + PROVIDE 
ao_two_e_integrals_in_map + integer(omp_lock_kind) :: lck(ao_num) + integer(map_size_kind) :: i8 + integer :: ii(8), jj(8), kk(8), ll(8), k2 + integer(cache_map_size_kind) :: n_elements_max, n_elements + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max,& + !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 + + !$OMP DO SCHEDULE(static,1) + do i8=0_8,ao_integrals_map%map_size + n_elements = n_elements_max + call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) + do k1=1,n_elements + call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + + do k2=1,8 + if (kk(k2)==0) then + cycle + endif + i = ii(k2) + j = jj(k2) + k = kk(k2) + l = ll(k2) + integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + ao_two_e_integral_alpha_tmp(i,j) += integral + ao_two_e_integral_beta_tmp (i,j) += integral + integral = values(k1) + ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral + ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + + endif endif +END_PROVIDER + + 
BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha_chol, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta_chol , (ao_num, ao_num) ] + use map_module + implicit none + BEGIN_DOC + ! Alpha and Beta Fock matrices in AO basis set + END_DOC + + integer :: m,n,l,s,j + double precision :: integral + double precision, allocatable :: X(:), X2(:,:,:,:), X3(:,:,:,:) + + allocate (X(cholesky_ao_num)) + + + ! X(j) = \sum_{mn} SCF_density_matrix_ao(m,n) * cholesky_ao(m,n,j) + call dgemm('T','N',cholesky_ao_num,1,ao_num*ao_num,1.d0, & + cholesky_ao, ao_num*ao_num, & + SCF_density_matrix_ao, ao_num*ao_num,0.d0, & + X, cholesky_ao_num) +! + + ! ao_two_e_integral_alpha(m,n) = \sum_{j} cholesky_ao(m,n,j) * X(j) + call dgemm('N','N',ao_num*ao_num,1,cholesky_ao_num, 1.d0, & + cholesky_ao, ao_num*ao_num, & + X, cholesky_ao_num, 0.d0, & + ao_two_e_integral_alpha_chol, ao_num*ao_num) + + deallocate(X) + + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + + + allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + +! 
ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,1), ao_num) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + + do s=1,ao_num + do j=1,cholesky_ao_num + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + deallocate(X2) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + + deallocate(X3) + END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ] diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index c02560e3..3b43d607 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1854,7 +1854,7 @@ do k = 1, N end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! 
to do the swapping in-place -U = 0.00D+0 +U(:,:) = 0.00D+0 do k = 1, N l = piv(k) U(l, :) = A(1:rank, k) From bfdfb546bd29d6d16d3cb2024f707f758c89a68f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 11:41:17 +0200 Subject: [PATCH 71/87] Fix pt2_max extra iterations --- src/cipsi/cipsi.irp.f | 8 +------- src/cipsi/stochastic_cipsi.irp.f | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 88aaeae0..1279b5cd 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -128,13 +128,7 @@ subroutine run_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then if (do_pt2) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index b83e658a..deffaeb7 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -118,13 +118,7 @@ subroutine run_stochastic_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) From ba2e783e8c6531ddeda40ac72d223a1d1ee67dcc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 11:41:17 +0200 Subject: [PATCH 72/87] Fix pt2_max extra iterations --- src/cipsi/cipsi.irp.f | 8 +------- src/cipsi/stochastic_cipsi.irp.f | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 5225c6df..3b344f62 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -130,13 +130,7 @@ 
subroutine run_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then if (do_pt2) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 35e80eb8..6a14fbf3 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -119,13 +119,7 @@ subroutine run_stochastic_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) From e8782546c1387bfc030f5e659d6a914d038d280f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 11:54:29 +0200 Subject: [PATCH 73/87] Better fix for pt2_max --- src/cipsi/cipsi.irp.f | 8 +++++++- src/cipsi/stochastic_cipsi.irp.f | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 3b344f62..f3a77609 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -130,7 +130,13 @@ subroutine run_cipsi if (qp_stop()) exit enddo - if ((.not.qp_stop()).and.(N_det > N_det_max)) then + ! If stopped because N_det > N_det_max, do an extra iteration to compute the PT2 + if ((.not.qp_stop()).and. & + (N_det > N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. 
& + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.& + (correlation_energy_ratio <= correlation_energy_ratio_max) & + ) then if (do_pt2) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 6a14fbf3..0b9a3548 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -119,7 +119,13 @@ subroutine run_stochastic_cipsi if (qp_stop()) exit enddo - if ((.not.qp_stop()).and.(N_det > N_det_max)) then + ! If stopped because N_det > N_det_max, do an extra iteration to compute the PT2 + if ((.not.qp_stop()).and. & + (N_det > N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.& + (correlation_energy_ratio <= correlation_energy_ratio_max) & + ) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) From 0330ac6ccc4eb24deaef70632aa9dcd1e8fabfaa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 15:50:40 +0200 Subject: [PATCH 74/87] 4idx transformation with cholesky --- config/ifort_2021_xHost.cfg | 2 +- src/ao_two_e_ints/EZFIO.cfg | 5 ++ src/hartree_fock/fock_matrix_hf.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 16 ++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 74 +++++++++++++++++++++++-- 5 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 src/mo_two_e_ints/cholesky.irp.f diff --git a/config/ifort_2021_xHost.cfg b/config/ifort_2021_xHost.cfg index 1161833b..9170b059 100644 --- a/config/ifort_2021_xHost.cfg +++ b/config/ifort_2021_xHost.cfg @@ -6,7 +6,7 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : ifort -fpic +FC : ifort -fpic -diag-disable 5462 LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=64 -DINTEL diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 
b18c65d1..caed4698 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -18,3 +18,8 @@ interface: ezfio,provider,ocaml default: False ezfio_name: direct +[do_ao_cholesky] +type: logical +doc: Perform Cholesky decomposition of AO integrals +interface: ezfio,provider,ocaml +default: True diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 12641516..8c6658c5 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -15,7 +15,7 @@ double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:) double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:) - if (.True.) then ! Use Cholesky-decomposed integrals + if (do_ao_cholesky) then ! Use Cholesky-decomposed integrals ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:) ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..14d3c696 --- /dev/null +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -0,0 +1,16 @@ +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: k + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index ae299e9f..b7ef901d 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -50,13 +50,16 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call cpu_time(cpu_1) if(no_vvvv_integrals)then -! 
call four_idx_novvvv call four_idx_novvvv_old else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm + if (do_ao_cholesky) then + call add_integrals_to_map_cholesky else - call add_integrals_to_map(full_ijkl_bitmask_4) + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm + else + call add_integrals_to_map(full_ijkl_bitmask_4) + endif endif endif @@ -175,7 +178,7 @@ subroutine add_integrals_to_map(mask_ijkl) implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) @@ -450,13 +453,72 @@ subroutine add_integrals_to_map(mask_ijkl) end +subroutine add_integrals_to_map_cholesky + use bitmasks + implicit none + + BEGIN_DOC + ! Adds integrals to the MO map using Cholesky vectors + END_DOC + + integer :: i,j,k,l,m + integer :: size_buffer, n_integrals + size_buffer = min(mo_num*mo_num*mo_num,16000000) + + double precision, allocatable :: Vtmp(:,:,:,:) + integer(key_kind) , allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_value(:) + + if (.True.) then + ! 
In-memory transformation + + allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + + call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & + cholesky_mo, mo_num*mo_num, & + cholesky_mo, mo_num*mo_num, 0.d0, & + Vtmp, mo_num*mo_num) + + !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + n_integrals = 0 + !$OMP DO + do l=1,mo_num + do k=1,l + do j=1,mo_num + do i=1,j + if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then + n_integrals += 1 + buffer_value(n_integrals) = Vtmp(i,j,k,l) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 + endif + endif + enddo + enddo + enddo + enddo + !$OMP END DO + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + deallocate(buffer_i, buffer_value) + !$OMP END PARALLEL + + deallocate(Vtmp) + call map_unique(mo_integrals_map) + + endif + +end subroutine add_integrals_to_map_three_indices(mask_ijk) use bitmasks implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! 
Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijk(N_int,3) From c96e7c754e609fbc7391451a2d6e9588997b5958 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 May 2023 12:25:37 +0200 Subject: [PATCH 75/87] mo_num -> n_core_inact_act_orb in RDMs --- src/two_body_rdm/two_e_dm_mo.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 7e35fc7b..6bd115a2 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,13 +16,13 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate - do l=1,mo_num + do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) - do k=1,mo_num + do k=1,n_core_inact_act_orb korb = list_core_inact_act(k) - do j=1,mo_num + do j=1,n_core_inact_act_orb jorb = list_core_inact_act(j) - do i=1,mo_num + do i=1,n_core_inact_act_orb iorb = list_core_inact_act(i) two_e_dm_mo(iorb,jorb,korb,lorb) = state_av_full_occ_2_rdm_spin_trace_mo(i,j,k,l) enddo From f314c5abc291144eab0d76a591e73166ce90fa05 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 11:14:57 +0200 Subject: [PATCH 76/87] Added qp_json --- scripts/qp_exc_energy.py | 18 ++++++----- scripts/utility/qp_json.py | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 scripts/utility/qp_json.py diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index ba9d7917..7e7f1d67 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -42,13 +42,15 @@ import sys, os import scipy import scipy.stats from math import sqrt, gamma, exp -import json +import qp_json -def read_data(filename,state): +def read_data(ezfio_filename,state): """ Read energies and PT2 from input file """ - with open(filename,'r') as f: - lines = 
json.load(f)['fci'] + data = qp_json.load_last(ezfio_filename) + for method in data.keys(): + x = data[method] + lines = x print(f"State: {state}") @@ -138,15 +140,15 @@ def compute(data): return mu, err, bias, p -filename = sys.argv[1] -print(filename) +ezfio_filename = sys.argv[1] +print(ezfio_filename) if len(sys.argv) > 2: state = int(sys.argv[2]) else: state = 1 -data = read_data(filename,state) +data = read_data(ezfio_filename,state) mu, err, bias, _ = compute(data) -print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err)) +print(" %s: %8.3f +/- %5.3f eV\n"%(ezfio_filename, mu, err)) import numpy as np A = np.array( [ [ data[-1][1], 1. ], diff --git a/scripts/utility/qp_json.py b/scripts/utility/qp_json.py new file mode 100644 index 00000000..09ffe1be --- /dev/null +++ b/scripts/utility/qp_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +import os +import json + +def fix_json(s): + """Properly termitates an incomplete JSON file""" + + s = s.replace(' ','') + s = s.replace('\n','') + s = s.replace('\t','') + s = s.replace(",{}",'') + tmp = [ c for c in s if c in "[]{}" ] + tmp = "".join(tmp) + tmp_old = "" + while tmp != tmp_old: + tmp_old = tmp + tmp = tmp.replace("{}","") + tmp = tmp.replace("[]","") + while s[-1] in [ ',', '\n', ' ', '\t' ]: + s = s[:-1] + tmp = [ c for c in tmp ] + tmp.reverse() + for c in tmp: + if c == '[': s += "]" + elif c == '{': s += "}" + return s + + +def load(filename): + """Loads a JSON file after calling the fix_json function.""" + with open(filename,'r') as f: + data = f.read() + new_data = fix_json(data) + return json.loads(new_data) + + +def load_all(ezfio_filename): + """Loads all JSON files of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + for filename in [ x for x in os.listdir(prefix) if x.endswith(".json")]: + d[filename] = load(prefix+filename) + return d + + +def load_last(ezfio_filename): + """Loads last JSON file of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + l = [ x for x in 
os.listdir(prefix) if x.endswith(".json")] + l.sort() + filename = l[-1] + print(filename) + return load(prefix+filename) + + +def fix(ezfio_filename): + """Fixes all JSON files in an EZFIO.""" + d = load_all(ezfio_filename) + prefix = ezfio_filename+'/json/' + for filename in d.keys(): + with open(prefix+filename, 'w') as json_file: + json.dump(d[filename], json_file) + + + From 20bed4f44a85e5717ac535cd98360d55119518ac Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:22:56 +0200 Subject: [PATCH 77/87] Fix reversed print of minimum PT2 in extrapolations --- src/iterations/print_extrapolation.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index 111429bf..a7f85693 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -25,7 +25,7 @@ subroutine print_extrapolated_energy write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' do k=2,N_iter_p - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,k), extrapolated_energy(k,1) + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter_p+1-k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' From 52da1de877934f398d76d557457a0b1ae5c8e345 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:54:02 +0200 Subject: [PATCH 78/87] qp_extract_cipsi_data.py uses qp_json --- scripts/qp_extract_cipsi_data.py | 80 +++++++++++++------------------- 1 file changed, 31 insertions(+), 49 deletions(-) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 70935d73..200ab7aa 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -1,55 +1,37 @@ #!/usr/bin/env python3 -import re +import qp_json import sys -# Read output file -with open(sys.argv[1], 'r') as file: - output = file.read() +if len(sys.argv) == 1: + 
print(f"syntax: {sys.argv[0]} EZFIO_FILE") + +d = qp_json.load_all(sys.argv[1]) + +k = [ x for x in d.keys() ] +k.sort() + +print("# Energy PT2 PT2_err rPT2 rPT2_err exFCI\n") +for f in k: + try: + j = d[f]["fci"] + except: + continue + + print(f"# {f}") + for e in j: + + out = f" {e['n_det']:8d}" + + nstates = len(e["states"]) + for ee in e["states"]: + try: + exc_energy = ee['ex_energy'][0] + except: + exc_energy = 0. + out += f" {ee['energy']:16.8f} {ee['pt2']:e} {ee['pt2_err']:e} {ee['rpt2']:e} {ee['rpt2_err']:e} {exc_energy:16.8f}" + print(out) + + print("\n") -def extract_data(output): - lines = output.split("\n") - data = [] - - n_det = None - e = None - pt2 = None - err_pt2 = None - rpt2 = None - err_rpt2 = None - e_ex = None - - - reading = False - for iline, line in enumerate(lines): - if line.startswith("Summary at N_det"): - reading = False - - if not reading and line.startswith(" N_det "): - n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) - reading = True - - if reading: - if line.startswith(" E "): - e = float(re.search(r"E\s+=\s+(-?\d+\.\d+)", line).group(1)) - elif line.startswith(" PT2 "): - pt2 = float(re.search(r"PT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_pt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif line.startswith(" rPT2 "): - rpt2 = float(re.search(r"rPT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_rpt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif "minimum PT2 Extrapolated energy" in line: - e_ex_line = lines[iline+2] - e_ex = float(e_ex_line.split()[1]) - reading = False - new_data = " {:8d} {:16.8f} {:e} {:e} {:e} {:e} {:16.8f}".format(n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex) - data.append(new_data) - n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None - - return data - -data = extract_data(output) - -for item in data: - print(item) From 46e3faed3cdc90fa3c6a82bbd40655378b438ea7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 
14:44:45 +0200 Subject: [PATCH 79/87] allow no basis set --- data/basis/none | 5 + ocaml/Input_ao_basis.ml | 8 +- ocaml/qp_create_ezfio.ml | 262 ++++++++++++----------- scripts/ezfio_interface/qp_edit_template | 36 ++-- src/ao_two_e_ints/EZFIO.cfg | 2 +- src/mo_basis/utils.irp.f | 5 +- 6 files changed, 161 insertions(+), 157 deletions(-) create mode 100644 data/basis/none diff --git a/data/basis/none b/data/basis/none new file mode 100644 index 00000000..df5d59f1 --- /dev/null +++ b/data/basis/none @@ -0,0 +1,5 @@ +$DATA + +HYDROGEN + +$END diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 95d37a7a..841089ea 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -247,8 +247,7 @@ end = struct let read () = - if (Ezfio.has_ao_basis_ao_basis ()) then - begin + try let result = { ao_basis = read_ao_basis (); ao_num = read_ao_num () ; @@ -267,9 +266,8 @@ end = struct |> MD5.to_string |> Ezfio.set_ao_basis_ao_md5 ; Some result - end - else - None + with + | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) ;; diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 4583b118..8e452762 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -478,6 +478,7 @@ let run ?o b au c d m p cart xyz_file = let nmax = Nucl_number.get_max () in + let rec do_work (accu:(Atom.t*Nucl_number.t) list) (n:int) = function | [] -> accu | e::tail -> @@ -520,141 +521,144 @@ let run ?o b au c d m p cart xyz_file = in let long_basis = Long_basis.of_basis basis in let ao_num = List.length long_basis in - Ezfio.set_ao_basis_ao_num ao_num; - Ezfio.set_ao_basis_ao_basis b; - Ezfio.set_basis_basis b; - let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis - and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis - and ao_power= - let l = list_map (fun (x,_,_) -> x) long_basis in - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ 
- (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) - in - let ao_prim_num_max = List.fold_left (fun s x -> - if x > s then x - else s) 0 ao_prim_num - in - let gtos = - list_map (fun (_,x,_) -> x) long_basis - in - - let create_expo_coef ec = - let coefs = - begin match ec with - | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> - AO_coef.to_float coef) x.Gto.lc) gtos - | `Expos -> list_map (fun x-> - list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc) gtos - end + if ao_num > 0 then + begin + Ezfio.set_ao_basis_ao_num ao_num; + Ezfio.set_ao_basis_ao_basis b; + Ezfio.set_basis_basis b; + let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis + and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis + and ao_power= + let l = list_map (fun (x,_,_) -> x) long_basis in + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) in - let rec get_n n accu = function - | [] -> List.rev accu - | h::tail -> - let y = - begin match List.nth_opt h n with - | Some x -> x - | None -> 0. + let ao_prim_num_max = List.fold_left (fun s x -> + if x > s then x + else s) 0 ao_prim_num + in + let gtos = + list_map (fun (_,x,_) -> x) long_basis + in + + let create_expo_coef ec = + let coefs = + begin match ec with + | `Coefs -> list_map (fun x-> + list_map (fun (_,coef) -> + AO_coef.to_float coef) x.Gto.lc) gtos + | `Expos -> list_map (fun x-> + list_map (fun (prim,_) -> AO_expo.to_float + prim.GaussianPrimitive.expo) x.Gto.lc) gtos end - in - get_n n (y::accu) tail + in + let rec get_n n accu = function + | [] -> List.rev accu + | h::tail -> + let y = + begin match List.nth_opt h n with + | Some x -> x + | None -> 0. 
+ end + in + get_n n (y::accu) tail + in + let rec build accu = function + | n when n=ao_prim_num_max -> accu + | n -> build ( accu @ (get_n n [] coefs) ) (n+1) + in + build [] 0 in - let rec build accu = function - | n when n=ao_prim_num_max -> accu - | n -> build ( accu @ (get_n n [] coefs) ) (n+1) - in - build [] 0 - in - let ao_coef = create_expo_coef `Coefs - and ao_expo = create_expo_coef `Expos - in - let () = - let shell_num = List.length basis in - let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = - list_map ( fun (g,_) -> g.Gto.lc ) basis - in - let ang_mom = - list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> - let x, _ = List.hd l in - Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int - ) lc - in - let expo = - list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc - |> List.concat - in - let coef = - list_map (fun l -> - list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l - ) lc - |> List.concat - in - let shell_prim_num = - list_map List.length lc - in - let shell_idx = - let rec make_list n accu = function - | 0 -> accu - | i -> make_list n (n :: accu) (i-1) + let ao_coef = create_expo_coef `Coefs + and ao_expo = create_expo_coef `Expos in - let rec aux count accu = function - | [] -> List.rev accu - | l::rest -> - let new_l = make_list count accu (List.length l) in - aux (count+1) new_l rest - in - aux 1 [] lc - in - let prim_num = List.length coef in - Ezfio.set_basis_typ "Gaussian"; - Ezfio.set_basis_shell_num shell_num; - Ezfio.set_basis_prim_num prim_num ; - Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); - Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; - Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; - Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list - ~rank:1 
~dim:[| shell_num |] - ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) - ) ; - Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| nucl_num |] - ~data:( - list_map (fun (_,n) -> Nucl_number.to_int n) basis - |> List.fold_left (fun accu i -> - match accu with - | [] -> [(1,i)] - | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) - ) [] - |> List.rev - |> List.map fst - )) ; - Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:coef) ; - Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:expo) ; + let () = + let shell_num = List.length basis in + let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = + list_map ( fun (g,_) -> g.Gto.lc ) basis + in + let ang_mom = + list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> + let x, _ = List.hd l in + Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int + ) lc + in + let expo = + list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc + |> List.concat + in + let coef = + list_map (fun l -> + list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l + ) lc + |> List.concat + in + let shell_prim_num = + list_map List.length lc + in + let shell_idx = + let rec make_list n accu = function + | 0 -> accu + | i -> make_list n (n :: accu) (i-1) + in + let rec aux count accu = function + | [] -> List.rev accu + | l::rest -> + let new_l = make_list count accu (List.length l) in + aux (count+1) new_l rest + in + aux 1 [] lc + in + let prim_num = List.length coef in + Ezfio.set_basis_typ "Gaussian"; + Ezfio.set_basis_shell_num shell_num; + Ezfio.set_basis_prim_num prim_num ; + Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); + Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; + Ezfio.set_basis_shell_index 
(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; + Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] + ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) + ) ; + Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| nucl_num |] + ~data:( + list_map (fun (_,n) -> Nucl_number.to_int n) basis + |> List.fold_left (fun accu i -> + match accu with + | [] -> [(1,i)] + | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) + ) [] + |> List.rev + |> List.map fst + )) ; + Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:coef) ; + Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:expo) ; - Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; - Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; - Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; - Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; - Ezfio.set_ao_basis_ao_cartesian(cart); - in - match Input.Ao_basis.read () with - | None -> failwith "Error in basis" - | Some x -> Input.Ao_basis.write x + Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; + Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; + Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; + Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; + Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list + ~rank:2 
~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; + Ezfio.set_ao_basis_ao_cartesian(cart); + in + match Input.Ao_basis.read () with + | None -> failwith "Error in basis" + | Some x -> Input.Ao_basis.write x + end in let () = try write_file () with @@ -781,7 +785,7 @@ If a file with the same name as the basis set exists, this file will be read. O run ?o:output basis au charge dummy multiplicity pseudo cart xyz_filename ) with - | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt +(* | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt *) | Command_line.Error txt -> Printf.eprintf "Command line error: %s\n%!" txt diff --git a/scripts/ezfio_interface/qp_edit_template b/scripts/ezfio_interface/qp_edit_template index 4218456d..fe718a50 100644 --- a/scripts/ezfio_interface/qp_edit_template +++ b/scripts/ezfio_interface/qp_edit_template @@ -172,25 +172,23 @@ let run check_only ?ndet ?state ezfio_filename = (* Reorder basis set *) begin - let aos = - match Input.Ao_basis.read() with - | Some x -> x - | _ -> assert false - in - let ordering = Input.Ao_basis.ordering aos in - let test = Array.copy ordering in - Array.sort compare test ; - if test <> ordering then - begin - Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; - let new_aos = Input.Ao_basis.reorder aos in - Input.Ao_basis.write new_aos; - match Input.Mo_basis.read() with - | None -> () - | Some mos -> - let new_mos = Input.Mo_basis.reorder mos ordering in - Input.Mo_basis.write new_mos - end + match Input.Ao_basis.read() with + | Some aos -> + let ordering = Input.Ao_basis.ordering aos in + let test = Array.copy ordering in + Array.sort compare test ; + if test <> ordering then + begin + Printf.eprintf "Warning: Basis set is not properly ordered. 
Redordering.\n"; + let new_aos = Input.Ao_basis.reorder aos in + Input.Ao_basis.write new_aos; + match Input.Mo_basis.read() with + | None -> () + | Some mos -> + let new_mos = Input.Mo_basis.reorder mos ordering in + Input.Mo_basis.write new_mos + end + | _ -> () end; begin diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index caed4698..4ab080ec 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -22,4 +22,4 @@ ezfio_name: direct type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: True +default: False diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 12c6c79d..5f664c41 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -3,7 +3,6 @@ subroutine save_mos double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(mo_num) call ezfio_set_mo_basis_mo_label(mo_label) call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -27,7 +26,7 @@ subroutine save_mos_no_occ double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) !call ezfio_set_mo_basis_mo_num(mo_num) !call ezfio_set_mo_basis_mo_label(mo_label) !call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -48,7 +47,7 @@ subroutine save_mos_truncated(n) double precision, allocatable :: buffer(:,:) integer :: i,j,n - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! 
call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(n) call ezfio_set_mo_basis_mo_label(mo_label) From f0b71bc2b0ec38dc8d151f7bc9c410b781e28b03 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 18:06:50 +0200 Subject: [PATCH 80/87] Add libtrexio in configure --- configure | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/configure b/configure index d3377093..4dd753ff 100755 --- a/configure +++ b/configure @@ -9,6 +9,8 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX +TREXIO_VERSION=2.3.1 + # Force GCC instead of ICC for dependencies export CC=gcc @@ -189,7 +191,7 @@ if [[ "${PACKAGES}.x" != ".x" ]] ; then fi if [[ ${PACKAGES} = all ]] ; then - PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats" + PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio" fi @@ -203,6 +205,33 @@ for PACKAGE in ${PACKAGES} ; do mv ninja "\${QP_ROOT}"/bin/ EOF + elif [[ ${PACKAGE} = trexio-nohdf5 ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} --without-hdf5 + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files + tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz + mv ninja "\${QP_ROOT}"/bin/ +EOF + elif [[ ${PACKAGE} = trexio ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files +EOF + 
elif [[ ${PACKAGE} = gmp ]] ; then @@ -338,6 +367,12 @@ if [[ ${ZEROMQ} = $(not_found) ]] ; then fail fi +TREXIO=$(find_lib -ltrexio) +if [[ ${TREXIO} = $(not_found) ]] ; then + error "TREXIO (trexio,trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" + fail +fi + F77ZMQ=$(find_lib -lzmq -lf77zmq -lpthread) if [[ ${F77ZMQ} = $(not_found) ]] ; then error "Fortran binding of ZeroMQ (f77zmq) is not installed." From 01b70ffb17389485d32069d2f53041998c94763d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:45:18 +0200 Subject: [PATCH 81/87] Removed penalty method from TCSCF: problem with normal ordering --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index ea9cacff..3e89bbe2 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -304,22 +304,23 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Penalty method ! -------------- - if (s2_eig) then - h_p = s_ - do k=1,shift2 - h_p(k,k) = h_p(k,k) - expected_s2 - enddo - if (only_expected_s2) then - alpha = 0.1d0 - h_p = h + alpha*h_p - else - alpha = 0.0001d0 - h_p = h + alpha*h_p - endif - else +! if (s2_eig) then +! h_p = s_ +! do k=1,shift2 +! h_p(k,k) = h_p(k,k) - expected_s2 +! enddo +! if (only_expected_s2) then +! alpha = 0.1d0 +! h_p = h + alpha*h_p +! else +! alpha = 0.0001d0 +! h_p = h + alpha*h_p +! endif +! else h_p = h alpha = 0.d0 - endif +! endif + ! Diagonalize h y = lambda y ! 
--------------------------- From 49598822938da0ac9fbe9334f6f1d61d18de7f93 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:48:48 +0200 Subject: [PATCH 82/87] Added TREXIO module --- src/trexio/EZFIO.cfg | 54 ++ src/trexio/README.rst | 6 + src/trexio/export_trexio.irp.f | 7 + src/trexio/export_trexio_routines.irp.f | 604 ++++++++++++++++++++ src/trexio/import_trexio_determinants.irp.f | 79 +++ src/trexio/import_trexio_integrals.irp.f | 146 +++++ src/trexio/qp_import_trexio.py | 415 ++++++++++++++ src/trexio/trexio_file.irp.f | 20 + src/trexio/trexio_module.F90 | 1 + 9 files changed, 1332 insertions(+) create mode 100644 src/trexio/EZFIO.cfg create mode 100644 src/trexio/README.rst create mode 100644 src/trexio/export_trexio.irp.f create mode 100644 src/trexio/export_trexio_routines.irp.f create mode 100644 src/trexio/import_trexio_determinants.irp.f create mode 100644 src/trexio/import_trexio_integrals.irp.f create mode 100755 src/trexio/qp_import_trexio.py create mode 100644 src/trexio/trexio_file.irp.f create mode 100644 src/trexio/trexio_module.F90 diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg new file mode 100644 index 00000000..8606e908 --- /dev/null +++ b/src/trexio/EZFIO.cfg @@ -0,0 +1,54 @@ +[backend] +type: integer +doc: Back-end used in TREXIO. 
0: HDF5, 1:Text +interface: ezfio, ocaml, provider +default: 0 + +[trexio_file] +type: character*(256) +doc: Name of the exported TREXIO file +interface: ezfio, ocaml, provider +default: None + +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + +[export_ao_one_e_ints] +type: logical +doc: If True, export one-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints] +type: logical +doc: If True, export two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints] +type: logical +doc: If True, export two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/README.rst b/src/trexio/README.rst new file mode 100644 index 00000000..7d7304c6 --- /dev/null +++ b/src/trexio/README.rst @@ -0,0 +1,6 @@ +====== +trexio +====== + +Module for handling TREXIO files. +See https://github.com/trex-coe/trexio diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f new file mode 100644 index 00000000..3ae0dcb4 --- /dev/null +++ b/src/trexio/export_trexio.irp.f @@ -0,0 +1,7 @@ +program export_trexio_prog + implicit none + read_wf = .True. 
+ SOFT_TOUCH read_wf + call export_trexio +end + diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f new file mode 100644 index 00000000..d69e7a70 --- /dev/null +++ b/src/trexio/export_trexio_routines.irp.f @@ -0,0 +1,604 @@ +subroutine export_trexio + use trexio + implicit none + BEGIN_DOC + ! Exports the wave function in TREXIO format + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + double precision, allocatable :: factor(:) + + print *, 'TREXIO file : '//trim(trexio_filename) + print *, '' + + call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') + if (backend == 0) then + f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) + endif + if (f == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + call ezfio_set_trexio_trexio_file(trexio_filename) + +! ------------------------------------------------------------------------------ + +! Electrons +! --------- + + print *, 'Electrons' + + rc = trexio_write_electron_up_num(f, elec_alpha_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_electron_dn_num(f, elec_beta_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Nuclei +! ------ + + print *, 'Nuclei' + + rc = trexio_write_nucleus_num(f, nucl_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_charge(f, nucl_charge) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_label(f, nucl_label, 32) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Pseudo-potentials +! 
----------------- + + if (do_pseudo) then + + print *, 'ECP' + integer :: num + + num = 0 + do k=1,pseudo_klocmax + do i=1,nucl_num + if (pseudo_dz_k(i,k) /= 0.d0) then + num = num+1 + end if + end do + end do + + do l=0,pseudo_lmax + do k=1,pseudo_kmax + do i=1,nucl_num + if (pseudo_dz_kl(i,k,l) /= 0.d0) then + num = num+1 + end if + end do + end do + end do + + integer, allocatable :: ang_mom(:), nucleus_index(:), power(:), lmax(:) + double precision, allocatable :: exponent(:), coefficient(:) + + allocate(ang_mom(num), nucleus_index(num), exponent(num), coefficient(num), power(num), & + lmax(nucl_num) ) + + do i=1,nucl_num + lmax(i) = -1 + do l=0,pseudo_lmax + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + lmax(i) = max(lmax(i), l) + end if + end do + end do + end do + + j = 0 + do i=1,nucl_num + do k=1,pseudo_klocmax + if (pseudo_dz_k_transp(k,i) /= 0.d0) then + j = j+1 + ang_mom(j) = lmax(i)+1 + nucleus_index(j) = i + exponent(j) = pseudo_dz_k_transp(k,i) + coefficient(j) = pseudo_v_k_transp(k,i) + power(j) = pseudo_n_k_transp(k,i) + end if + end do + + do l=0,lmax(i) + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + j = j+1 + ang_mom(j) = l + nucleus_index(j) = i + exponent(j) = pseudo_dz_kl_transp(k,l,i) + coefficient(j) = pseudo_v_kl_transp(k,l,i) + power(j) = pseudo_n_kl_transp(k,l,i) + end if + end do + end do + end do + + + lmax(:) = lmax(:)+1 + rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_num(f, num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_ang_mom(f, ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_exponent(f, exponent) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = 
trexio_write_ecp_coefficient(f, coefficient) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_power(f, power) + call trexio_assert(rc, TREXIO_SUCCESS) + + endif + + +! Basis +! ----- + + print *, 'Basis' + + + rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_prim_num(f, prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_num(f, shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(shell_num)) + if (ao_normalized) then + factor(1:shell_num) = shell_normalization_factor(1:shell_num) + else + factor(1:shell_num) = 1.d0 + endif + rc = trexio_write_basis_shell_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + + deallocate(factor) + + rc = trexio_write_basis_shell_index(f, shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_exponent(f, prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_coefficient(f, prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + + +! Atomic orbitals +! 
--------------- + + print *, 'AOs' + + rc = trexio_write_ao_num(f, ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_cartesian(f, 1) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f, ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + integer :: i, pow0(3), powA(3), j, k, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 + + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + +! One-e AO integrals +! ------------------ + + if (export_ao_one_e_ints) then + print *, 'AO one-e integrals' + + rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e AO integrals +! 
------------------ + + if (export_ao_two_e_ints) then + print *, 'AO two-e integrals' + PROVIDE ao_two_e_integrals_in_map + + integer(8), parameter :: BUFSIZE=100000_8 + double precision :: eri_buffer(BUFSIZE), integral + integer(4) :: eri_index(4,BUFSIZE) + integer(8) :: icount, offset + + double precision, external :: get_ao_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,ao_num + do k=1,ao_num + do j=l,ao_num + do i=k,ao_num + if (i==j .and. k= 0_8) then + rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e AO integrals - Cholesky +! ----------------------------- + + integer(4) :: chol_index(3,BUFSIZE) + double precision :: chol_buffer(BUFSIZE) + + if (export_ao_two_e_ints_cholesky) then + print *, 'AO two-e integrals Cholesky' + + rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,ao_num + do i=1,ao_num + integral = cholesky_ao(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + + +! Molecular orbitals +! 
------------------ + + print *, 'MOs' + + rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_num(f, mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_coefficient(f, mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + +! One-e MO integrals +! ------------------ + + if (export_mo_one_e_ints) then + print *, 'MO one-e integrals' + + rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e MO integrals +! ------------------ + + if (export_mo_two_e_ints) then + print *, 'MO two-e integrals' + PROVIDE mo_two_e_integrals_in_map + + double precision, external :: mo_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=l,mo_num + do i=k,mo_num + if (i==j .and. k 0_8) then + rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e MO integrals - Cholesky +! 
----------------------------- + + if (export_mo_two_e_ints_cholesky) then + print *, 'MO two-e integrals Cholesky' + + rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,mo_num + do i=1,mo_num + integral = cholesky_mo(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! One-e RDM +! --------- + + rc = trexio_write_rdm_1e(f,one_e_dm_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Two-e RDM +! 
--------- + + if (export_rdm) then + PROVIDE two_e_dm_mo + print *, 'Two-e RDM' + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=1,mo_num + do i=1,mo_num + integral = two_e_dm_mo(i,j,k,l) + if (integral == 0.d0) cycle + icount += 1_8 + eri_buffer(icount) = integral + eri_index(1,icount) = i + eri_index(2,icount) = j + eri_index(3,icount) = k + eri_index(4,icount) = l + if (icount == BUFSIZE) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + end do + + if (icount >= 0_8) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! ------------------------------------------------------------------------------ + + ! Determinants + ! ------------ + + integer*8, allocatable :: det_buffer(:,:,:) + double precision, allocatable :: coef_buffer(:,:) + integer :: nint + +! rc = trexio_read_determinant_int64_num(f, nint) +! 
call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) + + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f, n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_set_state (f, 0) + call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,n_det + icount += 1_8 + det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) + coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) + if (icount == BUFSIZE) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + offset += icount + icount = 0_8 + end if + end do + + if (icount >= 0_8) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + end if + + deallocate ( det_buffer, coef_buffer ) + + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) + +end + + +! -*- mode: f90 -*- diff --git a/src/trexio/import_trexio_determinants.irp.f b/src/trexio/import_trexio_determinants.irp.f new file mode 100644 index 00000000..1759bb94 --- /dev/null +++ b/src/trexio/import_trexio_determinants.irp.f @@ -0,0 +1,79 @@ +program import_determinants_ao + call run +end + +subroutine run + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import determinants from TREXIO + END_DOC + + integer(trexio_t) :: f ! 
TREXIO file handle + integer(trexio_exit_code) :: rc + + integer :: m + + double precision, allocatable :: coef_buffer(:,:) + integer*8 , allocatable :: det_buffer(:,:,:) + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + + + ! Determinants + ! ------------ + + integer :: nint, nstates + integer :: bufsize + + rc = trexio_read_state_num(f, nstates) + call trexio_assert(rc, TREXIO_SUCCESS) + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + + integer*8 :: offset, icount + + rc = trexio_read_determinant_num(f, bufsize) + call trexio_assert(rc, TREXIO_SUCCESS) + print *, 'N_det = ', bufsize + + allocate ( det_buffer(nint, 2, bufsize), coef_buffer(bufsize, n_states) ) + + + offset = 0_8 + icount = bufsize + + rc = trexio_read_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det: ', bufsize, icount + stop -1 + endif + + do m=1,nstates + rc = trexio_set_state(f, m-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_read_determinant_coefficient(f, offset, icount, coef_buffer(1,m)) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det for state', m, ':', icount, bufsize + stop -1 + endif + enddo + + call save_wavefunction_general(bufsize,nstates,det_buffer,size(coef_buffer,1),coef_buffer) + + +end diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f new file mode 100644 index 00000000..9f9ad9d6 --- /dev/null +++ b/src/trexio/import_trexio_integrals.irp.f @@ -0,0 +1,146 @@ +program import_integrals_ao + use trexio + implicit none + integer(trexio_t) :: f ! 
TREXIO file handle + integer(trexio_exit_code) :: rc + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + call run(f) + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) +end + +subroutine run(f) + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import integrals from TREXIO + END_DOC + + integer(trexio_t), intent(in) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer ::i,j,k,l + integer(8) :: m, n_integrals + double precision :: integral + + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + + + double precision, allocatable :: A(:,:) + double precision, allocatable :: V(:) + integer , allocatable :: Vi(:,:) + double precision :: s + + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then + rc = trexio_read_nucleus_repulsion(f, s) + call trexio_assert(rc, TREXIO_SUCCESS) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here, rc + print *, 'Error reading nuclear repulsion' + stop -1 + endif + call ezfio_set_nuclei_nuclear_repulsion(s) + call ezfio_set_nuclei_io_nuclear_repulsion('Read') + endif + + ! AO integrals + ! 
------------ + + allocate(A(ao_num, ao_num)) + + + if (trexio_has_ao_1e_int_overlap(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_overlap(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO overlap' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap('Read') + endif + + if (trexio_has_ao_1e_int_kinetic(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_kinetic(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO kinetic integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic('Read') + endif + +! if (trexio_has_ao_1e_int_ecp(f) == TREXIO_SUCCESS) then +! rc = trexio_read_ao_1e_int_ecp(f, A) +! if (rc /= TREXIO_SUCCESS) then +! print *, irp_here +! print *, 'Error reading AO ECP local integrals' +! stop -1 +! endif +! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) +! call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo('Read') +! endif + + if (trexio_has_ao_1e_int_potential_n_e(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_potential_n_e(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO potential N-e integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e('Read') + endif + + deallocate(A) + + ! AO 2e integrals + ! 
--------------- + PROVIDE ao_integrals_map + + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + +end diff --git a/src/trexio/qp_import_trexio.py b/src/trexio/qp_import_trexio.py new file mode 100755 index 00000000..de8d1269 --- /dev/null +++ b/src/trexio/qp_import_trexio.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +convert TREXIO file to EZFIO + +Usage: + qp_import_trexio [-o EZFIO_DIR] FILE + +Options: + -o --output=EZFIO_DIR Produced directory + by default is FILE.ezfio + +""" + +import sys +import os +import trexio +import numpy as np +from functools import reduce +from ezfio import ezfio +from docopt import docopt + + +try: + QP_ROOT = os.environ["QP_ROOT"] + QP_EZFIO = os.environ["QP_EZFIO"] +except KeyError: + print("Error: QP_ROOT environment variable not found.") + sys.exit(1) +else: + sys.path = [QP_EZFIO + "/Python", + QP_ROOT + "/install/resultsFile", + QP_ROOT + "/install", + QP_ROOT + "/scripts"] + sys.path + + +def generate_xyz(l): + + def create_z(x,y,z): + return (x, y, l-(x+y)) + + def create_y(accu,x,y,z): + if y == 0: + result = [create_z(x,y,z)] + accu + else: + result = create_y([create_z(x,y,z)] + accu , x, 
y-1, z) + return result + + def create_x(accu,x,y,z): + if x == 0: + result = create_y([], x,y,z) + accu + else: + xnew = x-1 + ynew = l-xnew + result = create_x(create_y([],x,y,z) + accu , xnew, ynew, z) + return result + + result = create_x([], l, 0, 0) + result.reverse() + return result + + + +def write_ezfio(trexio_filename, filename): + + try: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_TEXT) + except: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_HDF5) + + ezfio.set_file(filename) + ezfio.set_trexio_trexio_file(trexio_filename) + + print("Nuclei\t\t...\t", end=' ') + + charge = [0.] + if trexio.has_nucleus(trexio_file): + charge = trexio.read_nucleus_charge(trexio_file) + ezfio.set_nuclei_nucl_num(len(charge)) + ezfio.set_nuclei_nucl_charge(charge) + + coord = trexio.read_nucleus_coord(trexio_file) + coord = np.transpose(coord) + ezfio.set_nuclei_nucl_coord(coord) + + label = trexio.read_nucleus_label(trexio_file) + nucl_num = trexio.read_nucleus_num(trexio_file) + + # Transformt H1 into H + import re + p = re.compile(r'(\d*)$') + label = [p.sub("", x).capitalize() for x in label] + ezfio.set_nuclei_nucl_label(label) + + else: + ezfio.set_nuclei_nucl_num(1) + ezfio.set_nuclei_nucl_charge([0.]) + ezfio.set_nuclei_nucl_coord([0.,0.,0.]) + ezfio.set_nuclei_nucl_label(["X"]) + + print("OK") + + + print("Electrons\t...\t", end=' ') + + try: + num_beta = trexio.read_electron_dn_num(trexio_file) + except: + num_beta = sum(charge)//2 + + try: + num_alpha = trexio.read_electron_up_num(trexio_file) + except: + num_alpha = sum(charge) - num_beta + + if num_alpha == 0: + print("\n\nError: There are zero electrons in the TREXIO file.\n\n") + sys.exit(1) + ezfio.set_electrons_elec_alpha_num(num_alpha) + ezfio.set_electrons_elec_beta_num(num_beta) + + print("OK") + + print("Basis\t\t...\t", end=' ') + + shell_num = 0 + try: + basis_type = trexio.read_basis_type(trexio_file) + + if basis_type.lower() not in 
["gaussian", "slater"]: + raise TypeError + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) + + print("OK") + except: + print("None") + ezfio.set_ao_basis_ao_cartesian(True) + + print("AOS\t\t...\t", end=' ') + + try: + cartesian = trexio.read_ao_cartesian(trexio_file) + except: + cartesian = True + + if not cartesian: + raise TypeError('Only cartesian TREXIO files can be converted') + + ao_num = trexio.read_ao_num(trexio_file) + ezfio.set_ao_basis_ao_num(ao_num) + + if shell_num > 0: + ao_shell = 
trexio.read_ao_shell(trexio_file) + at = [ nucl_index[i]+1 for i in ao_shell ] + ezfio.set_ao_basis_ao_nucl(at) + + num_prim0 = [ 0 for i in range(shell_num) ] + for i in shell_index: + num_prim0[i] += 1 + + coef = {} + expo = {} + for i,c in enumerate(coefficient): + idx = shell_index[i] + if idx in coef: + coef[idx].append(c) + expo[idx].append(exponent[i]) + else: + coef[idx] = [c] + expo[idx] = [exponent[i]] + + coefficient = [] + exponent = [] + power_x = [] + power_y = [] + power_z = [] + num_prim = [] + + for i in range(shell_num): + for x,y,z in generate_xyz(ang_mom[i]): + power_x.append(x) + power_y.append(y) + power_z.append(z) + coefficient.append(coef[i]) + exponent.append(expo[i]) + num_prim.append(num_prim0[i]) + + assert (len(coefficient) == ao_num) + ezfio.set_ao_basis_ao_power(power_x + power_y + power_z) + ezfio.set_ao_basis_ao_prim_num(num_prim) + + prim_num_max = max( [ len(x) for x in coefficient ] ) + + for i in range(ao_num): + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + exponent [i] += [0. for j in range(len(exponent[i]), prim_num_max)] + + coefficient = reduce(lambda x, y: x + y, coefficient, []) + exponent = reduce(lambda x, y: x + y, exponent , []) + + coef = [] + expo = [] + for i in range(prim_num_max): + for j in range(i, len(coefficient), prim_num_max): + coef.append(coefficient[j]) + expo.append(exponent[j]) + +# ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) + ezfio.set_ao_basis_ao_coef(coef) + ezfio.set_ao_basis_ao_expo(expo) + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + + print("OK") + + + # _ + # |\/| _ _ |_) _. 
_ o _ + # | | (_) _> |_) (_| _> | _> + # + + print("MOS\t\t...\t", end=' ') + + labels = { "Canonical" : "Canonical", + "RHF" : "Canonical", + "BOYS" : "Localized", + "ROHF" : "Canonical", + "UHF" : "Canonical", + "Natural": "Natural" } + try: + label = labels[trexio.read_mo_type(trexio_file)] + except: + label = "None" + ezfio.set_mo_basis_mo_label(label) + + try: + clss = trexio.read_mo_class(trexio_file) + core = [ i for i in clss if i.lower() == "core" ] + inactive = [ i for i in clss if i.lower() == "inactive" ] + active = [ i for i in clss if i.lower() == "active" ] + virtual = [ i for i in clss if i.lower() == "virtual" ] + deleted = [ i for i in clss if i.lower() == "deleted" ] + except trexio.Error: + pass + + try: + mo_num = trexio.read_mo_num(trexio_file) + ezfio.set_mo_basis_mo_num(mo_num) + + MoMatrix = trexio.read_mo_coefficient(trexio_file) + ezfio.set_mo_basis_mo_coef(MoMatrix) + + mo_occ = [ 0. for i in range(mo_num) ] + for i in range(num_alpha): + mo_occ[i] += 1. + for i in range(num_beta): + mo_occ[i] += 1. 
+ ezfio.set_mo_basis_mo_occ(mo_occ) + except: + pass + + print("OK") + + + print("Pseudos\t\t...\t", end=' ') + + ezfio.set_pseudo_do_pseudo(False) + + if trexio.has_ecp_ang_mom(trexio_file): + ezfio.set_pseudo_do_pseudo(True) + max_ang_mom_plus_1 = trexio.read_ecp_max_ang_mom_plus_1(trexio_file) + z_core = trexio.read_ecp_z_core(trexio_file) + ang_mom = trexio.read_ecp_ang_mom(trexio_file) + nucleus_index = trexio.read_ecp_nucleus_index(trexio_file) + exponent = trexio.read_ecp_exponent(trexio_file) + coefficient = trexio.read_ecp_coefficient(trexio_file) + power = trexio.read_ecp_power(trexio_file) + + lmax = max( max_ang_mom_plus_1 ) - 1 + ezfio.set_pseudo_pseudo_lmax(lmax) + ezfio.set_pseudo_nucl_charge_remove(z_core) + + prev_center = None + ecp = {} + for i in range(len(ang_mom)): + center = nucleus_index[i] + if center != prev_center: + ecp[center] = { "lmax": max_ang_mom_plus_1[center], + "zcore": z_core[center], + "contr": {} } + for j in range(max_ang_mom_plus_1[center]+1): + ecp[center]["contr"][j] = [] + + ecp[center]["contr"][ang_mom[i]].append( (coefficient[i], power[i], exponent[i]) ) + prev_center = center + + ecp_loc = {} + ecp_nl = {} + kmax = 0 + klocmax = 0 + for center in ecp: + ecp_nl [center] = {} + for k in ecp[center]["contr"]: + if k == ecp[center]["lmax"]: + ecp_loc[center] = ecp[center]["contr"][k] + klocmax = max(len(ecp_loc[center]), klocmax) + else: + ecp_nl [center][k] = ecp[center]["contr"][k] + kmax = max(len(ecp_nl [center][k]), kmax) + + ezfio.set_pseudo_pseudo_klocmax(klocmax) + ezfio.set_pseudo_pseudo_kmax(kmax) + + pseudo_n_k = [[0 for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_v_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_dz_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_n_kl = [[[0 for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_v_kl = [[[0. 
for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_dz_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + for center in ecp_loc: + for k in range( len(ecp_loc[center]) ): + v, n, dz = ecp_loc[center][k] + pseudo_n_k[k][center] = n + pseudo_v_k[k][center] = v + pseudo_dz_k[k][center] = dz + + ezfio.set_pseudo_pseudo_n_k(pseudo_n_k) + ezfio.set_pseudo_pseudo_v_k(pseudo_v_k) + ezfio.set_pseudo_pseudo_dz_k(pseudo_dz_k) + + for center in ecp_nl: + for l in range( len(ecp_nl[center]) ): + for k in range( len(ecp_nl[center][l]) ): + v, n, dz = ecp_nl[center][l][k] + pseudo_n_kl[l][k][center] = n + pseudo_v_kl[l][k][center] = v + pseudo_dz_kl[l][k][center] = dz + + ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) + ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) + ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + + + print("OK") + + + + +def get_full_path(file_path): + file_path = os.path.expanduser(file_path) + file_path = os.path.expandvars(file_path) + return file_path + + +if __name__ == '__main__': + ARGUMENTS = docopt(__doc__) + + FILE = get_full_path(ARGUMENTS['FILE']) + trexio_filename = FILE + + if ARGUMENTS["--output"]: + EZFIO_FILE = get_full_path(ARGUMENTS["--output"]) + else: + EZFIO_FILE = "{0}.ezfio".format(FILE) + + write_ezfio(trexio_filename, EZFIO_FILE) + sys.stdout.flush() + diff --git a/src/trexio/trexio_file.irp.f b/src/trexio/trexio_file.irp.f new file mode 100644 index 00000000..c9897748 --- /dev/null +++ b/src/trexio/trexio_file.irp.f @@ -0,0 +1,20 @@ +BEGIN_PROVIDER [ character*(1024), trexio_filename ] + implicit none + BEGIN_DOC + ! 
Name of the TREXIO file + END_DOC + character*(1024) :: prefix + + trexio_filename = trexio_file + + if (trexio_file == 'None') then + prefix = trim(ezfio_work_dir)//trim(ezfio_filename) + if (backend == 0) then + trexio_filename = trim(prefix)//'.h5' + else if (backend == 1) then + trexio_filename = trim(prefix) + endif + endif +END_PROVIDER + + diff --git a/src/trexio/trexio_module.F90 b/src/trexio/trexio_module.F90 new file mode 100644 index 00000000..acd08492 --- /dev/null +++ b/src/trexio/trexio_module.F90 @@ -0,0 +1 @@ +#include "trexio_f.f90" From a2627e79255f3b490beca663ae1f1464e76ab96d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:04:45 +0200 Subject: [PATCH 83/87] Introduced TREXIO in QP --- configure | 2 +- scripts/compilation/qp_create_ninja | 2 +- src/trexio/export_trexio_routines.irp.f | 223 +++++++++++++----------- src/two_body_rdm/two_e_dm_mo.irp.f | 1 - 4 files changed, 123 insertions(+), 105 deletions(-) diff --git a/configure b/configure index 4dd753ff..66bc9419 100755 --- a/configure +++ b/configure @@ -9,7 +9,7 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX -TREXIO_VERSION=2.3.1 +TREXIO_VERSION=2.3.2 # Force GCC instead of ICC for dependencies export CC=gcc diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 27b34901..606fd0f6 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -38,7 +38,7 @@ def comp_path(path): from qp_path import QP_ROOT, QP_SRC, QP_EZFIO -LIB = " -lz" +LIB = " -lz -ltrexio" EZFIO_LIB = join("$QP_ROOT", "lib", "libezfio_irp.a") ZMQ_LIB = join("$QP_ROOT", "lib", "libf77zmq.a") + " " + join("$QP_ROOT", "lib", "libzmq.a") + " -lstdc++ -lrt -ldl" ROOT_BUILD_NINJA = join("$QP_ROOT", "config", "build.ninja") diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index d69e7a70..c55ddc5e 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f 
@@ -5,24 +5,35 @@ subroutine export_trexio ! Exports the wave function in TREXIO format END_DOC - integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc + integer :: k double precision, allocatable :: factor(:) + character*(256) :: filenames(N_states) - print *, 'TREXIO file : '//trim(trexio_filename) + filenames(1) = trexio_filename + do k=2,N_states + write(filenames(k),'(A,I3.3)') trim(trexio_filename)//'.', k-1 + enddo + + do k=1,N_states + print *, 'TREXIO file : ', trim(filenames(k)) + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + enddo print *, '' - call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') - if (backend == 0) then - f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) - else if (backend == 1) then - f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) - endif - if (f == 0_8) then - print *, 'Unable to open TREXIO file for writing' - print *, 'rc = ', rc - stop -1 - endif + do k=1,N_states + if (backend == 0) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + endif + if (f(k) == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + enddo call ezfio_set_trexio_trexio_file(trexio_filename) ! 
------------------------------------------------------------------------------ @@ -32,10 +43,10 @@ subroutine export_trexio print *, 'Electrons' - rc = trexio_write_electron_up_num(f, elec_alpha_num) + rc = trexio_write_electron_up_num(f(1), elec_alpha_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_electron_dn_num(f, elec_beta_num) + rc = trexio_write_electron_dn_num(f(1), elec_beta_num) call trexio_assert(rc, TREXIO_SUCCESS) @@ -44,19 +55,19 @@ subroutine export_trexio print *, 'Nuclei' - rc = trexio_write_nucleus_num(f, nucl_num) + rc = trexio_write_nucleus_num(f(1), nucl_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_charge(f, nucl_charge) + rc = trexio_write_nucleus_charge(f(1), nucl_charge) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + rc = trexio_write_nucleus_coord(f(1), nucl_coord_transp) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_label(f, nucl_label, 32) + rc = trexio_write_nucleus_label(f(1), nucl_label, 32) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + rc = trexio_write_nucleus_repulsion(f(1), nuclear_repulsion) call trexio_assert(rc, TREXIO_SUCCESS) @@ -133,28 +144,28 @@ subroutine export_trexio lmax(:) = lmax(:)+1 - rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + rc = trexio_write_ecp_max_ang_mom_plus_1(f(1), lmax) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + rc = trexio_write_ecp_z_core(f(1), int(nucl_charge_remove)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_num(f, num) + rc = trexio_write_ecp_num(f(1), num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_ang_mom(f, ang_mom) + rc = trexio_write_ecp_ang_mom(f(1), ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + rc = trexio_write_ecp_nucleus_index(f(1), nucleus_index) call 
trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_exponent(f, exponent) + rc = trexio_write_ecp_exponent(f(1), exponent) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_coefficient(f, coefficient) + rc = trexio_write_ecp_coefficient(f(1), coefficient) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_power(f, power) + rc = trexio_write_ecp_power(f(1), power) call trexio_assert(rc, TREXIO_SUCCESS) endif @@ -166,19 +177,19 @@ subroutine export_trexio print *, 'Basis' - rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f, prim_num) + rc = trexio_write_basis_prim_num(f(1), prim_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f, shell_num) + rc = trexio_write_basis_shell_num(f(1), shell_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(shell_num)) @@ -187,18 +198,18 @@ subroutine export_trexio else factor(1:shell_num) = 1.d0 endif - rc = trexio_write_basis_shell_factor(f, factor) + rc = trexio_write_basis_shell_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) - rc = trexio_write_basis_shell_index(f, shell_index) + rc = trexio_write_basis_shell_index(f(1), shell_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f, prim_expo) + rc = trexio_write_basis_exponent(f(1), prim_expo) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f, prim_coef) + rc = trexio_write_basis_coefficient(f(1), prim_coef) call trexio_assert(rc, TREXIO_SUCCESS) 
allocate(factor(prim_num)) @@ -207,7 +218,7 @@ subroutine export_trexio else factor(1:prim_num) = 1.d0 endif - rc = trexio_write_basis_prim_factor(f, factor) + rc = trexio_write_basis_prim_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -217,16 +228,16 @@ subroutine export_trexio print *, 'AOs' - rc = trexio_write_ao_num(f, ao_num) + rc = trexio_write_ao_num(f(1), ao_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f, 1) + rc = trexio_write_ao_cartesian(f(1), 1) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f, ao_shell) + rc = trexio_write_ao_shell(f(1), ao_shell) call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, k, l, nz + integer :: i, pow0(3), powA(3), j, l, nz double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c nz=100 @@ -243,7 +254,7 @@ subroutine export_trexio else factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f, factor) + rc = trexio_write_ao_normalization(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -253,21 +264,21 @@ subroutine export_trexio if (export_ao_one_e_ints) then print *, 'AO one-e integrals' - rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = 
trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -303,7 +314,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -314,7 +325,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -328,7 +339,7 @@ subroutine export_trexio if (export_ao_two_e_ints_cholesky) then print *, 'AO two-e integrals Cholesky' - rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_ao_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -344,7 +355,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -354,7 +365,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -366,22 +377,24 @@ subroutine export_trexio print *, 'MOs' - rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call 
trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_num(f, mo_num) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_write_mo_coefficient(f, mo_coef) + rc = trexio_write_mo_coefficient(f(1), mo_coef) call trexio_assert(rc, TREXIO_SUCCESS) if ( (trim(mo_label) == 'Canonical').and. & (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) ! One-e MO integrals @@ -390,18 +403,18 @@ subroutine export_trexio if (export_mo_one_e_ints) then print *, 'MO one-e integrals' - rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + rc = trexio_write_mo_1e_int_kinetic(f(1),mo_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + rc = trexio_write_mo_1e_int_potential_n_e(f(1),mo_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + rc = trexio_write_mo_1e_int_ecp(f(1),mo_pseudo_integrals_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + rc = trexio_write_mo_1e_int_core_hamiltonian(f(1),mo_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -432,7 +445,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -443,7 +456,7 @@ 
subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -454,7 +467,7 @@ subroutine export_trexio if (export_mo_two_e_ints_cholesky) then print *, 'MO two-e integrals Cholesky' - rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_mo_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -470,7 +483,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -480,7 +493,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -489,14 +502,16 @@ subroutine export_trexio ! One-e RDM ! --------- - rc = trexio_write_rdm_1e(f,one_e_dm_mo) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_rdm_1e(f(k),one_e_dm_mo_alpha(:,:,k) + one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_up(f(k),one_e_dm_mo_alpha(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_dn(f(k),one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo ! 
Two-e RDM @@ -521,7 +536,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -532,7 +547,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -547,56 +562,60 @@ subroutine export_trexio double precision, allocatable :: coef_buffer(:,:) integer :: nint -! rc = trexio_read_determinant_int64_num(f, nint) -! call trexio_assert(rc, TREXIO_SUCCESS) - nint = N_int + rc = trexio_get_int64_num(f(1), nint) + call trexio_assert(rc, TREXIO_SUCCESS) +! nint = N_int if (nint /= N_int) then stop 'Problem with N_int' endif allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) - icount = 0_8 - offset = 0_8 - rc = trexio_write_state_num(f, n_states) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1, N_states + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f(k), n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + +! Will need to be updated with TREXIO 2.4 +! 
rc = trexio_write_state_id(f(k), k-1) + rc = trexio_write_state_id(f(k), k) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_state_file_name(f(k), filenames, len(filenames(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_set_state (f, 0) - call trexio_assert(rc, TREXIO_SUCCESS) do k=1,n_det icount += 1_8 det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) if (icount == BUFSIZE) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) do i=1,N_states - rc = trexio_set_state (f, i-1) + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) end do - rc = trexio_set_state (f, 0) offset += icount icount = 0_8 end if end do if (icount >= 0_8) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) - do i=1,N_states - rc = trexio_set_state (f, i-1) - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) - end do - rc = trexio_set_state (f, 0) + do i=1,N_states + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) + call trexio_assert(rc, TREXIO_SUCCESS) + end do end if deallocate ( det_buffer, coef_buffer ) - rc = trexio_close(f) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_close(f(k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo end diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f 
b/src/two_body_rdm/two_e_dm_mo.irp.f index 6bd115a2..99be1f54 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -30,7 +30,6 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) -! * 2.d0 END_PROVIDER From 3aae1dbf77f20ef9a2e46adb6462beabca8ab8ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:06:07 +0200 Subject: [PATCH 84/87] fix completion in qp set_file --- etc/qp.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/qp.rc b/etc/qp.rc index d339f475..9eec4570 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) From 0fa576f90930bcf0ffc7a933a42d5667071ad3cd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:51:17 +0200 Subject: [PATCH 85/87] Accelerated (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 244 ++++++++++++++++++++++---------- 1 file changed, 172 insertions(+), 72 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 1f1db87e..24b86972 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -8,15 +8,15 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) integer :: i,j,k,a,b,c - + allocate(W(nO,nO,nO,nV,nV,nV)) allocate(V(nO,nO,nO,nV,nV,nV)) - call form_w(nO,nV,t2,W) + call form_w(nO,nV,t2,W) call 
form_v(nO,nV,t1,W,V) energy = 0d0 @@ -33,9 +33,9 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) enddo enddo enddo - + energy = energy / 3d0 - + deallocate(V,W) end @@ -46,7 +46,7 @@ subroutine form_w(nO,nV,t2,W) integer, intent(in) :: nO,nV double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) - + integer :: i,j,k,l,a,b,c,d W = 0d0 @@ -133,7 +133,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) @@ -141,7 +141,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_ijk - + !allocate(W(nV,nV,nV,nO,nO,nO)) !allocate(V(nV,nV,nV,nO,nO,nO)) allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) @@ -154,10 +154,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) - + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) - + !$OMP DO collapse(3) do i = 1, nO do a = 1, nV @@ -181,7 +181,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & @@ -208,10 +208,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vvoo(b,c,j,k) * t1(i,a) & !X_vvoo(b,c,k,j) * T1_vo(a,i) & - + !$OMP DO collapse(3) do j = 1, nO do k = 1, nO @@ -267,7 +267,7 @@ subroutine 
ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) call wall_time(tb) write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo - + energy = energy / 3d0 deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) @@ -285,78 +285,178 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) - + integer :: l,a,b,c,d + double precision, allocatable, dimension(:,:,:) :: X, Y, Z !W = 0d0 !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & - !$OMP PRIVATE(a,b,c,d,l) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) - do c = 1, nV - do b = 1, nV - do a = 1, nV - W(a,b,c) = 0d0 + allocate(X(nV,nV,nV)) + allocate(Y(nV,nV,nV)) + allocate(Z(nV,nV,nV)) - do d = 1, nV - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (bd|ai) - ! phys - !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj - !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik - !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij - !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj - !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! 
prev ac ik - enddo - + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do d = 1, nV + Z(d,a,b) = X_vvvo(d,b,a,i) enddo enddo enddo - !$OMP END DO nowait + !$OMP END PARALLEL DO - !$OMP DO collapse(2) + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,k,j), nV, 0.d0, W, nV*nV) + + !$OMP PARALLEL DO do c = 1, nV - do b = 1, nV - do a = 1, nV - - do l = 1, nO - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (ck|jl) - ! phys - !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & - !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj - !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik - !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij - !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj - !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik - - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & - - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj - - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik - - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij - - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj - - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! 
prev ac ik - enddo - + do a = 1, nV + do d = 1, nV + Z(d,a,c) = X_vvvo(d,c,a,i) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,j,k), nV, 0.d0, Y, nV*nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,k), nV, T_vvoo(1,1,j,i), nV, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,j), nV, X_vvvo(1,1,1,k), nV, 1.d0, W, nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,k), nV, X_vvvo(1,1,1,j), nV, 1.d0, Y, nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,j), nV, T_vvoo(1,1,k,i), nV, 1.d0, W, nV*nV) + + deallocate(Z) + + + allocate(Z(nO,nV,nV)) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,j,k), nO, 1.d0, W, nV*nV) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,k,j), nO, 1.d0, Y, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,c) = T_ovvo(l,c,a,k) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,j), nO, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,j,i), nO, T_ovvo(1,1,1,k), nO, 1.d0, Y, nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,k,i), nO, T_ovvo(1,1,1,j), nO, 1.d0, W, nV) + + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,b) = T_ovvo(l,b,a,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,k), nO, 1.d0, W, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = W(a,b,c) + Y(a,c,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(X,Y,Z) + + +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & +! !$OMP PRIVATE(a,b,c,d,l) & +! !$OMP DEFAULT(NONE) +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! W(a,b,c) = 0.d0 +! +! 
do d = 1, nV +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (bd|ai) +! ! phys +! !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & +! !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj +! !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik +! !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij +! !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj +! !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik +! + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & +! + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj +! + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik +! + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij +! + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj +! + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO nowait +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! +! do l = 1, nO +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (ck|jl) +! ! phys +! !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & +! !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj +! !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik +! !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij +! !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj +! !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik +! - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & +! - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj +! - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik +! - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij +! - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj +! - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + ! enddo ! enddo !enddo @@ -382,7 +482,7 @@ implicit none !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & !$OMP PRIVATE(a,b,c) & @@ -404,7 +504,7 @@ implicit none enddo !$OMP END DO !$OMP END PARALLEL - + ! enddo ! 
enddo !enddo From 69a76c6dba05188e1856c0e195ada4daa25984f5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:17:07 +0200 Subject: [PATCH 86/87] Added W_abc routines for (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 5 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 252 ++++++++++++++++++++++++++++ 2 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 src/ccsd/ccsd_t_space_orb_abc.irp.f diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 24b86972..37f2b484 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -257,7 +257,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL @@ -426,7 +426,7 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) ! enddo ! enddo ! !$OMP END DO nowait -! +! ! !$OMP DO collapse(2) ! do c = 1, nV ! do b = 1, nV @@ -510,3 +510,4 @@ implicit none !enddo end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f new file mode 100644 index 00000000..3b762a06 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -0,0 +1,252 @@ +! 
Main + +subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) + double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_abc + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) + allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! 
Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) + call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) + call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + + call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + !$OMP PARALLEL & + !$OMP 
SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& + !$OMP PRIVATE(i,j,k,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + + +subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + + implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W_abc(nO,nO,nO) + + integer :: l,i,j,k,d + + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP PRIVATE(i,j,k,d,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do i = 1, nO + W_abc(i,j,k) = 0.d0 + + do d = 1, nV + W_abc(i,j,k) = W_abc(i,j,k) & + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + enddo + + do l = 1, nO + W_abc(i,j,k) = W_abc(i,j,k) & + - T_ovvo(l,a,b,i) * 
X_ovoo(l,c,j,k) & + - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj + - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik + - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij + - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj + - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + +end + + +! V_abc + +subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) + +implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nO,nO,nO) + double precision, intent(out) :: V(nO,nO,nO) + + integer :: i,j,k + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do k = 1, nO + do j = 1, nO + do i = 1, nO + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(i,j,k) = W(i,j,k) & + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + From 85ca8854188289b34010eb431813919df5507aed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:20:08 +0200 Subject: [PATCH 87/87] Fixing github actions --- .github/workflows/compilation.yml | 1 + .github/workflows/configuration.yml | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 42710ce5..85daf7db 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -48,6 +48,7 @@ jobs: ./configure -i docopt || : ./configure -i resultsFile || : ./configure -i bats || : + ./configure -i trexio-nohdf5 || : ./configure -c ./config/gfortran_debug.cfg - name: Compilation run: | diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index 
14019e5d..ba37f5dd 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 - name: zlib run: | ./configure -i zlib || echo OK @@ -50,6 +50,12 @@ jobs: - name: bats run: | ./configure -i bats || echo OK + - name: trexio-nohdf5 + run: | + ./configure -i trexio-nohdf5 || echo OK + - name: trexio + run: | + ./configure -i trexio || echo OK - name: Final check run: | ./configure -c config/gfortran_debug.cfg