From d32b170fd33d99e6a642cc540a5cba7e2e0abc54 Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 22 Oct 2022 16:21:37 +0200 Subject: [PATCH 01/10] documentation --- src/ao_many_one_e_ints/ao_erf_gauss.irp.f | 2 +- .../prim_int_erf_gauss.irp.f | 2 +- src/non_h_ints_mu/README.rst | 4 + src/non_h_ints_mu/fit_j.irp.f | 2 +- src/non_h_ints_mu/grad_squared.irp.f | 97 +++++++++++++++++++ src/non_h_ints_mu/new_grad_tc.irp.f | 2 +- src/non_h_ints_mu/test_non_h_ints.irp.f | 43 +++++++- 7 files changed, 143 insertions(+), 9 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index 39be352f..0c5430d5 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -90,7 +90,7 @@ end subroutine erfc_mu_gauss_xyz_ij_ao(i,j,mu, C_center, delta,gauss_ints) implicit none BEGIN_DOC - ! gauss_ints(m) = \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-r'|))/ |r-r'| * AO_i(r') * AO_j(r') + ! gauss_ints(m) = \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-C|))/ |r-C| * AO_i(r) * AO_j(r) ! ! with m = 1 ==> x, m = 2, m = 3 ==> z ! diff --git a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f index 641d25fe..4c2b65a6 100644 --- a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f @@ -142,7 +142,7 @@ double precision function erf_mu_gauss(D_center,delta,mu,A_center,B_center,power ! ! .. math:: ! - ! \int dr exp(-delta (r - D)^2 ) erf(mu*|r-r'|)/ |r-r'| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \int dr exp(-delta (r - D)^2 ) erf(mu*|r-D|)/ |r-D| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! END_DOC diff --git a/src/non_h_ints_mu/README.rst b/src/non_h_ints_mu/README.rst index 6a36bb98..fb1c25b1 100644 --- a/src/non_h_ints_mu/README.rst +++ b/src/non_h_ints_mu/README.rst @@ -9,3 +9,7 @@ The two providers are : +) ao_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the MO basis. +) mo_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the BI-ORTHO MO basis. + +!\sum_mm = 1,3 \sum_R phi_i(R) \phi_k(R) grad_1_u_ij_mu(j,l,R,mm) grad_1_u_ij_mu(m,n,R,mm) +!\sum_mm+= 1,3 \sum_R phi_j(R) \phi_l(R) grad_1_u_ij_mu(i,k,R,mm) grad_1_u_ij_mu(m,n,R,mm) +!\sum_mm+= 1,3 \sum_R phi_m(R) \phi_n(R) grad_1_u_ij_mu(i,k,R,mm) grad_1_u_ij_mu(j,l,R,mm) diff --git a/src/non_h_ints_mu/fit_j.irp.f b/src/non_h_ints_mu/fit_j.irp.f index 695ead7f..6624459f 100644 --- a/src/non_h_ints_mu/fit_j.irp.f +++ b/src/non_h_ints_mu/fit_j.irp.f @@ -20,7 +20,7 @@ END_PROVIDER ! ! J(mu,r12) = 0.5/mu * F(r12*mu) where F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) ! -! F(x) is fitted by - 1/sqrt(pi) * exp(-alpha * x) exp(-beta*mu^2x^2) (see expo_j_xmu) +! F(x) is fitted by - 1/sqrt(pi) * exp(-alpha * x) exp(-beta * x^2) (see expo_j_xmu) ! ! The slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians ! diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index a88521a1..01b5d8d6 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -32,6 +32,103 @@ print*,'Wall time for grad_1_squared_u_ij_mu = ',time1 - time0 END_PROVIDER + BEGIN_PROVIDER [ double precision, grad_1_squared_u_ij_mu_new, (n_points_final_grid, ao_num, ao_num)] + implicit none + integer :: ipoint,i,j,m,igauss + BEGIN_DOC + ! grad_1_squared_u_ij_mu(j,i,ipoint) = -1/2 \int dr2 phi_j(r2) phi_i(r2) |\grad_r1 u(r1,r2,\mu)|^2 + ! |\grad_r1 u(r1,r2,\mu)|^2 = 1/4 * (1 - erf(mu*r12))^2 + ! ! (1 - erf(mu*r12))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * r12^2) + END_DOC + include 'constants.include.F' + double precision :: r(3),delta,coef + double precision :: overlap_gauss_r12_ao,time0,time1 + integer :: num_a,num_b,power_A(3), power_B(3),l,k + double precision :: A_center(3), B_center(3),overlap_gauss_r12,alpha,beta,analytical_j + double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_center_new(3) ! new center + integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A + double precision :: alpha_new ! new exponent + double precision :: fact_a_new, coef_i, coef_j, k_ab,center_new(3),p_new,c_tmp,coef_last ! constant factor + double precision :: coefxy, coefx, coefy, coefz,coefxyz + integer :: d(3),lx,ly,lz,iorder_tmp(3),dim1 + double precision :: overlap,overlap_x,overlap_y,overlap_z,thr + dim1=100 + thr = 0.d0 + print*,'providing grad_1_squared_u_ij_mu_new ...' + grad_1_squared_u_ij_mu_new = 0.d0 + call wall_time(time0) + !TODO : strong optmization : write the loops in a different way + ! : for each couple of AO, the gaussian product are done once for all + d = 0 + do i = 1, ao_num + do j = 1, ao_num + ! \int dr2 phi_j(r2) phi_i(r2) (1 - erf(mu*r12))^2 + ! = \sum_i coef_gauss_1_erf_x_2(i) \int dr2 phi_j(r2) phi_i(r2) exp(-expo_gauss_1_erf_x_2(i) * (r_1 - r_2)^2) + if(ao_overlap_abs(j,i).lt.1.d-12)then + cycle + endif + num_A = ao_nucl(i) + power_A(1:3)= ao_power(i,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j) + power_B(1:3)= ao_power(j,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + do l=1,ao_prim_num(i) + coef_i = ao_coef_normalized_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) + do k=1,ao_prim_num(j) + beta = ao_expo_ordered_transp(k,j) + coef_j = ao_coef_normalized_ordered_transp(k,j) + + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + ! from gaussian_A * gaussian_B + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + beta,alpha,power_B,power_A,B_center,A_center,n_pt_max_integrals) + c_tmp = coef_i*coef_j*fact_a_new + if(dabs(c_tmp).lt.thr)cycle + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + do igauss = 1, n_max_fit_slat + delta = expo_gauss_1_erf_x_2(igauss) + coef = coef_gauss_1_erf_x_2(igauss) + coef_last = c_tmp * coef + if(dabs(coef_last).lt.thr)cycle + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1) + coefx *= coef_last +! if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2) + coefxy= coefx*coefy +! if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3) + coefxyz = coefz * coefxy +! if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz +! call gaussian_product(alpha_new,A_center_new,delta,r,k_ab,p_new,center_new) +! if(dabs(coef_last*k_ab).lt.thr)cycle + call overlap_gaussian_xyz(A_center_new,r,alpha_new,delta,iorder_tmp,d,overlap_x,overlap_y,overlap_z,overlap,dim1) + grad_1_squared_u_ij_mu_new(ipoint,j,i) += -0.25 * coefxyz * overlap + enddo ! igauss + enddo ! ipoint + enddo ! lz + enddo ! ly + enddo ! lx + enddo ! k + enddo ! l + enddo ! j + enddo ! i + call wall_time(time1) + print*,'Wall time for grad_1_squared_u_ij_mu_new = ',time1 - time0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] implicit none BEGIN_DOC diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 068381b4..f205b781 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -28,7 +28,7 @@ END_PROVIDER BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)] implicit none BEGIN_DOC - ! tc_grad_and_lapl_ao(k,i,l,j) = + ! tc_grad_and_lapl_ao(k,i,l,j) = ! ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index c535d0c5..c098b0f5 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -1,13 +1,14 @@ program test_non_h implicit none my_grid_becke = .True. - my_n_pt_r_grid = 50 - my_n_pt_a_grid = 74 -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug +! my_n_pt_r_grid = 50 +! my_n_pt_a_grid = 74 + my_n_pt_r_grid = 10 ! small grid for quick debug + my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid !call routine_grad_squared - call routine_fit +! call routine_fit + call routine_grad_squared_new end subroutine routine_lapl_grad @@ -85,6 +86,38 @@ subroutine routine_grad_squared end +subroutine routine_grad_squared_new + implicit none + integer :: i,j,k,l,ipoint + double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib + double precision :: count_n,accu_relat + accu = 0.d0 + accu_relat = 0.d0 + count_n = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + do ipoint = 1, n_points_final_grid + grad_squared = grad_1_squared_u_ij_mu(j,i,ipoint) + new = grad_1_squared_u_ij_mu_new(ipoint,j,i) + contrib = dabs(new - grad_squared) + if(dabs(grad_squared).gt.1.d-12)then + count_n += 1.d0 + accu_relat += 2.0d0 * contrib/dabs(grad_squared+new) + endif + if(contrib.gt.1.d-10)then + print*,i,j,ipoint + print*,grad_squared,new,contrib + print*,2.0d0*contrib/dabs(grad_squared+new+1.d-12) + endif + accu += contrib + enddo + enddo + enddo + print*,'accu = ',accu/count_n + print*,'accu/rel = ',accu_relat/count_n + +end + subroutine routine_fit implicit none integer :: i,nx From aa55f7de28634a31b5da5536c2b6e3622128840a Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 22 Oct 2022 19:09:22 +0200 Subject: [PATCH 02/10] it compiles ! --- src/non_h_ints_mu/test_non_h_ints.irp.f | 135 ------------------------ 1 file changed, 135 deletions(-) delete mode 100644 src/non_h_ints_mu/test_non_h_ints.irp.f diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f deleted file mode 100644 index c098b0f5..00000000 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ /dev/null @@ -1,135 +0,0 @@ -program test_non_h - implicit none - my_grid_becke = .True. -! my_n_pt_r_grid = 50 -! my_n_pt_a_grid = 74 - my_n_pt_r_grid = 10 ! small grid for quick debug - my_n_pt_a_grid = 26 ! small grid for quick debug - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid -!call routine_grad_squared -! call routine_fit - call routine_grad_squared_new -end - -subroutine routine_lapl_grad - implicit none - integer :: i,j,k,l - double precision :: grad_lapl, get_ao_tc_sym_two_e_pot,new,accu,contrib - double precision :: ao_two_e_integral_erf,get_ao_two_e_integral,count_n,accu_relat -! !!!!!!!!!!!!!!!!!!!!! WARNING -! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(1:n_max_fit_slat) = 0. to cancel (1-erf(mu*r12))^2 - accu = 0.d0 - accu_relat = 0.d0 - count_n = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - grad_lapl = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl - grad_lapl += ao_two_e_integral_erf(i, k, j, l) ! erf(mu r12)/r12 : comes from Lapl - grad_lapl += ao_non_hermit_term_chemist(k,i,l,j) ! \grad u(r12) . grad - new = tc_grad_and_lapl_ao(k,i,l,j) - new += get_ao_two_e_integral(i,j,k,l,ao_integrals_map) - contrib = dabs(new - grad_lapl) - if(dabs(grad_lapl).gt.1.d-12)then - count_n += 1.d0 - accu_relat += 2.0d0 * contrib/dabs(grad_lapl+new) - endif - if(contrib.gt.1.d-10)then - print*,i,j,k,l - print*,grad_lapl,new,contrib - print*,2.0d0*contrib/dabs(grad_lapl+new+1.d-12) - endif - accu += contrib - enddo - enddo - enddo - enddo - print*,'accu = ',accu/count_n - print*,'accu/rel = ',accu_relat/count_n - -end - -subroutine routine_grad_squared - implicit none - integer :: i,j,k,l - double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib - double precision :: count_n,accu_relat -! !!!!!!!!!!!!!!!!!!!!! WARNING -! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(n_max_fit_slat:n_max_fit_slat+1) = 0. to cancel exp(-'mu*r12)^2) - accu = 0.d0 - accu_relat = 0.d0 - count_n = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - grad_squared = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl - new = tc_grad_square_ao(k,i,l,j) - contrib = dabs(new - grad_squared) - if(dabs(grad_squared).gt.1.d-12)then - count_n += 1.d0 - accu_relat += 2.0d0 * contrib/dabs(grad_squared+new) - endif - if(contrib.gt.1.d-10)then - print*,i,j,k,l - print*,grad_squared,new,contrib - print*,2.0d0*contrib/dabs(grad_squared+new+1.d-12) - endif - accu += contrib - enddo - enddo - enddo - enddo - print*,'accu = ',accu/count_n - print*,'accu/rel = ',accu_relat/count_n - -end - -subroutine routine_grad_squared_new - implicit none - integer :: i,j,k,l,ipoint - double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib - double precision :: count_n,accu_relat - accu = 0.d0 - accu_relat = 0.d0 - count_n = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - do ipoint = 1, n_points_final_grid - grad_squared = grad_1_squared_u_ij_mu(j,i,ipoint) - new = grad_1_squared_u_ij_mu_new(ipoint,j,i) - contrib = dabs(new - grad_squared) - if(dabs(grad_squared).gt.1.d-12)then - count_n += 1.d0 - accu_relat += 2.0d0 * contrib/dabs(grad_squared+new) - endif - if(contrib.gt.1.d-10)then - print*,i,j,ipoint - print*,grad_squared,new,contrib - print*,2.0d0*contrib/dabs(grad_squared+new+1.d-12) - endif - accu += contrib - enddo - enddo - enddo - print*,'accu = ',accu/count_n - print*,'accu/rel = ',accu_relat/count_n - -end - -subroutine routine_fit - implicit none - integer :: i,nx - double precision :: dx,xmax,x,j_mu,j_mu_F_x_j,j_mu_fit_gauss - nx = 500 - xmax = 5.d0 - dx = xmax/dble(nx) - x = 0.d0 - print*,'coucou',mu_erf - do i = 1, nx - write(33,'(100(F16.10,X))') x,j_mu(x),j_mu_F_x_j(x),j_mu_fit_gauss(x) - x += dx - enddo - -end From b01b7d4d55d62c13f1b4493d6a14423d39b624bb Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 22 Oct 2022 19:51:56 +0200 Subject: [PATCH 03/10] minor assert fixed --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 2 +- src/tc_keywords/j1b_pen.irp.f | 3 ++- src/tc_keywords/tc_keywords.irp.f | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index c058d0d8..fde4daa7 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -170,7 +170,7 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao - ASSERT(beta .gt. 0.d0) + ASSERT(beta .ge. 0.d0) if(beta .lt. 1d-10) then overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j) return diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 57220245..faf8ffde 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -47,10 +47,11 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] integer :: i do i = 1, nucl_num - j1b_pen(i) = 1d5 + j1b_pen(i) = 1.d0 enddo endif + print*,'j1b_pen = ',j1b_pen END_PROVIDER diff --git a/src/tc_keywords/tc_keywords.irp.f b/src/tc_keywords/tc_keywords.irp.f index 3bc68550..d8b862f5 100644 --- a/src/tc_keywords/tc_keywords.irp.f +++ b/src/tc_keywords/tc_keywords.irp.f @@ -4,4 +4,5 @@ program tc_keywords ! TODO : Put the documentation of the program here END_DOC print *, 'Hello world' + provide j1b_pen end From 7d05268ec85cf4ca3e077323d29751e14c495f13 Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 22 Oct 2022 20:47:55 +0200 Subject: [PATCH 04/10] it woooorks --- src/tc_bi_ortho/print_tc_wf.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 58a733a7..e6ea5965 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -24,7 +24,7 @@ subroutine write_l_r_wf output=trim(ezfio_filename)//'.tc_wf' i_unit_output = getUnitAndOpen(output,'w') integer :: i - print*,'Writing the left-right wf' + print*,'Writing the left-right wf, and usual psi' do i = 1, N_det write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i) enddo From 97c6afda396c536a4688b8bed6a6d30560dde582 Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 22 Oct 2022 23:00:06 +0200 Subject: [PATCH 05/10] changed the davidson non sym keyword --- src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f | 8 ++++---- src/tc_keywords/j1b_pen.irp.f | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index cc689391..1bed60fe 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -128,10 +128,10 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N if(itermax > 4) then itermax = itermax - 1 - else if (m==1.and.disk_based_davidson) then - m = 0 - disk_based = .True. - itermax = 6 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! itermax = 6 else nproc_target = nproc_target - 1 endif diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index faf8ffde..207a9134 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -47,7 +47,7 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] integer :: i do i = 1, nucl_num - j1b_pen(i) = 1.d0 + j1b_pen(i) = 100.d0 enddo endif From c619b30fe5ff9c0f6423369c598c1eb3b089ffd2 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 23 Oct 2022 20:50:14 +0200 Subject: [PATCH 06/10] added cosgtos_ao_int --- src/cosgtos_ao_int/EZFIO.cfg | 19 + src/cosgtos_ao_int/NEED | 1 + src/cosgtos_ao_int/README.rst | 4 + src/cosgtos_ao_int/aos_cosgtos.irp.f | 210 +++ src/cosgtos_ao_int/cosgtos_ao_int.irp.f | 7 + src/cosgtos_ao_int/expoim_opt.py | 172 ++ src/cosgtos_ao_int/gauss_legendre.irp.f | 57 + src/cosgtos_ao_int/one_e_Coul_integrals.irp.f | 535 ++++++ src/cosgtos_ao_int/one_e_kin_integrals.irp.f | 223 +++ src/cosgtos_ao_int/two_e_Coul_integrals.irp.f | 1584 +++++++++++++++++ 10 files changed, 2812 insertions(+) create mode 100644 src/cosgtos_ao_int/EZFIO.cfg create mode 100644 src/cosgtos_ao_int/NEED create mode 100644 src/cosgtos_ao_int/README.rst create mode 100644 src/cosgtos_ao_int/aos_cosgtos.irp.f create mode 100644 src/cosgtos_ao_int/cosgtos_ao_int.irp.f create mode 100644 src/cosgtos_ao_int/expoim_opt.py create mode 100644 src/cosgtos_ao_int/gauss_legendre.irp.f create mode 100644 src/cosgtos_ao_int/one_e_Coul_integrals.irp.f create mode 100644 src/cosgtos_ao_int/one_e_kin_integrals.irp.f create mode 100644 src/cosgtos_ao_int/two_e_Coul_integrals.irp.f diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg new file mode 100644 index 00000000..8edeecd0 --- /dev/null +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -0,0 +1,19 @@ +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider,ocaml +default: False + +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED new file mode 100644 index 00000000..43553132 --- /dev/null +++ b/src/cosgtos_ao_int/NEED @@ -0,0 +1 @@ +ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst new file mode 100644 index 00000000..01f25d6d --- /dev/null +++ b/src/cosgtos_ao_int/README.rst @@ -0,0 +1,4 @@ +============== +cosgtos_ao_int +============== + diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/cosgtos_ao_int/aos_cosgtos.irp.f new file mode 100644 index 00000000..6a4d54fd --- /dev/null +++ b/src/cosgtos_ao_int/aos_cosgtos.irp.f @@ -0,0 +1,210 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_coef_norm_ord_transp_cosgtos(i,j) = ao_coef_norm_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ complex*16, ao_expo_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_expo_ord_transp_cosgtos(i,j) = ao_expo_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + + integer :: i, j, powA(3), nz + double precision :: norm + complex*16 :: overlap_x, overlap_y, overlap_z, C_A(3) + complex*16 :: integ1, integ2, expo + + nz = 100 + + C_A(1) = (0.d0, 0.d0) + C_A(2) = (0.d0, 0.d0) + C_A(3) = (0.d0, 0.d0) + + ao_coef_norm_cosgtos = 0.d0 + + do i = 1, ao_num + + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + + ! Normalization of the primitives + if(primitives_normalized) then + + do j = 1, ao_prim_num(i) + + expo = ao_expo(i,j) + (0.d0, 1.d0) * ao_expoim_cosgtos(i,j) + + call overlap_cgaussian_xyz(C_A, C_A, expo, expo, powA, powA, overlap_x, overlap_y, overlap_z, integ1, nz) + call overlap_cgaussian_xyz(C_A, C_A, conjg(expo), expo, powA, powA, overlap_x, overlap_y, overlap_z, integ2, nz) + + norm = 2.d0 * real( integ1 + integ2 ) + + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) / dsqrt(norm) + enddo + + else + + do j = 1, ao_prim_num(i) + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) + enddo + + endif + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_cosgtos, (ao_num, ao_prim_num_max) ] +&BEGIN_PROVIDER [ complex*16 , ao_expo_ord_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + integer :: i, j + integer :: iorder(ao_prim_num_max) + double precision :: d(ao_prim_num_max,3) + + d = 0.d0 + + do i = 1, ao_num + + do j = 1, ao_prim_num(i) + iorder(j) = j + d(j,1) = ao_expo(i,j) + d(j,2) = ao_coef_norm_cosgtos(i,j) + d(j,3) = ao_expoim_cosgtos(i,j) + enddo + + call dsort (d(1,1), iorder, ao_prim_num(i)) + call dset_order(d(1,2), iorder, ao_prim_num(i)) + call dset_order(d(1,3), iorder, ao_prim_num(i)) + + do j = 1, ao_prim_num(i) + ao_expo_ord_cosgtos (i,j) = d(j,1) + (0.d0, 1.d0) * d(j,3) + ao_coef_norm_ord_cosgtos(i,j) = d(j,2) + enddo + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, overlap, overlap_x, overlap_y, overlap_z + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap1, overlap_x1, overlap_y1, overlap_z1 + complex*16 :: overlap2, overlap_x2, overlap_y2, overlap_z2 + + ao_overlap_cosgtos = 0.d0 + ao_overlap_cosgtos_x = 0.d0 + ao_overlap_cosgtos_y = 0.d0 + ao_overlap_cosgtos_z = 0.d0 + + dim1 = 100 + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, n, l, c & + !$OMP , overlap_x , overlap_y , overlap_z , overlap & + !$OMP , overlap_x1, overlap_y1, overlap_z1, overlap1 & + !$OMP , overlap_x2, overlap_y2, overlap_z2, overlap2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_overlap_cosgtos_x, ao_overlap_cosgtos_y, ao_overlap_cosgtos_z, ao_overlap_cosgtos & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos ) + + do j = 1, ao_num + + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x1, overlap_y1, overlap_z1, overlap1, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, conjg(alpha), beta, power_A, power_B & + , overlap_x2, overlap_y2, overlap_z2, overlap2, dim1 ) + + overlap_x = 2.d0 * real( overlap_x1 + overlap_x2 ) + overlap_y = 2.d0 * real( overlap_y1 + overlap_y2 ) + overlap_z = 2.d0 * real( overlap_z1 + overlap_z2 ) + overlap = 2.d0 * real( overlap1 + overlap2 ) + + ao_overlap_cosgtos(i,j) = ao_overlap_cosgtos(i,j) + c * overlap + + if( isnan(ao_overlap_cosgtos(i,j)) ) then + print*,'i, j', i, j + print*,'l, n', l, n + print*,'c, overlap', c, overlap + print*, overlap_x, overlap_y, overlap_z + stop + endif + + ao_overlap_cosgtos_x(i,j) = ao_overlap_cosgtos_x(i,j) + c * overlap_x + ao_overlap_cosgtos_y(i,j) = ao_overlap_cosgtos_y(i,j) + c * overlap_y + ao_overlap_cosgtos_z(i,j) = ao_overlap_cosgtos_z(i,j) + c * overlap_z + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + + diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f new file mode 100644 index 00000000..d65dfba5 --- /dev/null +++ b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f @@ -0,0 +1,7 @@ +program cosgtos_ao_int + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/src/cosgtos_ao_int/expoim_opt.py b/src/cosgtos_ao_int/expoim_opt.py new file mode 100644 index 00000000..d15b0151 --- /dev/null +++ b/src/cosgtos_ao_int/expoim_opt.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python + +import sys +import os +import subprocess +from datetime import datetime +import time +import numpy as np +from modif_powell_imp import my_fmin_powell + +QP_PATH=os.environ["QP_ROOT"] +sys.path.insert(0, QP_PATH+"external/ezfio/Python") +from ezfio import ezfio + + + + + +#------------------------------------------------------------------------------ +# +def get_expoim(): + + expo_im = np.array(ezfio.get_cosgtos_ao_int_ao_expoim_cosgtos()).T + #print(expo_im.shape) + + x = [] + for i in range(ao_num): + for j in range(ao_prim_num[i]): + x.append(expo_im[i,j]) + + return x + +# --- + +def set_expoim(x): + + expo_im = np.zeros((ao_num, ao_prim_num_max)) + + ii = 0 + for i in range(ao_num): + for j in range(ao_prim_num[i]): + expo_im[i,j] = x[ii] + ii = ii + 1 + + ezfio.set_cosgtos_ao_int_ao_expoim_cosgtos(expo_im.T) +# +#------------------------------------------------------------------------------ + + +#------------------------------------------------------------------------------ +# +def save_res(results, file_output): + + lines = results.splitlines() + with open(file_output, "w") as f: + for line in lines: + f.write(f"{line}\n") + +# +#------------------------------------------------------------------------------ + + + +#------------------------------------------------------------------------------ +# +def get_scfenergy(results): + + scf_energy = 0.0 + + lines = results.splitlines() + for line in lines: + if("SCF energy" in line): + scf_energy = float(line.split()[-1]) + + return scf_energy +# +#------------------------------------------------------------------------------ + + + + +#------------------------------------------------------------------------------ +# +def run_scf(): + + return subprocess.check_output( ['qp_run', 'scf', EZFIO_file] + , encoding = "utf-8" ) +# +#------------------------------------------------------------------------------ + + + +#------------------------------------------------------------------------------ +# +def f_scf(x): + + global i_call + i_call += 1 + + #print(x) + + # set expo + set_expoim(x) + + # run scf + results = run_scf() + #save_res(results, "scf_"+str(i_call)) + + + # get scf_energy + scf_energy = get_scfenergy(results) + print( scf_energy ) + sys.stdout.flush() + + return scf_energy +# +#------------------------------------------------------------------------------ + + + + +if __name__ == '__main__': + + t0 = time.time() + + EZFIO_file = sys.argv[1] + ezfio.set_file(EZFIO_file) + print(" Today's date:", datetime.now() ) + print(" EZFIO file = {}".format(EZFIO_file)) + + + ao_num = ezfio.get_ao_basis_ao_num() + print(f" ao_num = {ao_num}") + + ao_prim_num = ezfio.get_ao_basis_ao_prim_num() + + ao_prim_num_max = np.amax(ao_prim_num) + print(f" ao_prim_num_max = {ao_prim_num_max}") + + ezfio.set_ao_basis_ao_prim_num_max(ao_prim_num_max) + + x = get_expoim() + + n_par = len(x) + print(' nb of parameters = {}'.format(n_par)) + + sys.stdout.flush() + + #x = (np.random.rand(n_par) - 0.5) * 1.0 + x = [ (+0.00) for _ in range(n_par)] + + x_min = [ (-10.0) for _ in range(n_par)] + x_max = [ (+10.0) for _ in range(n_par)] + + i_call = 0 + memo_val = {'fmin': 100.} + + opt = my_fmin_powell( f_scf + , x, x_min, x_max + #, xtol = 1e-1 + #, ftol = 1e-1 + , maxfev = 1e8 + , full_output = 1 + , verbose = 1 ) + + + print(" x = " + str(opt)) + + print(" end after {:.3f} minutes".format((time.time()-t0)/60.) ) + + # !!! +# !!! diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/cosgtos_ao_int/gauss_legendre.irp.f new file mode 100644 index 00000000..4bdadb6e --- /dev/null +++ b/src/cosgtos_ao_int/gauss_legendre.irp.f @@ -0,0 +1,57 @@ + BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ] +&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ] + implicit none + BEGIN_DOC + ! t_w(i,1,k) = w(i) + ! t_w(i,2,k) = t(i) + END_DOC + integer :: i,j,l + l=0 + do i = 2,n_pt_max_integrals,2 + l = l+1 + call gauleg(0.d0,1.d0,gauleg_t2(1,l),gauleg_w(1,l),i) + do j=1,i + gauleg_t2(j,l) *= gauleg_t2(j,l) + enddo + enddo + +END_PROVIDER + +subroutine gauleg(x1,x2,x,w,n) + implicit none + BEGIN_DOC + ! Gauss-Legendre + END_DOC + integer, intent(in) :: n + double precision, intent(in) :: x1, x2 + double precision, intent (out) :: x(n),w(n) + double precision, parameter :: eps=3.d-14 + + integer :: m,i,j + double precision :: xm, xl, z, z1, p1, p2, p3, pp, dn + m=(n+1)/2 + xm=0.5d0*(x2+x1) + xl=0.5d0*(x2-x1) + dn = dble(n) + do i=1,m + z=dcos(3.141592654d0*(dble(i)-.25d0)/(dble(n)+.5d0)) + z1 = z+1.d0 + do while (dabs(z-z1) > eps) + p1=1.d0 + p2=0.d0 + do j=1,n + p3=p2 + p2=p1 + p1=(dble(j+j-1)*z*p2-dble(j-1)*p3)/j + enddo + pp=dn*(z*p1-p2)/(z*z-1.d0) + z1=z + z=z1-p1/pp + end do + x(i)=xm-xl*z + x(n+1-i)=xm+xl*z + w(i)=(xl+xl)/((1.d0-z*z)*pp*pp) + w(n+1-i)=w(i) + enddo +end + diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f new file mode 100644 index 00000000..7f94f226 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f @@ -0,0 +1,535 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_integrals_n_e_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Nucleus-electron interaction, in the cosgtos |AO| basis set. + ! + ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` + ! + END_DOC + + implicit none + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: c, Z, A_center(3), B_center(3), C_center(3) + complex*16 :: alpha, beta, c1, c2 + + complex*16 :: NAI_pol_mult_cosgtos + + ao_integrals_n_e_cosgtos = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE ( i, j, k, l, m, alpha, beta, A_center, B_center, C_center & + !$OMP , power_A, power_B, num_A, num_B, Z, c, c1, c2, n_pt_in ) & + !$OMP SHARED ( ao_num, ao_prim_num, ao_nucl, nucl_coord, ao_power, nucl_num, nucl_charge & + !$OMP , ao_expo_ord_transp_cosgtos, ao_coef_norm_ord_transp_cosgtos & + !$OMP , n_pt_max_integrals, ao_integrals_n_e_cosgtos ) + + n_pt_in = n_pt_max_integrals + + !$OMP DO SCHEDULE (dynamic) + + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3) = ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + num_B = ao_nucl(i) + power_B(1:3) = ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(l,j) + + do m = 1, ao_prim_num(i) + beta = ao_expo_ord_transp_cosgtos(m,i) + + c = 0.d0 + do k = 1, nucl_num + + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, real(alpha), real(beta) + + c1 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !c2 = c1 + c2 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , conjg(alpha), beta, C_center, n_pt_in ) + + !print *, ' c1 = ', real(c1) + !print *, ' c2 = ', real(c2) + + c = c - Z * 2.d0 * real(c1 + c2) + + enddo + ao_integrals_n_e_cosgtos(i,j) = ao_integrals_n_e_cosgtos(i,j) & + + ao_coef_norm_ord_transp_cosgtos(l,j) & + * ao_coef_norm_ord_transp_cosgtos(m,i) * c + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! --- + +complex*16 function NAI_pol_mult_cosgtos(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in) + + BEGIN_DOC + ! + ! Computes the electron-nucleus attraction with two primitves cosgtos. + ! + ! :math:`\langle g_i | \frac{1}{|r-R_c|} | g_j \rangle` + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: C_center(3), A_center(3), B_center(3) + complex*16, intent(in) :: alpha, beta + + integer :: i, n_pt, n_pt_out + double precision :: dist, const_mod + complex*16 :: p, p_inv, rho, dist_integral, const, const_factor, coeff, factor + complex*16 :: accu, P_center(3) + complex*16 :: d(0:n_pt_in) + + complex*16 :: V_n_e_cosgtos + complex*16 :: crint + + if ( (A_center(1)/=B_center(1)) .or. (A_center(2)/=B_center(2)) .or. (A_center(3)/=B_center(3)) .or. & + (A_center(1)/=C_center(1)) .or. (A_center(2)/=C_center(2)) .or. (A_center(3)/=C_center(3)) ) then + + continue + + else + + NAI_pol_mult_cosgtos = V_n_e_cosgtos( power_A(1), power_A(2), power_A(3) & + , power_B(1), power_B(2), power_B(3) & + , alpha, beta ) + return + + endif + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + rho = alpha * beta * p_inv + + dist = 0.d0 + dist_integral = (0.d0, 0.d0) + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i)) + enddo + + const_factor = dist * rho + const = p * dist_integral + + const_mod = dsqrt(real(const_factor)*real(const_factor) + aimag(const_factor)*aimag(const_factor)) + if(const_mod > 80.d0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + factor = zexp(-const_factor) + coeff = dtwo_pi * factor * p_inv + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt = 2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) ) + if(n_pt == 0) then + NAI_pol_mult_cosgtos = coeff * crint(0, const) + return + endif + + call give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + if(n_pt_out < 0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + accu = (0.d0, 0.d0) + do i = 0, n_pt_out, 2 + accu += crint(shiftr(i, 1), const) * d(i) + +! print *, shiftr(i, 1), real(const), real(d(i)), real(crint(shiftr(i, 1), const)) + enddo + NAI_pol_mult_cosgtos = accu * coeff + +end function NAI_pol_mult_cosgtos + +! --- + +subroutine give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + BEGIN_DOC + ! Returns the explicit polynomial in terms of the "t" variable of the following + ! + ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. + END_DOC + + implicit none + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), C_center(3) + complex*16, intent(in) :: alpha, beta + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:n_pt_in) + + integer :: a_x, b_x, a_y, b_y, a_z, b_z + integer :: n_pt1, n_pt2, n_pt3, dim, i, n_pt_tmp + complex*16 :: p, P_center(3), rho, p_inv, p_inv_2 + complex*16 :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2) + complex*16 :: d1(0:n_pt_in), d2(0:n_pt_in), d3(0:n_pt_in) + + ASSERT (n_pt_in > 1) + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + p_inv_2 = 0.5d0 * p_inv + + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + enddo + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + d1(i) = (0.d0, 0.d0) + d2(i) = (0.d0, 0.d0) + d3(i) = (0.d0, 0.d0) + enddo + + ! --- + + n_pt1 = n_pt_in + + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(1) - C_center(1)) + + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(1) - C_center(1)) + + R2x(0) = p_inv_2 + R2x(1) = (0.d0, 0.d0) + R2x(2) = -p_inv_2 + + a_x = power_A(1) + b_x = power_B(1) + call I_x1_pol_mult_one_e_cosgtos(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in) + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt2 = n_pt_in + + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(2) - C_center(2)) + + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(2) - C_center(2)) + + a_y = power_A(2) + b_y = power_B(2) + call I_x1_pol_mult_one_e_cosgtos(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in) + + if(n_pt2 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt3 = n_pt_in + + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(3) - C_center(3)) + + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(3) - C_center(3)) + + a_z = power_A(3) + b_z = power_B(3) + call I_x1_pol_mult_one_e_cosgtos(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in) + + if(n_pt3 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt_tmp = 0 + call multiply_cpoly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp) + do i = 0, n_pt_tmp + d1(i) = (0.d0, 0.d0) + enddo + + n_pt_out = 0 + call multiply_cpoly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out) + do i = 0, n_pt_out + d(i) = d1(i) + enddo + +end subroutine give_cpolynomial_mult_center_one_e + +! --- + +recursive subroutine I_x1_pol_mult_one_e_cosgtos(a, c, R1x, R1xp, R2x, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt_in + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:n_pt_in) + + integer :: nx, ix, dim, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + dim = n_pt_in + + if( (a==0) .and. (c==0)) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif( (c < 0) .or. (nd < 0) ) then + + nd = -1 + return + + elseif((a == 0) .and. (c .ne. 0)) then + + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, n_pt_in) + + elseif(a == 1) then + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x2_pol_mult_one_e_cosgtos(c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + else + + nx = 0 + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-2, c, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-1, c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x1_pol_mult_one_e_cosgtos(a-1, c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + endif + +end subroutine I_x1_pol_mult_one_e_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, dim) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(out) :: d(0:max_dim) + + integer :: i, nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + if(c == 0) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif((nd < 0) .or. (c < 0)) then + + nd = -1 + return + + else + + nx = 0 + do ix = 0, dim + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-2, R1x, R1xp, R2x, X, nx, dim) + + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + do ix = 0, dim + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-1, R1x, R1xp, R2x, Y, ny, dim) + + if(ny .ge. 0) then + call multiply_cpoly(Y, ny, R1xp, 2, d, nd) + endif + + endif + +end subroutine I_x2_pol_mult_one_e_cosgtos + +! --- + +complex*16 function V_n_e_cosgtos(a_x, a_y, a_z, b_x, b_y, b_z, alpha, beta) + + BEGIN_DOC + ! Primitve nuclear attraction between the two primitves centered on the same atom. + ! + ! $p_1 = x^{a_x} y^{a_y} z^{a_z} \exp(-\alpha r^2)$ + ! + ! $p_2 = x^{b_x} y^{b_y} z^{b_z} \exp(-\beta r^2)$ + END_DOC + + implicit none + + integer, intent(in) :: a_x, a_y, a_z, b_x, b_y, b_z + complex*16, intent(in) :: alpha, beta + + double precision :: V_phi, V_theta + complex*16 :: V_r_cosgtos + + if( (iand(a_x + b_x, 1) == 1) .or. & + (iand(a_y + b_y, 1) == 1) .or. & + (iand(a_z + b_z, 1) == 1) ) then + + V_n_e_cosgtos = (0.d0, 0.d0) + + else + + V_n_e_cosgtos = V_r_cosgtos(a_x + b_x + a_y + b_y + a_z + b_z + 1, alpha + beta) & + * V_phi(a_x + b_x, a_y + b_y) & + * V_theta(a_z + b_z, a_x + b_x + a_y + b_y + 1) + endif + +end function V_n_e_cosgtos + +! --- + +complex*16 function V_r_cosgtos(n, alpha) + + BEGIN_DOC + ! Computes the radial part of the nuclear attraction integral: + ! + ! $\int_{0}^{\infty} r^n \exp(-\alpha r^2) dr$ + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer , intent(in) :: n + complex*16, intent(in) :: alpha + + double precision :: fact + + if(iand(n, 1) .eq. 1) then + V_r_cosgtos = 0.5d0 * fact(shiftr(n, 1)) / (alpha**(shiftr(n, 1) + 1)) + else + V_r_cosgtos = sqpi * fact(n) / fact(shiftr(n, 1)) * (0.5d0/zsqrt(alpha))**(n+1) + endif + +end function V_r_cosgtos + +! --- + diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f new file mode 100644 index 00000000..710b04d4 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f @@ -0,0 +1,223 @@ + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, deriv_tmp + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap_x, overlap_y, overlap_z, overlap + complex*16 :: overlap_x0_1, overlap_y0_1, overlap_z0_1 + complex*16 :: overlap_x0_2, overlap_y0_2, overlap_z0_2 + complex*16 :: overlap_m2_1, overlap_p2_1 + complex*16 :: overlap_m2_2, overlap_p2_2 + complex*16 :: deriv_tmp_1, deriv_tmp_2 + + + dim1 = 100 + + ! -- Dummy call to provide everything + + A_center(:) = (0.0d0, 0.d0) + B_center(:) = (1.0d0, 0.d0) + alpha = (1.0d0, 0.d0) + beta = (0.1d0, 0.d0) + power_A = 1 + power_B = 0 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + ! --- + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, l, n, c & + !$OMP , deriv_tmp, deriv_tmp_1, deriv_tmp_2 & + !$OMP , overlap_x, overlap_y, overlap_z, overlap & + !$OMP , overlap_m2_1, overlap_p2_1, overlap_m2_2, overlap_p2_2 & + !$OMP , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap_x0_2, overlap_y0_2, overlap_z0_2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos & + !$OMP , ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z ) + + do j = 1, ao_num + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + ao_deriv2_cosgtos_x(i,j) = 0.d0 + ao_deriv2_cosgtos_y(i,j) = 0.d0 + ao_deriv2_cosgtos_z(i,j) = 0.d0 + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x0_2, overlap_y0_2, overlap_z0_2, overlap, dim1 ) + + ! --- + + power_A(1) = power_A(1) - 2 + if(power_A(1) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_m2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_m2_2, overlap_y, overlap_z, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(1) = power_A(1) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_p2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_p2_2, overlap_y, overlap_z, overlap, dim1 ) + + power_A(1) = power_A(1) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_1 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_y0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_2 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_y0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_x(i,j) += c * deriv_tmp + + ! --- + + power_A(2) = power_A(2) - 2 + if(power_A(2) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_m2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_m2_2, overlap_y, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(2) = power_A(2) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_p2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_p2_2, overlap_y, overlap, dim1 ) + + power_A(2) = power_A(2) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_1 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_2 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_y(i,j) += c * deriv_tmp + + ! --- + + power_A(3) = power_A(3) - 2 + if(power_A(3) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_m2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_m2_2, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(3) = power_A(3) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_p2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_p2_2, overlap, dim1 ) + + power_A(3) = power_A(3) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_1 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_y0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_2 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_y0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_z(i,j) += c * deriv_tmp + + ! --- + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, ao_kinetic_integrals_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Kinetic energy integrals in the cosgtos |AO| basis. + ! + ! $\langle \chi_i |\hat{T}| \chi_j \rangle$ + ! + END_DOC + + implicit none + integer :: i, j + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i, j) & + !$OMP SHARED(ao_num, ao_kinetic_integrals_cosgtos, ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z) + do j = 1, ao_num + do i = 1, ao_num + ao_kinetic_integrals_cosgtos(i,j) = -0.5d0 * ( ao_deriv2_cosgtos_x(i,j) & + + ao_deriv2_cosgtos_y(i,j) & + + ao_deriv2_cosgtos_z(i,j) ) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f new file mode 100644 index 00000000..527a98d5 --- /dev/null +++ b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f @@ -0,0 +1,1584 @@ + +! --- + +double precision function ao_two_e_integral_cosgtos(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision :: ao_two_e_integral_cosgtos_schwartz_accel + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then + + !print *, ' with shwartz acc ' + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + else + !print *, ' without shwartz acc ' + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + ao_two_e_integral_cosgtos = 0.d0 + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + !print *, ' not the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + !integer :: ii + !do ii = 1, 3 + ! print *, 'fact_p1', fact_p1 + ! print *, 'fact_p2', fact_p2 + ! print *, 'fact_p3', fact_p3 + ! print *, 'fact_p4', fact_p4 + ! !print *, pp1, p1_inv + ! !print *, pp2, p2_inv + ! !print *, pp3, p3_inv + ! !print *, pp4, p4_inv + !enddo + ! if( abs(aimag(P1_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_1 is complex !!' + ! print *, P1_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P2_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_2 is complex !!' + ! print *, P2_center + ! print *, ' old expos:' + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! print *, ' new expo:' + ! print *, pp2, p2_inv + ! print *, ' factor:' + ! print *, fact_p2 + ! print *, ' old centers:' + ! print *, I_center, J_center + ! print *, ' powers:' + ! print *, I_power, J_power + ! stop + ! endif + ! if( abs(aimag(P3_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_3 is complex !!' + ! print *, P3_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P4_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_4 is complex !!' + ! print *, P4_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + !enddo + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + !do ii = 1, 3 + ! !print *, qq1, q1_inv + ! !print *, qq2, q2_inv + ! print *, 'fact_q1', fact_q1 + ! print *, 'fact_q2', fact_q2 + !enddo + ! if( abs(aimag(Q1_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_1 is complex !!' + ! print *, Q1_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + ! if( abs(aimag(Q2_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_2 is complex !!' + ! print *, Q2_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + !enddo + + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + !integral_tot = integral1 + !print*, integral_tot + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + !print *, ' the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + endif + +end function ao_two_e_integral_cosgtos + +! --- + +double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision, allocatable :: schwartz_kl(:,:) + double precision :: thr + double precision :: schwartz_ij + + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + + thr = ao_integrals_threshold*ao_integrals_threshold + + allocate( schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)) ) + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, K_power, L_power, K_center, L_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, K_power, L_power, K_center, L_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + + deallocate(schwartz_kl) + +end function ao_two_e_integral_cosgtos_schwartz_accel + +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] + + BEGIN_DOC + ! Needed to compute Schwartz inequalities + END_DOC + + implicit none + integer :: i, k + double precision :: ao_two_e_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP SCHEDULE(dynamic) + do i = 1, ao_num + do k = 1, i + ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +complex*16 function general_primitive_integral_cosgtos( dim, P_new, P_center, fact_p, p, p_inv, iorder_p & + , Q_new, Q_center, fact_q, q, q_inv, iorder_q ) + + BEGIN_DOC + ! + ! Computes the integral where p,q,r,s are cos-cGTOS primitives + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: iorder_p(3), iorder_q(3) + complex*16, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv + complex*16, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv + + integer :: i, j, nx, ny, nz, n_Ix, n_Iy, n_Iz, iorder, n_pt_tmp, n_pt_out + double precision :: tmp_mod + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: pq, pq_inv, pq_inv_2, p01_1, p01_2, p10_1, p10_2, ppq, sq_ppq + complex*16 :: rho, dist, const + complex*16 :: accu, tmp_p, tmp_q + complex*16 :: dx(0:max_dim), Ix_pol(0:max_dim), dy(0:max_dim), Iy_pol(0:max_dim), dz(0:max_dim), Iz_pol(0:max_dim) + complex*16 :: d1(0:max_dim), d_poly(0:max_dim) + + complex*16 :: crint_sum + + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly + + general_primitive_integral_cosgtos = (0.d0, 0.d0) + + pq = (0.5d0, 0.d0) * p_inv * q_inv + pq_inv = (0.5d0, 0.d0) / (p + q) + pq_inv_2 = pq_inv + pq_inv + p10_1 = q * pq ! 1/(2p) + p01_1 = p * pq ! 1/(2q) + p10_2 = pq_inv_2 * p10_1 * q ! 0.5d0*q/(pq + p*p) + p01_2 = pq_inv_2 * p01_1 * p ! 0.5d0*p/(q*q + pq) + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + ! --- + + iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1) + + do i = 0, iorder + Ix_pol(i) = (0.d0, 0.d0) + enddo + + n_Ix = 0 + do i = 0, iorder_p(1) + + tmp_p = P_new(i,1) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(1) + + tmp_q = tmp_p * Q_new(j,1) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(1), Q_center(1), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dx, nx, tmp_q, Ix_pol, n_Ix) + enddo + enddo + if(n_Ix == -1) then + return + endif + + ! --- + + iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2) + + do i = 0, iorder + Iy_pol(i) = (0.d0, 0.d0) + enddo + + n_Iy = 0 + do i = 0, iorder_p(2) + + tmp_p = P_new(i,2) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(2) + + tmp_q = tmp_p * Q_new(j,2) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(2), Q_center(2), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dy, ny, tmp_q, Iy_pol, n_Iy) + enddo + enddo + + if(n_Iy == -1) then + return + endif + + ! --- + + iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3) + + do i = 0, iorder + Iz_pol(i) = (0.d0, 0.d0) + enddo + + n_Iz = 0 + do i = 0, iorder_p(3) + + tmp_p = P_new(i,3) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(3) + + tmp_q = tmp_p * Q_new(j,3) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(3), Q_center(3), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dz, nz, tmp_q, Iz_pol, n_Iz) + enddo + enddo + + if(n_Iz == -1) then + return + endif + + ! --- + + rho = p * q * pq_inv_2 + dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) & + + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) & + + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3)) + const = dist * rho + + n_pt_tmp = n_Ix + n_Iy + do i = 0, n_pt_tmp + d_poly(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp) + if(n_pt_tmp == -1) then + return + endif + n_pt_out = n_pt_tmp + n_Iz + do i = 0, n_pt_out + d1(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out) + + accu = crint_sum(n_pt_out, const, d1) +! print *, n_pt_out, real(d1(0:n_pt_out)) +! print *, real(accu) + + general_primitive_integral_cosgtos = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / sq_ppq + +end function general_primitive_integral_cosgtos + +! --- + +complex*16 function ERI_cosgtos(alpha, beta, delta, gama, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z) + + BEGIN_DOC + ! ATOMIC PRIMTIVE two-electron integral between the 4 primitives :: + ! primitive_1 = x1**(a_x) y1**(a_y) z1**(a_z) exp(-alpha * r1**2) + ! primitive_2 = x1**(b_x) y1**(b_y) z1**(b_z) exp(- beta * r1**2) + ! primitive_3 = x2**(c_x) y2**(c_y) z2**(c_z) exp(-delta * r2**2) + ! primitive_4 = x2**(d_x) y2**(d_y) z2**(d_z) exp(- gama * r2**2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(in) :: delta, gama, alpha, beta + + integer :: a_x_2, b_x_2, c_x_2, d_x_2, a_y_2, b_y_2, c_y_2, d_y_2, a_z_2, b_z_2, c_z_2, d_z_2 + integer :: i, j, k, l, n_pt + integer :: nx, ny, nz + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: p, q, ppq, sq_ppq, coeff, I_f + + ERI_cosgtos = (0.d0, 0.d0) + + ASSERT (REAL(alpha) >= 0.d0) + ASSERT (REAL(beta ) >= 0.d0) + ASSERT (REAL(delta) >= 0.d0) + ASSERT (REAL(gama ) >= 0.d0) + + nx = a_x + b_x + c_x + d_x + if(iand(nx,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + ny = a_y + b_y + c_y + d_y + if(iand(ny,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + nz = a_z + b_z + c_z + d_z + if(iand(nz,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + n_pt = shiftl(nx+ny+nz, 1) + + p = alpha + beta + q = delta + gama + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + coeff = pi_5_2 / (p * q * sq_ppq) + if(n_pt == 0) then + ERI_cosgtos = coeff + return + endif + + call integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + ERI_cosgtos = I_f * coeff + +end function ERI_cosgtos + +! --- + +subroutine integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + BEGIN_DOC + ! Calculates the integral of the polynomial : + ! + ! $I_{x_1}(a_x+b_x, c_x+d_x, p, q) \, I_{x_1}(a_y+b_y, c_y+d_y, p, q) \, I_{x_1}(a_z+b_z, c_z+d_z, p, q)$ + ! in $( 0 ; 1)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(out) :: I_f + + integer :: i, j, ix, iy, iz, jx, jy, jz, sx, sy, sz + complex*16 :: p, q + complex*16 :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2 + complex*16 :: B00(n_pt_max_integrals), B10(n_pt_max_integrals), B01(n_pt_max_integrals) + complex*16 :: t1(n_pt_max_integrals), t2(n_pt_max_integrals) + + + ASSERT (n_pt > 1) + + j = shiftr(n_pt, 1) + + pq_inv = (0.5d0, 0.d0) / (p + q) + p10_1 = (0.5d0, 0.d0) / p + p01_1 = (0.5d0, 0.d0) / q + p10_2 = (0.5d0, 0.d0) * q /(p * q + p * p) + p01_2 = (0.5d0, 0.d0) * p /(q * q + q * p) + pq_inv_2 = pq_inv + pq_inv + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: t1, t2, B10, B01, B00 + ix = a_x + b_x + jx = c_x + d_x + iy = a_y + b_y + jy = c_y + d_y + iz = a_z + b_z + jz = c_z + d_z + sx = ix + jx + sy = iy + jy + sz = iz + jz + + do i = 1, n_pt + B10(i) = p10_1 - gauleg_t2(i, j) * p10_2 + B01(i) = p01_1 - gauleg_t2(i, j) * p01_2 + B00(i) = gauleg_t2(i, j) * pq_inv + enddo + + if(sx > 0) then + call I_x1_new_cosgtos(ix, jx, B10, B01, B00, t1, n_pt) + else + do i = 1, n_pt + t1(i) = (1.d0, 0.d0) + enddo + endif + + if(sy > 0) then + call I_x1_new_cosgtos(iy, jy, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + if(sz > 0) then + call I_x1_new_cosgtos(iz, jz, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + I_f = (0.d0, 0.d0) + do i = 1, n_pt + I_f += gauleg_w(i, j) * t1(i) + enddo + +end subroutine integrale_new_cosgtos + +! --- + +recursive subroutine I_x1_new_cosgtos(a, c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + complex*16 :: res2(n_pt_max_integrals) + + if(c < 0) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + else if (a == 0) then + + call I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + else if (a == 1) then + + call I_x2_new_cosgtos(c-1, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c) * B_00(i) * res(i) + enddo + + else + + call I_x1_new_cosgtos(a-2, c , B_10, B_01, B_00, res , n_pt) + call I_x1_new_cosgtos(a-1, c-1, B_10, B_01, B_00, res2, n_pt) + do i = 1, n_pt + res(i) = dble(a-1) * B_10(i) * res(i) + dble(c) * B_00(i) * res2(i) + enddo + + endif + +end subroutine I_x1_new_cosgtos + +! --- + +recursive subroutine I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + + if(c == 1) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + elseif(c == 0) then + + do i = 1, n_pt + res(i) = (1.d0, 0.d0) + enddo + + else + + call I_x1_new_cosgtos(0, c-2, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c-1) * B_01(i) * res(i) + enddo + + endif + +end subroutine I_x2_new_cosgtos + +! --- + +subroutine give_cpolynom_mult_center_x( P_center, Q_center, a_x, d_x, p, q, n_pt_in & + , pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, d, n_pt_out) + + BEGIN_DOC + ! subroutine that returns the explicit polynom in term of the "t" + ! variable of the following polynoms : + ! + ! $I_{x_1}(a_x,d_x,p,q) \, I_{x_1}(a_y,d_y,p,q) \ I_{x_1}(a_z,d_z,p,q)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a_x, d_x + complex*16, intent(in) :: P_center, Q_center, p, q, pq_inv, p10_1, p01_1, p10_2, p01_2, pq_inv_2 + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:max_dim) + + integer :: n_pt1, i + complex*16 :: B10(0:2), B01(0:2), B00(0:2), C00(0:2), D00(0:2) + + ASSERT (n_pt_in >= 0) + + B10(0) = p10_1 + B10(1) = (0.d0, 0.d0) + B10(2) = -p10_2 + + B01(0) = p01_1 + B01(1) = (0.d0, 0.d0) + B01(2) = -p01_2 + + B00(0) = (0.d0, 0.d0) + B00(1) = (0.d0, 0.d0) + B00(2) = pq_inv + + C00(0) = (0.d0, 0.d0) + C00(1) = (0.d0, 0.d0) + C00(2) = -q * (P_center - Q_center) * pq_inv_2 + + D00(0) = (0.d0, 0.d0) + D00(1) = (0.d0, 0.d0) + D00(2) = -p * (Q_center - P_center) * pq_inv_2 + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt1 = n_pt_in + + !DIR$ FORCEINLINE + call I_x1_pol_mult_cosgtos(a_x, d_x, B10, B01, B00, C00, D00, d, n_pt1, n_pt_in) + n_pt_out = n_pt1 + +! print *, ' ' +! print *, a_x, d_x +! print *, real(B10), real(B01), real(B00), real(C00), real(D00) +! print *, n_pt1, real(d(0:n_pt1)) +! print *, ' ' + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + +end subroutine give_cpolynom_mult_center_x + +! --- + +subroutine I_x1_pol_mult_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + if( (c >= 0) .and. (nd >= 0) ) then + + if(a == 1) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a == 2) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a > 2) then + call I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else ! a == 0 + + if(c == 0)then + nd = 0 + d(0) = (1.d0, 0.d0) + return + endif + + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + endif + + else + + nd = -1 + + endif + +end subroutine I_x1_pol_mult_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + ASSERT (a > 2) + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + if(a == 3) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + elseif(a == 4) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT (a >= 5) + call I_x1_pol_mult_recurs_cosgtos(a-2, c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + nx = nd + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + if(c > 0) then + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + endif + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + + ASSERT (a > 2) + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_recurs_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a1_cosgtos(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + if( (c < 0) .or. (nd < 0) ) then + nd = -1 + return + endif + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a1_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if (c>1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = 0.d0 + enddo + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a2_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, dim) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: i + integer :: nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + select case (c) + + case (0) + nd = 0 + d(0) = (1.d0, 0.d0) + return + + case (:-1) + nd = -1 + return + + case (1) + nd = 2 + d(0) = D_00(0) + d(1) = D_00(1) + d(2) = D_00(2) + return + + case (2) + nd = 2 + d(0) = B_01(0) + d(1) = B_01(1) + d(2) = B_01(2) + + ny = 2 + Y(0) = D_00(0) + Y(1) = D_00(1) + Y(2) = D_00(2) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + return + + case default + + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + X(ix) = (0.d0, 0.d0) + enddo + nx = 0 + call I_x2_pol_mult_cosgtos(c-2, B_10, B_01, B_00, C_00, D_00, X, nx, dim) + + !DIR$ LOOP COUNT(6) + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_01, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + Y(ix) = 0.d0 + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, Y, ny, dim) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + + end select + +end subroutine I_x2_pol_mult_cosgtos + +! --- + + From 8a5026af683acef49678f7e7de23d112a8c42728 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 23 Oct 2022 20:51:33 +0200 Subject: [PATCH 07/10] added davidson_keywords --- src/davidson_keywords/EZFIO.cfg | 54 +++++++++++++++++++++++++++++++ src/davidson_keywords/NEED | 1 + src/davidson_keywords/README.rst | 4 +++ src/davidson_keywords/input.irp.f | 43 ++++++++++++++++++++++++ src/davidson_keywords/usef.irp.f | 33 +++++++++++++++++++ 5 files changed, 135 insertions(+) create mode 100644 src/davidson_keywords/EZFIO.cfg create mode 100644 src/davidson_keywords/NEED create mode 100644 src/davidson_keywords/README.rst create mode 100644 src/davidson_keywords/input.irp.f create mode 100644 src/davidson_keywords/usef.irp.f diff --git a/src/davidson_keywords/EZFIO.cfg b/src/davidson_keywords/EZFIO.cfg new file mode 100644 index 00000000..5df761f3 --- /dev/null +++ b/src/davidson_keywords/EZFIO.cfg @@ -0,0 +1,54 @@ +[threshold_davidson] +type: Threshold +doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false. +interface: ezfio,provider,ocaml +default: 1.e-10 + +[threshold_nonsym_davidson] +type: Threshold +doc: Thresholds of non-symetric Davidson's algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[davidson_sze_max] +type: Strictly_positive_int +doc: Number of micro-iterations before re-contracting +default: 15 +interface: ezfio,provider,ocaml + +[state_following] +type: logical +doc: If |true|, the states are re-ordered to match the input states +default: False +interface: ezfio,provider,ocaml + +[disk_based_davidson] +type: logical +doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is availabl +default: True +interface: ezfio,provider,ocaml + +[n_states_diag] +type: States_number +doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag +default: 4 +interface: ezfio,ocaml + +[n_det_max_full] +type: Det_number_max +doc: Maximum number of determinants where |H| is fully diagonalized +interface: ezfio,provider,ocaml +default: 1000 + +[threshold_davidson_from_pt2] +type: logical +doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2 +interface: ezfio,provider,ocaml +default: false + +[distributed_davidson] +type: logical +doc: If |true|, use the distributed algorithm +default: True +interface: ezfio,provider,ocaml + diff --git a/src/davidson_keywords/NEED b/src/davidson_keywords/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/davidson_keywords/NEED @@ -0,0 +1 @@ +ezfio_files diff --git a/src/davidson_keywords/README.rst b/src/davidson_keywords/README.rst new file mode 100644 index 00000000..cdc7cc1f --- /dev/null +++ b/src/davidson_keywords/README.rst @@ -0,0 +1,4 @@ +================= +davidson_keywords +================= + diff --git a/src/davidson_keywords/input.irp.f b/src/davidson_keywords/input.irp.f new file mode 100644 index 00000000..4bd79036 --- /dev/null +++ b/src/davidson_keywords/input.irp.f @@ -0,0 +1,43 @@ + +! --- + +BEGIN_PROVIDER [ integer, n_states_diag ] + implicit none + BEGIN_DOC +! Number of states to consider during the Davdison diagonalization + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_davidson_keywords_n_states_diag(has) + if (has) then + call ezfio_get_davidson_keywords_n_states_diag(n_states_diag) + else + print *, 'davidson_keywords/n_states_diag not found in EZFIO file' + stop 1 + endif + n_states_diag = max(2,N_states * N_states_diag) + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( n_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read n_states_diag with MPI' + endif + IRP_ENDIF + + call write_time(6) + if (mpi_master) then + write(6, *) 'Read n_states_diag' + endif + +END_PROVIDER + +! --- diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f new file mode 100644 index 00000000..fed2ba9b --- /dev/null +++ b/src/davidson_keywords/usef.irp.f @@ -0,0 +1,33 @@ +use bitmasks +use f77_zmq + + +! --- + +BEGIN_PROVIDER [ integer, nthreads_davidson ] + implicit none + BEGIN_DOC + ! Number of threads for Davidson + END_DOC + nthreads_davidson = nproc + character*(32) :: env + call getenv('QP_NTHREADS_DAVIDSON',env) + if (trim(env) /= '') then + read(env,*) nthreads_davidson + call write_int(6,nthreads_davidson,'Target number of threads for ') + endif +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ] + implicit none + BEGIN_DOC + ! Threshold of Davidson's algorithm, using PT2 as a guide + END_DOC + threshold_davidson_pt2 = threshold_davidson + +END_PROVIDER + +! --- + From af2ba9fa38c7e2892c47470ffe8563983a4ac07e Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 24 Oct 2022 11:14:19 +0200 Subject: [PATCH 08/10] added cipsi_tc_bi_ortho --- src/cipsi_tc_bi_ortho/EZFIO.cfg | 36 + src/cipsi_tc_bi_ortho/NEED | 6 + src/cipsi_tc_bi_ortho/cipsi.irp.f | 136 ++ src/cipsi_tc_bi_ortho/energy.irp.f | 51 + src/cipsi_tc_bi_ortho/environment.irp.f | 14 + src/cipsi_tc_bi_ortho/get_d.irp.f | 1735 +++++++++++++++++ src/cipsi_tc_bi_ortho/lock_2rdm.irp.f | 0 src/cipsi_tc_bi_ortho/pt2.irp.f | 89 + .../pt2_stoch_routines.irp.f | 869 +++++++++ src/cipsi_tc_bi_ortho/pt2_type.irp.f | 128 ++ src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f | 549 ++++++ .../run_selection_slave.irp.f | 255 +++ src/cipsi_tc_bi_ortho/selection.irp.f | 1029 ++++++++++ src/cipsi_tc_bi_ortho/selection_buffer.irp.f | 416 ++++ src/cipsi_tc_bi_ortho/selection_weight.irp.f | 134 ++ src/cipsi_tc_bi_ortho/slave_cipsi.irp.f | 350 ++++ src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 147 ++ src/cipsi_tc_bi_ortho/zmq_selection.irp.f | 235 +++ 18 files changed, 6179 insertions(+) create mode 100644 src/cipsi_tc_bi_ortho/EZFIO.cfg create mode 100644 src/cipsi_tc_bi_ortho/NEED create mode 100644 src/cipsi_tc_bi_ortho/cipsi.irp.f create mode 100644 src/cipsi_tc_bi_ortho/energy.irp.f create mode 100644 src/cipsi_tc_bi_ortho/environment.irp.f create mode 100644 src/cipsi_tc_bi_ortho/get_d.irp.f create mode 100644 src/cipsi_tc_bi_ortho/lock_2rdm.irp.f create mode 100644 src/cipsi_tc_bi_ortho/pt2.irp.f create mode 100644 src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f create mode 100644 src/cipsi_tc_bi_ortho/pt2_type.irp.f create mode 100644 src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f create mode 100644 src/cipsi_tc_bi_ortho/run_selection_slave.irp.f create mode 100644 src/cipsi_tc_bi_ortho/selection.irp.f create mode 100644 src/cipsi_tc_bi_ortho/selection_buffer.irp.f create mode 100644 src/cipsi_tc_bi_ortho/selection_weight.irp.f create mode 100644 src/cipsi_tc_bi_ortho/slave_cipsi.irp.f create mode 100644 src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f create mode 100644 src/cipsi_tc_bi_ortho/zmq_selection.irp.f diff --git a/src/cipsi_tc_bi_ortho/EZFIO.cfg b/src/cipsi_tc_bi_ortho/EZFIO.cfg new file mode 100644 index 00000000..7fcf19eb --- /dev/null +++ b/src/cipsi_tc_bi_ortho/EZFIO.cfg @@ -0,0 +1,36 @@ +[save_wf_after_selection] +type: logical +doc: If true, saves the wave function after the selection, before the diagonalization +interface: ezfio,provider,ocaml +default: False + +[seniority_max] +type: integer +doc: Maximum number of allowed open shells. Using -1 selects all determinants +interface: ezfio,ocaml,provider +default: -1 + +[excitation_ref] +type: integer +doc: 1: Hartree-Fock determinant, 2:All determinants of the dominant configuration +interface: ezfio,ocaml,provider +default: 1 + +[excitation_max] +type: integer +doc: Maximum number of excitation with respect to the Hartree-Fock determinant. Using -1 selects all determinants +interface: ezfio,ocaml,provider +default: -1 + +[excitation_alpha_max] +type: integer +doc: Maximum number of excitation for alpha determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants +interface: ezfio,ocaml,provider +default: -1 + +[excitation_beta_max] +type: integer +doc: Maximum number of excitation for beta determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants +interface: ezfio,ocaml,provider +default: -1 + diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED new file mode 100644 index 00000000..4dd1af36 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/NEED @@ -0,0 +1,6 @@ +mpi +perturbation +zmq +iterations_tc +csf +tc_bi_ortho diff --git a/src/cipsi_tc_bi_ortho/cipsi.irp.f b/src/cipsi_tc_bi_ortho/cipsi.irp.f new file mode 100644 index 00000000..b1941068 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f @@ -0,0 +1,136 @@ +subroutine run_cipsi + + BEGIN_DOC + ! Selected Full Configuration Interaction with deterministic selection and + ! stochastic PT2. + END_DOC + + use selection_types + + implicit none + + integer :: i,j,k,ndet + type(pt2_type) :: pt2_data, pt2_data_err + double precision, allocatable :: zeros(:) + integer :: to_select + logical, external :: qp_stop + + double precision :: threshold_generators_save + double precision :: rss + double precision, external :: memory_of_double + double precision :: correlation_energy_ratio,E_denom,E_tc,norm + + PROVIDE H_apply_buffer_allocated distributed_davidson + + print*,'Diagonal elements of the Fock matrix ' + do i = 1, mo_num + write(*,*)i,Fock_matrix_tc_mo_tot(i,i) + enddo + + N_iter = 1 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_generators + + rss = memory_of_double(N_states)*4.d0 + call check_mem(rss,irp_here) + + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + + double precision :: hf_energy_ref + logical :: has, print_pt2 + double precision :: relative_error + + relative_error=PT2_relative_error + + zeros = 0.d0 + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap(:,:) = 0.d0 + pt2_data % variance = huge(1.e0) + + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + + call ezfio_has_hartree_fock_energy(has) + if (has) then + call ezfio_get_hartree_fock_energy(hf_energy_ref) + else + hf_energy_ref = ref_bitmask_energy + endif + + if (N_det > N_det_max) then + psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) + N_det = N_det_max + soft_touch N_det psi_det psi_coef + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! call routine_save_right + endif + + correlation_energy_ratio = 0.d0 + + print_pt2 = .True. + do while ( & + (N_det < N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) & + ) + write(*,'(A)') '--------------------------------------------------------------------------------' + + + to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) + to_select = max(N_states_diag, to_select) + + E_denom = E_tc ! TC Energy of the current wave function + if (do_pt2) then + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + threshold_generators_save = threshold_generators + threshold_generators = 1.d0 + SOFT_TOUCH threshold_generators + call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection + threshold_generators = threshold_generators_save + SOFT_TOUCH threshold_generators + else + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data, N_states) + call ZMQ_selection(to_select, pt2_data) + endif + + N_iter += 1 + + if (qp_stop()) exit + + ! Add selected determinants + call copy_H_apply_buffer_to_wf() + + if (save_wf_after_selection) then + call save_wavefunction + endif + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted_tc + + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + if (qp_stop()) exit + enddo + + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + +end diff --git a/src/cipsi_tc_bi_ortho/energy.irp.f b/src/cipsi_tc_bi_ortho/energy.irp.f new file mode 100644 index 00000000..16f4528e --- /dev/null +++ b/src/cipsi_tc_bi_ortho/energy.irp.f @@ -0,0 +1,51 @@ +BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize pt2_E0_denominator + END_DOC + initialize_pt2_E0_denominator = .True. +END_PROVIDER + +BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] + implicit none + BEGIN_DOC + ! E0 in the denominator of the PT2 + END_DOC + integer :: i,j + + pt2_E0_denominator = eigval_right_tc_bi_orth + +! if (initialize_pt2_E0_denominator) then +! if (h0_type == "EN") then +! pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) +! else if (h0_type == "HF") then +! do i=1,N_states +! j = maxloc(abs(psi_coef(:,i)),1) +! pt2_E0_denominator(i) = psi_det_hii(j) +! enddo +! else if (h0_type == "Barycentric") then +! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) +! else if (h0_type == "CFG") then +! pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) +! else +! print *, h0_type, ' not implemented' +! stop +! endif +! do i=1,N_states +! call write_double(6,pt2_E0_denominator(i)+nuclear_repulsion, 'PT2 Energy denominator') +! enddo +! else +! pt2_E0_denominator = -huge(1.d0) +! endif + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ] + implicit none + BEGIN_DOC + ! Overlap between the perturbed wave functions + END_DOC + pt2_overlap(1:N_states,1:N_states) = 0.d0 +END_PROVIDER + diff --git a/src/cipsi_tc_bi_ortho/environment.irp.f b/src/cipsi_tc_bi_ortho/environment.irp.f new file mode 100644 index 00000000..5c0e0820 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/environment.irp.f @@ -0,0 +1,14 @@ +BEGIN_PROVIDER [ integer, nthreads_pt2 ] + implicit none + BEGIN_DOC + ! Number of threads for Davidson + END_DOC + nthreads_pt2 = nproc + character*(32) :: env + call getenv('QP_NTHREADS_PT2',env) + if (trim(env) /= '') then + read(env,*) nthreads_pt2 + call write_int(6,nthreads_pt2,'Target number of threads for PT2') + endif +END_PROVIDER + diff --git a/src/cipsi_tc_bi_ortho/get_d.irp.f b/src/cipsi_tc_bi_ortho/get_d.irp.f new file mode 100644 index 00000000..c642f420 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/get_d.irp.f @@ -0,0 +1,1735 @@ + +! --- + +double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) + + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer, intent(in) :: s1, s2, h1, h2, p1, p2 + integer(bit_kind), intent(in) :: phasemask(Nint,2) + + double precision, save :: res(0:1) = (/1d0, -1d0/) + + integer :: np + integer :: h1_int, h2_int + integer :: p1_int, p2_int + integer :: h1_bit, h2_bit + integer :: p1_bit, p2_bit + logical :: change + + h1_int = shiftr(h1-1,bit_kind_shift)+1 + h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1 + + h2_int = shiftr(h2-1,bit_kind_shift)+1 + h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1 + + p1_int = shiftr(p1-1,bit_kind_shift)+1 + p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1 + + p2_int = shiftr(p2-1,bit_kind_shift)+1 + p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1 + + ! Put the phasemask bits at position 0, and add them all + h1_bit = int( shiftr( phasemask(h1_int,s1), h1_bit ) ) + p1_bit = int( shiftr( phasemask(p1_int,s1), p1_bit ) ) + h2_bit = int( shiftr( phasemask(h2_int,s2), h2_bit ) ) + p2_bit = int( shiftr( phasemask(p2_int,s2), p2_bit ) ) + + np = h1_bit + p1_bit + h2_bit + p2_bit + + if(p1 < h1) np = np + 1 + if(p2 < h2) np = np + 1 + + if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1 + get_phase_bi = res(iand(np,1)) + +end function get_phase_bi + +! --- + +subroutine get_d3_htc(gen, bannedOrb, banned, mat_m, mat_p, mask, p, sp, rcoefs, lcoefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer, intent(in) :: p(0:4,2), sp + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: rcoefs(N_states), lcoefs(N_states) + double precision, intent(inout) :: mat_m(N_states, mo_num, mo_num), mat_p(N_states, mo_num, mo_num) + + integer(bit_kind) :: det(N_int, 2) + integer :: k, h1, h2, p1, p2, puti, putj + double precision :: i_h_alpha, alpha_h_i + logical :: ok + + if(sp == 3) then ! AB + + h1 = p(1,1) + h2 = p(1,2) + do p1 = 1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2 = 1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, 1)) cycle ! rentable? + + call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int) + call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot(det,gen, N_int, alpha_h_i) +! call hji_hij_mu_mat_tot(gen, det, N_int,i_h_alpha , alpha_h_i) + if( dabs(alpha_h_i) .gt. 0.d0) then + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat_p(k, p1, p2) = mat_p(k, p1, p2) + rcoefs(k) * alpha_h_i + enddo + endif + if( dabs(i_h_alpha) .gt. 0.d0) then + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat_m(k, p1, p2) = mat_m(k, p1, p2) + lcoefs(k) * i_h_alpha + enddo + endif + + enddo + enddo + + else ! AA BB + + p1 = p(1,sp) + p2 = p(2,sp) + do puti = 1, mo_num + if(bannedOrb(puti, sp)) cycle + do putj = puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, 1)) cycle ! rentable? + + call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int) +! call hji_hij_mu_mat_tot(gen, det, N_int, i_h_alpha, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot( det,gen, N_int, alpha_h_i) + if( dabs(alpha_h_i) .gt. 0.d0) then + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat_p(k, puti, putj) = mat_p(k, puti, putj) + rcoefs(k) * alpha_h_i + enddo + endif + if( dabs(i_h_alpha) .gt. 0.d0) then + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat_m(k, puti, putj) = mat_m(k, puti, putj) + lcoefs(k) * i_h_alpha + enddo + endif + + enddo + enddo + + endif + +end subroutine get_d3_htc + +! --- + +subroutine get_d3_h(gen, bannedOrb, banned, mat, mask, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer, intent(in) :: p(0:4,2), sp + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + + integer(bit_kind) :: det(N_int, 2) + integer :: k, h1, h2, p1, p2, puti, putj + double precision :: hij + logical :: ok + + if(sp == 3) then ! AB + + h1 = p(1,1) + h2 = p(1,2) + do p1 = 1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2 = 1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, 1)) cycle ! rentable? + + call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + if (hij == 0.d0) cycle + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij + enddo + + enddo + enddo + + else ! AA BB + + p1 = p(1,sp) + p2 = p(2,sp) + do puti = 1, mo_num + if(bannedOrb(puti, sp)) cycle + do putj = puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, 1)) cycle ! rentable? + + call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + + enddo + enddo + + endif + +end subroutine get_d3_h + +! --- + +subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + h1 = h(1, ma) + h2 = h(2, ma) + + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + + ! --> --> < p2 p1 | H^tilde| h1 h2 > + ! + ! - + ! < p2 p1 | H^tilde^dag| h1 h2 > = < h1 h2 | w_ee^h + t^nh | p1 p2 > + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) + if (hij == 0.d0) cycle + + hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + + if(ma == 1) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + endif + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2) + if (hij == 0.d0) cycle + + hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij + enddo + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) + if (hij == 0.d0) cycle + + hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + if (puti < putj) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij + enddo + endif + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + end if + end if + end if + end if + +end subroutine get_d2 + +! --- + +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + + PROVIDE mo_integrals_map N_int + + allocate (lbanned(mo_num, 2)) + allocate (hij_cache(mo_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k) + enddo + endif + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k) + enddo + endif + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l) + enddo + enddo + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + putj = p1 + do puti = 1, mo_num !HOT + + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k) + enddo + endif + endif + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + do k=1,N_states + tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k) + enddo + endif + endif + + enddo + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l) + mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l) + enddo + enddo + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + tmp_row = 0d0 + do putj=1,hfix-1 + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + endif + end do + do putj=hfix+1,mo_num + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + endif + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + do l=puti,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l) + enddo + enddo + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + putj = p2 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + do k=1,N_states + tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k) + enddo + endif + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + do l=p2,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l) + enddo + enddo + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + do l=p1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l) + enddo + enddo + end if + end if + deallocate(lbanned,hij_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1 = 1, p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2 = ib, p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + !DIR$ LOOP COUNT AVG(4) + do k = 1, N_states + mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij + enddo + enddo + enddo + +end subroutine get_d1 + +! --- + +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer, parameter :: bant=1 + double precision, allocatable :: hij_cache1(:), hij_cache2(:) + allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache1(p2) * phase + end if + if (hij == 0.d0) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle + call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) + call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + if (hij == 0.d0) cycle + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj)) + if (hij == 0.d0) cycle + hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + end do + end do + end if + + deallocate(hij_cache1,hij_cache2) + +end subroutine get_d0 + +! --- + +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + +!subroutine get_pm2(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs) +! +! use bitmasks +! +! implicit none +! +! integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2), phasemask(N_int,2) +! logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) +! double precision, intent(in) :: coefs(N_states) +! double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) +! +! integer, parameter :: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) +! integer, parameter :: turn2(2) = (/2, 1/) +! integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) +! +! integer :: i, j, k, tip, ma, mi, puti, putj +! integer :: h1, h2, p1, p2, i1, i2 +! integer :: bant +! double precision :: hij_p, hij_m, phase +! +! double precision, external :: get_phase_bi +! double precision, external :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int +! +! PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map +! +! bant = 1 +! +! tip = p(0,1) * p(0,2) +! +! ma = sp +! if(p(0,1) > p(0,2)) ma = 1 +! if(p(0,1) < p(0,2)) ma = 2 +! mi = mod(ma, 2) + 1 +! +! if(sp == 3) then +! if(ma == 2) bant = 2 +! if(tip == 3) then +! puti = p(1, mi) +! if(bannedOrb(puti, mi)) return +! h1 = h(1, ma) +! h2 = h(2, ma) +! +! do i = 1, 3 +! putj = p(i, ma) +! if(banned(putj,puti,bant)) cycle +! i1 = turn3(1,i) +! i2 = turn3(2,i) +! p1 = p(i1, ma) +! p2 = p(i2, ma) +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, h1, h2, mo_integrals_tc_int_map ) & +! - get_mo_two_e_integral_tc_int (p2, p1, h1, h2, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) & +! - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map) +! +! if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle +! +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) +! +! if(ma == 1) then +! !DIR$ LOOP COUNT AVG(4) +! do k = 1, N_states +! mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p +! mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m +! enddo +! else +! !DIR$ LOOP COUNT AVG(4) +! do k = 1, N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! end if +! end do +! +! else +! +! h1 = h(1,1) +! h2 = h(1,2) +! do j = 1,2 +! putj = p(j, 2) +! if(bannedOrb(putj, 2)) cycle +! p2 = p(turn2(j), 2) +! do i = 1,2 +! puti = p(i, 1) +! +! if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle +! p1 = p(turn2(i), 1) +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, h1, h2, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k = 1, N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! endif +! end do +! end do +! end if +! +! else +! if(tip == 0) then +! h1 = h(1, ma) +! h2 = h(2, ma) +! do i=1,3 +! puti = p(i, ma) +! if(bannedOrb(puti,ma)) cycle +! do j=i+1,4 +! putj = p(j, ma) +! if(bannedOrb(putj,ma)) cycle +! if(banned(puti,putj,1)) cycle +! +! i1 = turn2d(1, i, j) +! i2 = turn2d(2, i, j) +! p1 = p(i1, ma) +! p2 = p(i2, ma) +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, h1, h2, mo_integrals_tc_int_map ) & +! - get_mo_two_e_integral_tc_int (p2, p1, h1, h2, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) & +! - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map) +! +! if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle +! +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) +! +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! end do +! end do +! +! else if(tip == 3) then +! h1 = h(1, mi) +! h2 = h(1, ma) +! p1 = p(1, mi) +! do i=1,3 +! puti = p(turn3(1,i), ma) +! if(bannedOrb(puti,ma)) cycle +! putj = p(turn3(2,i), ma) +! if(bannedOrb(putj,ma)) cycle +! if(banned(puti,putj,1)) cycle +! p2 = p(i, ma) +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, h1, h2, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) +! +! if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle +! +! hij_p = hij_p * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) +! if (puti < putj) then +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! else +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p +! mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m +! enddo +! endif +! end do +! else ! tip == 4 +! puti = p(1, sp) +! putj = p(2, sp) +! if(.not. banned(puti,putj,1)) then +! p1 = p(1, mi) +! p2 = p(2, mi) +! h1 = h(1, mi) +! h2 = h(2, mi) +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, h1, h2, mo_integrals_tc_int_map ) & +! - get_mo_two_e_integral_tc_int (p2, p1, h1, h2, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) & +! - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! end if +! end if +! end if +! end if +! +!end subroutine get_pm2 +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + + +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + +!subroutine get_pm1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs) +! +! use bitmasks +! +! implicit none +! +! integer(bit_kind) :: det(N_int, 2) +! integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) +! integer(bit_kind), intent(in) :: phasemask(N_int,2) +! logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) +! double precision, intent(in) :: coefs(N_states) +! integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) +! +! double precision, external :: get_phase_bi +! double precision, external :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int +! +! logical :: ok +! logical, allocatable :: lbanned(:,:) +! integer :: bant +! integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j +! integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l +! double precision :: tmp_row_p (N_states, mo_num), tmp_row_m (N_states, mo_num) +! double precision :: hij_p, hij_m, tmp_row2_p(N_states, mo_num), tmp_row2_m(N_states, mo_num) +! double precision, allocatable :: hijp_cache(:,:), hijm_cache(:,:) +! +! integer, parameter :: turn2(2) = (/2,1/) +! integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) +! +! PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map +! +! allocate( lbanned(mo_num, 2) ) +! allocate( hijp_cache(mo_num,2), hijm_cache(mo_num,2) ) +! lbanned = bannedOrb +! +! do i=1, p(0,1) +! lbanned(p(i,1), 1) = .true. +! end do +! do i=1, p(0,2) +! lbanned(p(i,2), 2) = .true. +! end do +! +! ma = 1 +! if(p(0,2) >= 2) ma = 2 +! mi = turn2(ma) +! +! bant = 1 +! +! if(sp == 3) then +! !move MA +! if(ma == 2) bant = 2 +! puti = p(1,mi) +! hfix = h(1,ma) +! p1 = p(1,ma) +! p2 = p(2,ma) +! if(.not. bannedOrb(puti, mi)) then +! +! call get_mo_two_e_integrals_tc_int (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tc_int (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map) +! call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map) +! +! tmp_row_p = 0d0 +! tmp_row_m = 0d0 +! do putj=1, hfix-1 +! if(lbanned(putj, ma)) cycle +! if(banned(putj, puti,bant)) cycle +! +! hij_p = hijp_cache(putj,1) - hijp_cache(putj,2) +! hij_m = hijm_cache(putj,1) - hijm_cache(putj,2) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k) +! tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k) +! enddo +! endif +! end do +! do putj=hfix+1, mo_num +! if(lbanned(putj, ma)) cycle +! if(banned(putj, puti,bant)) cycle +! +! hij_p = hijp_cache(putj,2) - hijp_cache(putj,1) +! hij_m = hijm_cache(putj,2) - hijm_cache(putj,1) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k) +! tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k) +! enddo +! endif +! end do +! +! if(ma == 1) then +! mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_p(1:N_states,1:mo_num) +! mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_m(1:N_states,1:mo_num) +! else +! do l=1,mo_num +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_p(k,l) +! mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_m(k,l) +! enddo +! enddo +! end if +! end if +! +! !MOVE MI +! pfix = p(1,mi) +! tmp_row_p = 0d0 +! tmp_row_m = 0d0 +! tmp_row2_p = 0d0 +! tmp_row2_m = 0d0 +! +! call get_mo_two_e_integrals_tc_int (hfix, pfix, p1, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tc_int (hfix, pfix, p2, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p1, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map) +! call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p2, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map) +! +! putj = p1 +! do puti=1,mo_num !HOT +! if(lbanned(puti,mi)) cycle +! !p1 fixed +! putj = p1 +! if(.not. banned(putj,puti,bant)) then +! +! hij_p = hijp_cache(puti,2) +! hij_m = hijm_cache(puti,2) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k) +! tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k) +! enddo +! endif +! end if +! +! putj = p2 +! if(.not. banned(putj,puti,bant)) then +! +! hij_p = hijp_cache(puti,1) +! hij_m = hijm_cache(puti,1) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) +! do k=1,N_states +! tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k) +! tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k) +! enddo +! endif +! end if +! end do +! +! if(mi == 1) then +! mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_p (:,:) +! mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row2_p(:,:) +! mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_m (:,:) +! mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row2_m(:,:) +! else +! do l=1,mo_num +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_p (k,l) +! mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row2_p(k,l) +! mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_m (k,l) +! mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row2_m(k,l) +! enddo +! enddo +! end if +! +! else ! sp /= 3 +! +! if(p(0,ma) == 3) then +! do i=1,3 +! hfix = h(1,ma) +! puti = p(i, ma) +! p1 = p(turn3(1,i), ma) +! p2 = p(turn3(2,i), ma) +! +! call get_mo_two_e_integrals_tc_int (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tc_int (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map) +! call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map) +! +! tmp_row_p = 0d0 +! tmp_row_m = 0d0 +! do putj=1,hfix-1 +! if(banned(putj,puti,1)) cycle +! if(lbanned(putj,ma)) cycle +! +! hij_p = hijp_cache(putj,1) - hijp_cache(putj,2) +! hij_m = hijm_cache(putj,1) - hijm_cache(putj,2) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) +! tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:) +! tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:) +! endif +! end do +! do putj=hfix+1,mo_num +! if(banned(putj,puti,1)) cycle +! if(lbanned(putj,ma)) cycle +! +! hij_p = hijp_cache(putj,2) - hijp_cache(putj,1) +! hij_m = hijm_cache(putj,2) - hijm_cache(putj,1) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) +! tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:) +! tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:) +! endif +! end do +! +! mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_p(:,:puti-1) +! mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_m(:,:puti-1) +! do l=puti,mo_num +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_p(k,l) +! mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_m(k,l) +! enddo +! enddo +! end do +! else +! hfix = h(1,mi) +! pfix = p(1,mi) +! p1 = p(1,ma) +! p2 = p(2,ma) +! tmp_row_p = 0d0 +! tmp_row_m = 0d0 +! tmp_row2_p = 0d0 +! tmp_row2_m = 0d0 +! +! call get_mo_two_e_integrals_tc_int (hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tc_int (hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tcdag_int_map) +! call get_mo_two_e_integrals_tcdag_int(hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tcdag_int_map) +! +! putj = p2 +! do puti=1,mo_num +! if(lbanned(puti,ma)) cycle +! putj = p2 +! if(.not. banned(puti,putj,1)) then +! +! hij_p = hijp_cache(puti,1) +! hij_m = hijm_cache(puti,1) +! +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k) +! tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k) +! enddo +! endif +! end if +! +! putj = p1 +! if(.not. banned(puti,putj,1)) then +! hij_p = hijp_cache(puti,2) +! hij_m = hijm_cache(puti,2) +! if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then +! hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) +! do k=1,N_states +! tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k) +! tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k) +! enddo +! endif +! end if +! end do +! mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_p(:,:p2-1) +! mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_m(:,:p2-1) +! do l=p2,mo_num +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_p(k,l) +! mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_m(k,l) +! enddo +! enddo +! mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row2_p(:,:p1-1) +! mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row2_m(:,:p1-1) +! do l=p1,mo_num +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row2_p(k,l) +! mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row2_m(k,l) +! enddo +! enddo +! end if +! end if +! deallocate(lbanned,hijp_cache, hijm_cache) +! +! !! MONO +! if(sp == 3) then +! s1 = 1 +! s2 = 2 +! else +! s1 = sp +! s2 = sp +! end if +! +! do i1 = 1, p(0,s1) +! ib = 1 +! if(s1 == s2) ib = i1+1 +! do i2 = ib, p(0,s2) +! p1 = p(i1,s1) +! p2 = p(i2,s2) +! if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle +! call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) +! +! call htilde_mu_mat_tot (gen, det, N_int, hij_p) +! call htildedag_mu_mat_tot(gen, det, N_int, hij_m) +! +! !DIR$ LOOP COUNT AVG(4) +! do k = 1, N_states +! mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p +! mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m +! enddo +! enddo +! enddo +! +!end subroutine get_pm1 +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + + + +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + +!subroutine get_pm0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs) +! +! use bitmasks +! implicit none +! +! integer(bit_kind) :: det(N_int, 2) +! integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) +! integer(bit_kind), intent(in) :: phasemask(N_int,2) +! integer, intent(in) :: h(0:2,2), p(0:4,2), sp +! logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) +! double precision, intent(in) :: coefs(N_states) +! double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) +! +! double precision, external :: get_phase_bi, mo_two_e_integral +! double precision, external :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int +! integer, parameter :: bant=1 +! integer :: i, j, k, s, h1, h2, p1, p2, puti, putj +! logical :: ok +! double precision :: hij_p, hij_m, phase +! double precision, allocatable :: hijp_cache1(:), hijp_cache2(:), hijm_cache1(:), hijm_cache2(:) +! +! PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map +! +! allocate( hijp_cache1(mo_num) , hijp_cache2(mo_num) ) +! allocate( hijm_cache1(mo_num) , hijm_cache2(mo_num) ) +! +! if(sp == 3) then ! AB +! h1 = p(1,1) +! h2 = p(1,2) +! do p1=1, mo_num +! if(bannedOrb(p1, 1)) cycle +! +! call get_mo_two_e_integrals_tc_int (p1, h2, h1, mo_num, hijp_cache1, mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(p1, h2, h1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map) +! +! do p2 = 1, mo_num +! if(bannedOrb(p2,2)) cycle +! if(banned(p1, p2, bant)) cycle ! rentable? +! if(p1 == h1 .or. p2 == h2) then +! call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int) +! call htilde_mu_mat_tot (gen, det, N_int, hij_p) +! call htildedag_mu_mat_tot(gen, det, N_int, hij_m) +! else +! phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) +! hij_p = hijp_cache1(p2) * phase +! hij_m = hijm_cache1(p2) * phase +! end if +! if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle +! !DIR$ LOOP COUNT AVG(4) +! do k = 1, N_states +! mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p ! HOTSPOT +! mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m ! HOTSPOT +! enddo +! end do +! end do +! +! else ! AA BB +! p1 = p(1,sp) +! p2 = p(2,sp) +! do puti=1, mo_num +! if(bannedOrb(puti, sp)) cycle +! +! call get_mo_two_e_integrals_tc_int (puti, p2, p1, mo_num, hijp_cache1, mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tc_int (puti, p1, p2, mo_num, hijp_cache2, mo_integrals_tc_int_map ) +! call get_mo_two_e_integrals_tcdag_int(puti, p2, p1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map) +! call get_mo_two_e_integrals_tcdag_int(puti, p1, p2, mo_num, hijm_cache2, mo_integrals_tcdag_int_map) +! +! do putj=puti+1, mo_num +! if(bannedOrb(putj, sp)) cycle +! if(banned(puti, putj, bant)) cycle ! rentable? +! if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then +! call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int) +! call htilde_mu_mat_tot (gen, det, N_int, hij_p) +! call htildedag_mu_mat_tot(gen, det, N_int, hij_m) +! if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle +! else +! +! hij_p = get_mo_two_e_integral_tc_int (p1, p2, puti, putj, mo_integrals_tc_int_map ) & +! - get_mo_two_e_integral_tc_int (p2, p1, puti, putj, mo_integrals_tc_int_map ) +! hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, puti, putj, mo_integrals_tcdag_int_map) & +! - get_mo_two_e_integral_tcdag_int(p2, p1, puti, putj, mo_integrals_tcdag_int_map) +! +! if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle +! +! hij_p = hij_p * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) +! hij_m = hij_m * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) +! +! end if +! !DIR$ LOOP COUNT AVG(4) +! do k=1,N_states +! mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p +! mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m +! enddo +! end do +! end do +! end if +! +! deallocate( hijp_cache1 , hijp_cache2 ) +! deallocate( hijm_cache1 , hijm_cache2 ) +! +!end subroutine get_pm0 +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ +! ___________________________________________________________________________________________________________________________________________________ + + +! OLD unoptimized routines for debugging +! ====================================== + +subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = mo_two_e_integral(p1, p2, h1, h2) * phase + end if + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + +end subroutine get_d0_reference + +! --- + +subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + allocate (lbanned(mo_num, 2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + do putj=hfix+1,mo_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + +end subroutine get_d1_reference + +! --- + +subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(2,N_int) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij + else + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) + mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end if + end if + +end subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + +! --- + diff --git a/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f b/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f new file mode 100644 index 00000000..e69de29b diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f new file mode 100644 index 00000000..e7dca456 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/pt2.irp.f @@ -0,0 +1,89 @@ +subroutine pt2_tc_bi_ortho + use selection_types + implicit none + BEGIN_DOC +! Selected Full Configuration Interaction with Stochastic selection and PT2. + END_DOC + integer :: i,j,k,ndet + double precision, allocatable :: zeros(:) + integer :: to_select + type(pt2_type) :: pt2_data, pt2_data_err + logical, external :: qp_stop + logical :: print_pt2 + + double precision :: rss + double precision, external :: memory_of_double + double precision :: correlation_energy_ratio,E_denom,E_tc,norm + double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:) + PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map + + print*,'Diagonal elements of the Fock matrix ' + do i = 1, mo_num + write(*,*)i,Fock_matrix_tc_mo_tot(i,i) + enddo + N_iter = 1 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_generators + + rss = memory_of_double(N_states)*4.d0 + call check_mem(rss,irp_here) + + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + + double precision :: hf_energy_ref + logical :: has + double precision :: relative_error + + relative_error=PT2_relative_error + + zeros = 0.d0 + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap= 0.d0 + pt2_data % variance = huge(1.e0) + + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! call routine_save_right + + if (N_det > N_det_max) then + psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) + N_det = N_det_max + soft_touch N_det psi_det psi_coef + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + endif + + allocate(ept2(1000),pt1(1000),extrap_energy(100)) + + correlation_energy_ratio = 0.d0 + +! thresh_it_dav = 5.d-5 +! soft_touch thresh_it_dav + + print_pt2 = .True. + to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) + to_select = max(N_states_diag, to_select) + + E_denom = E_tc ! TC Energy of the current wave function + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection + + N_iter += 1 + + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + +end + diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f new file mode 100644 index 00000000..56e6bd14 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f @@ -0,0 +1,869 @@ +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] + implicit none + logical, external :: testTeethBuilding + integer :: i,j + pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num - n_core_orb*2 + pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000) + call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max') + + pt2_F(:) = max(int(sqrt(float(pt2_n_tasks_max))),1) + do i=1,pt2_n_0(1+pt2_N_teeth/4) + pt2_F(i) = pt2_n_tasks_max*pt2_min_parallel_tasks + enddo + do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/4), pt2_n_0(pt2_N_teeth-pt2_N_teeth/10) + pt2_F(i) = pt2_min_parallel_tasks + enddo + do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/10), N_det_generators + pt2_F(i) = 1 + enddo + +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] + implicit none + logical, external :: testTeethBuilding + + if(N_det_generators < 500) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = min(5, N_det_generators) + do pt2_N_teeth=100,2,-1 + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if + call write_int(6,pt2_N_teeth,'Number of comb teeth') +END_PROVIDER + + +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + rss = memory_of_double(2*N_det_generators+1) + call check_mem(rss,irp_here) + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + double precision :: norm2 + norm2 = 0.d0 + do i=N_det_generators,1,-1 + tilde_w(i) = psi_coef_sorted_tc_gen(i,pt2_stoch_istate) * & + psi_coef_sorted_tc_gen(i,pt2_stoch_istate) + norm2 = norm2 + tilde_w(i) + enddo + + f = 1.d0/norm2 + tilde_w(:) = tilde_w(:) * f + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + deallocate(tilde_w) + + n0 = 0 + testTeethBuilding = .false. + double precision :: f + integer :: minFN + minFN = N_det_generators - minF * N + f = 1.d0/dble(N) + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) * f + if (dabs(Wt) <= 1.d-3) then + exit + endif + if(Wt >= r - u0) then + testTeethBuilding = .true. + exit + end if + n0 += 1 + if(n0 > minFN) then + exit + end if + end do + deallocate(tilde_cW) + +end function + + + +subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) + use f77_zmq + use selection_types + + implicit none + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull + integer, intent(in) :: N_in +! integer, intent(inout) :: N_in + double precision, intent(in) :: relative_error, E(N_states) + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err +! + integer :: i, N + + double precision :: state_average_weight_save(N_states), w(N_states,4) + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + type(selection_buffer) :: b + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc + PROVIDE psi_det_hii selection_weight pseudo_sym + PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max + PROVIDE excitation_beta_max excitation_alpha_max excitation_max + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + if (h0_type == 'CFG') then + PROVIDE psi_configuration_hii det_to_configuration + endif + + if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then + print*,'ZMQ_selection' + call ZMQ_selection(N_in, pt2_data) + else + print*,'else ZMQ_selection' + + N = max(N_in,1) * N_states + state_average_weight_save(:) = state_average_weight(:) + if (int(N,8)*2_8 > huge(1)) then + print *, irp_here, ': integer too large' + stop -1 + endif + call create_selection_buffer(N, N*2, b) + ASSERT (associated(b%det)) + ASSERT (associated(b%val)) + + do pt2_stoch_istate=1,N_states + state_average_weight(:) = 0.d0 + state_average_weight(pt2_stoch_istate) = 1.d0 + TOUCH state_average_weight pt2_stoch_istate selection_weight + + PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w + PROVIDE psi_selectors pt2_u pt2_J pt2_R + call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') + + integer, external :: zmq_put_psi + integer, external :: zmq_put_N_det_generators + integer, external :: zmq_put_N_det_selectors + integer, external :: zmq_put_dvector + integer, external :: zmq_put_ivector + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_generators on ZMQ server' + endif + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then + stop 'Unable to put selection_weight on ZMQ server' + endif + if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then + stop 'Unable to put pt2_stoch_istate on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then + stop 'Unable to put threshold_generators on ZMQ server' + endif + + + integer, external :: add_task_to_taskserver + character(300000) :: task + + integer :: j,k,ipos,ifirst + ifirst=0 + + ipos=0 + do i=1,N_det_generators + if (pt2_F(i) > 1) then + ipos += 1 + endif + enddo + call write_int(6,sum(pt2_F),'Number of tasks') + call write_int(6,ipos,'Number of fragmented tasks') + + ipos=1 + do i= 1, N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N_in + ipos += 30 + if (ipos > 300000-30) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + if (ifirst == 0) then + ifirst=1 + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + endif + endif + end do + enddo + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + endif + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + + double precision :: mem_collector, mem, rss + + call resident_memory(rss) + + mem_collector = 8.d0 * & ! bytes + ( 1.d0*pt2_n_tasks_max & ! task_id, index + + 0.635d0*N_det_generators & ! f,d + + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task + + N_det_generators*pt2_type_size(N_states) & ! pt2_data_I + + 4.d0*(pt2_N_teeth+1) & ! S, S2, T2, T3 + + 1.d0*(N_int*2.d0*N + N) & ! selection buffer + + 1.d0*(N_int*2.d0*N + N) & ! sort selection buffer + ) / 1024.d0**3 + + integer :: nproc_target, ii + nproc_target = nthreads_pt2 + ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2) + + do + mem = mem_collector + & ! + nproc_target * 8.d0 * & ! bytes + ( 0.5d0*pt2_n_tasks_max & ! task_id + + 64.d0*pt2_n_tasks_max & ! task + + pt2_type_size(N_states)*pt2_n_tasks_max*N_states & ! pt2, variance, overlap + + 1.d0*pt2_n_tasks_max & ! i_generator, subset + + 1.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + + 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer + + 2.0d0*(ii) & ! preinteresting, interesting, + ! prefullinteresting, fullinteresting + + 2.0d0*(N_int*2*ii) & ! minilist, fullminilist + + 1.0d0*(N_states*mo_num*mo_num) & ! mat + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(mem,irp_here) + nproc_target = 1 + exit + endif + + if (mem+rss < qp_max_mem) then + exit + endif + + nproc_target = nproc_target - 1 + + enddo + call write_int(6,nproc_target,'Number of threads for PT2') + call write_double(6,mem,'Memory (Gb)') + + call omp_set_max_active_levels(1) + + + print '(A)', '========== ======================= ===================== ===================== ===========' + print '(A)', ' Samples Energy Variance Norm^2 Seconds' + print '(A)', '========== ======================= ===================== ===================== ===========' + + PROVIDE global_selection_buffer + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N) + pt2_data % rpt2(pt2_stoch_istate) = & + pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) + + !TODO : We should use here the correct formula for the error of X/Y + pt2_data_err % rpt2(pt2_stoch_istate) = & + pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) + + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') + call omp_set_max_active_levels(8) + + print '(A)', '========== ======================= ===================== ===================== ===========' + + do k=1,N_states + pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate) + enddo + SOFT_TOUCH pt2_overlap + + enddo + FREE pt2_stoch_istate + + ! Symmetrize overlap + do j=2,N_states + do i=1,j-1 + pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i)) + pt2_overlap(j,i) = pt2_overlap(i,j) + enddo + enddo + + print *, 'Overlap of perturbed states:' + do k=1,N_states + print *, pt2_overlap(k,:) + enddo + print *, '-------' + + if (N_in > 0) then + b%cur = min(N_in,b%cur) + if (s2_eig) then + call make_selection_buffer_s2(b) + else + call remove_duplicates_in_selection_buffer(b) + endif + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) + endif + call delete_selection_buffer(b) + + state_average_weight(:) = state_average_weight_save(:) + TOUCH state_average_weight + call update_pt2_and_variance_weights(pt2_data, N_states) + endif + + +end subroutine + + +subroutine pt2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + PROVIDE global_selection_buffer + call run_pt2_slave(1,i,pt2_e0_denominator) +end + + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(in) :: relative_error, E + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err + type(selection_buffer), intent(inout) :: b + integer, intent(in) :: N_ + + type(pt2_type), allocatable :: pt2_data_task(:) + type(pt2_type), allocatable :: pt2_data_I(:) + type(pt2_type), allocatable :: pt2_data_S(:) + type(pt2_type), allocatable :: pt2_data_S2(:) + type(pt2_type) :: pt2_data_teeth + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks_async_send + integer, external :: zmq_delete_tasks_async_recv + integer, external :: zmq_abort + integer, external :: pt2_find_sample_lr + + PROVIDE pt2_stoch_istate + + integer :: more, n, i, p, c, t, n_tasks, U + integer, allocatable :: task_id(:) + integer, allocatable :: index(:) + + double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states) + double precision :: eqta(N_states) + double precision :: time, time1, time0 + + integer, allocatable :: f(:) + logical, allocatable :: d(:) + logical :: do_exit, stop_now, sending + logical, external :: qp_stop + type(selection_buffer) :: b2 + + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + sending =.False. + + rss = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2) + rss += memory_of_double(N_states*N_det_generators)*3.d0 + rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0 + rss += memory_of_double(pt2_N_teeth+1)*4.d0 + call check_mem(rss,irp_here) + + ! If an allocation is added here, the estimate of the memory should also be + ! updated in ZMQ_pt2 + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(pt2_data_task(pt2_n_tasks_max)) + allocate(pt2_data_I(N_det_generators)) + allocate(pt2_data_S(pt2_N_teeth+1)) + allocate(pt2_data_S2(pt2_N_teeth+1)) + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + call create_selection_buffer(N_, N_*2, b2) + + + pt2_data % pt2(pt2_stoch_istate) = -huge(1.) + pt2_data_err % pt2(pt2_stoch_istate) = huge(1.) + pt2_data % variance(pt2_stoch_istate) = huge(1.) + pt2_data_err % variance(pt2_stoch_istate) = huge(1.) + pt2_data % overlap(:,pt2_stoch_istate) = 0.d0 + pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.) + n = 1 + t = 0 + U = 0 + do i=1,pt2_n_tasks_max + call pt2_alloc(pt2_data_task(i),N_states) + enddo + do i=1,pt2_N_teeth+1 + call pt2_alloc(pt2_data_S(i),N_states) + call pt2_alloc(pt2_data_S2(i),N_states) + enddo + do i=1,N_det_generators + call pt2_alloc(pt2_data_I(i),N_states) + enddo + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E + v0 = 0.d0 + n0(:) = 0.d0 + more = 1 + call wall_time(time0) + time1 = time0 + + do_exit = .false. + stop_now = .false. + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do + + ! Deterministic part + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = 0.d0 + v0 = 0.d0 + n0(:) = 0.d0 + do i=pt2_n_0(t),1,-1 + E0 += pt2_data_I(i) % pt2(pt2_stoch_istate) + v0 += pt2_data_I(i) % variance(pt2_stoch_istate) + n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate) + end do + else + exit + end if + end do + + ! Add Stochastic part + c = pt2_R(n) + if(c > 0) then + + call pt2_alloc(pt2_data_teeth,N_states) + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1)) + v = pt2_W_T / pt2_w(i) + call pt2_add ( pt2_data_teeth, v, pt2_data_I(i) ) + call pt2_add ( pt2_data_S(p), 1.d0, pt2_data_teeth ) + call pt2_add2( pt2_data_S2(p), 1.d0, pt2_data_teeth ) + enddo + call pt2_dealloc(pt2_data_teeth) + + avg = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c) + avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c) + avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c) + if ((avg /= 0.d0) .or. (n == N_det_generators) ) then + do_exit = .true. + endif + if (qp_stop()) then + stop_now = .True. + endif + pt2_data % pt2(pt2_stoch_istate) = avg + pt2_data % variance(pt2_stoch_istate) = avg2 + pt2_data % overlap(:,pt2_stoch_istate) = avg3(:) + call wall_time(time) + ! 1/(N-1.5) : see Brugger, The American Statistician (23) 4 p. 32 (1969) + if(c > 2) then + eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + pt2_data_err % pt2(pt2_stoch_istate) = eqt + + eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + pt2_data_err % variance(pt2_stoch_istate) = eqt + + eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0)) + pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:) + + + if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then + time1 = time + print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, & + pt2_data % pt2(pt2_stoch_istate) +E, & + pt2_data_err % pt2(pt2_stoch_istate), & + pt2_data % variance(pt2_stoch_istate), & + pt2_data_err % variance(pt2_stoch_istate), & + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate), & + pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), & + time-time0 + if (stop_now .or. ( & + (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) / & + (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(10) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + endif + endif + endif + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2) + if(n_tasks > pt2_n_tasks_max)then + print*,'PB !!!' + print*,'If you see this, send a bug report with the following content' + print*,irp_here + print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max + stop -1 + endif + if (zmq_delete_tasks_async_send(zmq_to_qp_run_socket,task_id,n_tasks,sending) == -1) then + stop 'PT2: Unable to delete tasks (send)' + endif + do i=1,n_tasks + if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then + print*,'PB !!!' + print*,'If you see this, send a bug report with the following content' + print*,irp_here + print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1) + stop -1 + endif + call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i)) + f(index(i)) -= 1 + end do + do i=1, b2%cur + ! We assume the pulled buffer is sorted + if (b2%val(i) > b%mini) exit + call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) + end do + if (zmq_delete_tasks_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then + stop 'PT2: Unable to delete tasks (recv)' + endif + end if + end do + do i=1,N_det_generators + call pt2_dealloc(pt2_data_I(i)) + enddo + do i=1,pt2_N_teeth+1 + call pt2_dealloc(pt2_data_S(i)) + call pt2_dealloc(pt2_data_S2(i)) + enddo + do i=1,pt2_n_tasks_max + call pt2_dealloc(pt2_data_task(i)) + enddo +!print *, 'deleting b2' + call delete_selection_buffer(b2) +!print *, 'sorting b' + call sort_selection_buffer(b) +!print *, 'done' + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + +end subroutine + + +integer function pt2_find_sample(v, w) + implicit none + double precision, intent(in) :: v, w(0:N_det_generators) + integer, external :: pt2_find_sample_lr + + pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators) +end function + + +integer function pt2_find_sample_lr(v, w, l_in, r_in) + implicit none + double precision, intent(in) :: v, w(0:N_det_generators) + integer, intent(in) :: l_in,r_in + integer :: i,l,r + + l=l_in + r=r_in + + do while(r-l > 1) + i = shiftr(r+l,1) + if(w(i) < v) then + l = i + else + r = i + end if + end do + i = r + do r=i+1,N_det_generators + if (w(r) /= w(i)) then + exit + endif + enddo + pt2_find_sample_lr = r-1 +end function + + +BEGIN_PROVIDER [ integer, pt2_n_tasks ] + implicit none + BEGIN_DOC + ! Number of parallel tasks for the Monte Carlo + END_DOC + pt2_n_tasks = N_det_generators +END_PROVIDER + +BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] + implicit none + integer, allocatable :: seed(:) + integer :: m,i + call random_seed(size=m) + allocate(seed(m)) + do i=1,m + seed(i) = i + enddo + call random_seed(put=seed) + deallocate(seed) + + call RANDOM_NUMBER(pt2_u) + END_PROVIDER + + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] + implicit none + BEGIN_DOC +! pt2_J contains the list of generators after ordering them according to the +! Monte Carlo sampling. +! +! pt2_R(i) is the number of combs drawn when determinant i is computed. + END_DOC + integer :: N_c, N_j + integer :: U, t, i + double precision :: v + integer, external :: pt2_find_sample_lr + + logical, allocatable :: pt2_d(:) + integer :: m,l,r,k + integer :: ncache + integer, allocatable :: ii(:,:) + double precision :: dt + + ncache = min(N_det_generators,10000) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators) + call check_mem(rss,irp_here) + + allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators)) + + pt2_R(:) = 0 + pt2_d(:) = .false. + N_c = 0 + N_j = pt2_n_0(1) + do i=1,N_j + pt2_d(i) = .true. + pt2_J(i) = i + end do + + U = 0 + do while(N_j < pt2_n_tasks) + + if (N_c+ncache > N_det_generators) then + ncache = N_det_generators - N_c + endif + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k) + do k=1, ncache + dt = pt2_u_0 + do t=1, pt2_N_teeth + v = dt + pt2_W_T *pt2_u(N_c+k) + dt = dt + pt2_W_T + ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1)) + end do + enddo + !$OMP END PARALLEL DO + + do k=1,ncache + !ADD_COMB + N_c = N_c+1 + do t=1, pt2_N_teeth + i = ii(t,k) + if(.not. pt2_d(i)) then + N_j += 1 + pt2_J(N_j) = i + pt2_d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. pt2_d(U)) then + N_j += 1 + pt2_J(N_j) = U + pt2_d(U) = .true. + exit + end if + end do + if (N_j >= pt2_n_tasks) exit + end do + enddo + + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if + + deallocate(ii,pt2_d) + +END_PROVIDER + + + + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] + implicit none + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_double(2*N_det_generators+1) + call check_mem(rss,irp_here) + + if (N_det_generators == 1) then + + pt2_w(1) = 1.d0 + pt2_cw(1) = 1.d0 + pt2_u_0 = 1.d0 + pt2_W_T = 0.d0 + pt2_n_0(1) = 0 + pt2_n_0(2) = 1 + + else + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_tc_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 + enddo + + double precision :: norm2 + norm2 = 0.d0 + do i=N_det_generators,1,-1 + norm2 += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm2 + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then + exit + end if + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + print *, "teeth building failed" + stop -1 + end if + end do + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + if (tooth_width == 0.d0) then + tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))) + endif + ASSERT(tooth_width > 0.d0) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do + end do + + pt2_cW(0) = 0d0 + do i=1,N_det_generators + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + endif +END_PROVIDER + + + + + diff --git a/src/cipsi_tc_bi_ortho/pt2_type.irp.f b/src/cipsi_tc_bi_ortho/pt2_type.irp.f new file mode 100644 index 00000000..ee90d421 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/pt2_type.irp.f @@ -0,0 +1,128 @@ +subroutine pt2_alloc(pt2_data,N) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: N + integer :: k + + allocate(pt2_data % pt2(N) & + ,pt2_data % variance(N) & + ,pt2_data % rpt2(N) & + ,pt2_data % overlap(N,N) & + ) + + pt2_data % pt2(:) = 0.d0 + pt2_data % variance(:) = 0.d0 + pt2_data % rpt2(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + +end subroutine + +subroutine pt2_dealloc(pt2_data) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + deallocate(pt2_data % pt2 & + ,pt2_data % variance & + ,pt2_data % rpt2 & + ,pt2_data % overlap & + ) +end subroutine + +subroutine pt2_add(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) + + endif + +end subroutine + + +subroutine pt2_add2(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2**2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) * p2 % overlap(:,:) + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) * p2 % overlap(:,:) + + endif + +end subroutine + + +subroutine pt2_serialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(in) :: pt2_data + integer, intent(in) :: n + double precision, intent(out) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + x(1:n) = pt2_data % pt2(1:n) + k=n + x(k+1:k+n) = pt2_data % rpt2(1:n) + k=k+n + x(k+1:k+n) = pt2_data % variance(1:n) + k=k+n + x(k+1:k+n2) = reshape(pt2_data % overlap(1:n,1:n), (/ n2 /)) + +end + +subroutine pt2_deserialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: n + double precision, intent(in) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + pt2_data % pt2(1:n) = x(1:n) + k=n + pt2_data % rpt2(1:n) = x(k+1:k+n) + k=k+n + pt2_data % variance(1:n) = x(k+1:k+n) + k=k+n + pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /)) + +end diff --git a/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f new file mode 100644 index 00000000..aa6546e7 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f @@ -0,0 +1,549 @@ + use omp_lib + use selection_types + use f77_zmq +BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ] + use omp_lib + implicit none + BEGIN_DOC + ! Global buffer for the OpenMP selection + END_DOC + call omp_init_lock(global_selection_buffer_lock) +END_PROVIDER + +BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ] + use omp_lib + implicit none + BEGIN_DOC + ! Global buffer for the OpenMP selection + END_DOC + call omp_set_lock(global_selection_buffer_lock) + call delete_selection_buffer(global_selection_buffer) + call create_selection_buffer(N_det_generators, 2*N_det_generators, & + global_selection_buffer) + call omp_unset_lock(global_selection_buffer_lock) +END_PROVIDER + + +subroutine run_pt2_slave(thread,iproc,energy) + use selection_types + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + if (N_det > 100000 ) then + call run_pt2_slave_large(thread,iproc,energy) + else + call run_pt2_slave_small(thread,iproc,energy) + endif +end + +subroutine run_pt2_slave_small(thread,iproc,energy) + use selection_types + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, ctask, ltask + character*(512), allocatable :: task(:) + integer, allocatable :: task_id(:) + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: b + logical :: done, buffer_ready + + type(pt2_type), allocatable :: pt2_data(:) + integer :: n_tasks, k, N + integer, allocatable :: i_generator(:), subset(:) + + double precision, external :: memory_of_double, memory_of_int + integer :: bsize ! Size of selection buffers + + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + b%N = 0 + buffer_ready = .False. + n_tasks = 1 + + done = .False. + do while (.not.done) + + n_tasks = max(1,n_tasks) + n_tasks = min(pt2_n_tasks_max,n_tasks) + + integer, external :: get_tasks_from_taskserver + if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then + exit + endif + done = task_id(n_tasks) == 0 + if (done) then + n_tasks = n_tasks-1 + endif + if (n_tasks == 0) exit + + do k=1,n_tasks + call sscanf_ddd(task(k), subset(k), i_generator(k), N) + enddo + if (b%N == 0) then + ! Only first time + bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) + call create_selection_buffer(bsize, bsize*2, b) + buffer_ready = .True. + else + ASSERT (b%N == bsize) + endif + + double precision :: time0, time1 + call wall_time(time0) + do k=1,n_tasks + call pt2_alloc(pt2_data(k),N_states) + b%cur = 0 + call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) + enddo + call wall_time(time1) + + integer, external :: tasks_done_to_taskserver + if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then + done = .true. + endif + call sort_selection_buffer(b) + call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks) + do k=1,n_tasks + call pt2_dealloc(pt2_data(k)) + enddo + b%cur=0 + +! ! Try to adjust n_tasks around nproc/2 seconds per job + n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) + n_tasks = min(n_tasks, pt2_n_tasks_max) +! n_tasks = 1 + end do + + integer, external :: disconnect_from_taskserver + do i=1,300 + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit + call usleep(500) + print *, 'Retry disconnect...' + end do + + call end_zmq_push_socket(zmq_socket_push,thread) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + if (buffer_ready) then + call delete_selection_buffer(b) + endif + deallocate(pt2_data) +end subroutine + + +subroutine run_pt2_slave_large(thread,iproc,energy) + use selection_types + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, ctask, ltask + character*(512) :: task + integer :: task_id(1) + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: b + logical :: done, buffer_ready + + type(pt2_type) :: pt2_data + integer :: n_tasks, k, N + integer :: i_generator, subset + + integer :: bsize ! Size of selection buffers + logical :: sending + double precision :: time_shift + + PROVIDE global_selection_buffer global_selection_buffer_lock + + call random_number(time_shift) + time_shift = time_shift*15.d0 + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + b%N = 0 + buffer_ready = .False. + n_tasks = 1 + + sending = .False. + done = .False. + double precision :: time0, time1 + call wall_time(time0) + time0 = time0+time_shift + do while (.not.done) + + integer, external :: get_tasks_from_taskserver + if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then + exit + endif + done = task_id(1) == 0 + if (done) then + n_tasks = n_tasks-1 + endif + if (n_tasks == 0) exit + + call sscanf_ddd(task, subset, i_generator, N) + if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then + print *, irp_here + stop 'bug in selection' + endif + if (b%N == 0) then + ! Only first time + bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) + call create_selection_buffer(bsize, bsize*2, b) + buffer_ready = .True. + else + ASSERT (b%N == bsize) + endif + + call pt2_alloc(pt2_data,N_states) + b%cur = 0 + call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator)) + + integer, external :: tasks_done_to_taskserver + if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then + done = .true. + endif + call sort_selection_buffer(b) + + call wall_time(time1) +! if (time1-time0 > 15.d0) then + call omp_set_lock(global_selection_buffer_lock) + global_selection_buffer%mini = b%mini + call merge_selection_buffers(b,global_selection_buffer) + b%cur=0 + call omp_unset_lock(global_selection_buffer_lock) + call wall_time(time0) +! endif + + call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) + if ( iproc == 1 .or. i_generator < 100 .or. done) then + call omp_set_lock(global_selection_buffer_lock) + call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) + global_selection_buffer%cur = 0 + call omp_unset_lock(global_selection_buffer_lock) + else + call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending) + endif + + call pt2_dealloc(pt2_data) + end do + call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) + + integer, external :: disconnect_from_taskserver + do i=1,300 + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit + call sleep(1) + print *, 'Retry disconnect...' + end do + + call end_zmq_push_socket(zmq_socket_push,thread) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + if (buffer_ready) then + call delete_selection_buffer(b) + endif + FREE global_selection_buffer +end subroutine + + +subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks) + use selection_types + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + type(pt2_type), intent(in) :: pt2_data(n_tasks) + integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) + type(selection_buffer), intent(inout) :: b + + logical :: sending + sending = .False. + call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) + call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending) +end subroutine + + +subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) + use selection_types + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + type(pt2_type), intent(in) :: pt2_data(n_tasks) + integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) + type(selection_buffer), intent(inout) :: b + logical, intent(inout) :: sending + integer :: rc, i + integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) + + if (sending) then + print *, irp_here, ': sending is true' + stop -1 + endif + sending = .True. + + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 1 + return + else if(rc /= 4) then + stop 'push' + endif + + + rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 2 + return + else if(rc /= 4*n_tasks) then + stop 'push' + endif + + + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + do i=1,n_tasks + call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + deallocate(pt2_serialized) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 3 + return + else if(rc /= size(pt2_serialized)*8) then + stop 'push' + endif + + + rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 6 + return + else if(rc /= 4*n_tasks) then + stop 'push' + endif + + + if (b%cur == 0) then + + rc = f77_zmq_send( zmq_socket_push, b%cur, 4, 0) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 7 + return + else if(rc /= 4) then + stop 'push' + endif + + else + + rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 7 + return + else if(rc /= 4) then + stop 'push' + endif + + + rc8 = f77_zmq_send8( zmq_socket_push, b%val, 8_8*int(b%cur,8), ZMQ_SNDMORE) + if (rc8 == -1_8) then + print *, irp_here, ': error sending result' + stop 8 + return + else if(rc8 /= 8_8*int(b%cur,8)) then + stop 'push' + endif + + + rc8 = f77_zmq_send8( zmq_socket_push, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0) + if (rc8 == -1_8) then + print *, irp_here, ': error sending result' + stop 9 + return + else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then + stop 'push' + endif + + endif + +end subroutine + +subroutine push_pt2_results_async_recv(zmq_socket_push,mini,sending) + use selection_types + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(out) :: mini + logical, intent(inout) :: sending + integer :: rc + + if (.not.sending) return + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 10 + return + else if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif + rc = f77_zmq_recv( zmq_socket_push, mini, 8, 0) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 11 + return + else if (rc /= 8) then + print *, irp_here//': error in receiving mini' + stop 12 + endif +IRP_ENDIF + sending = .False. +end subroutine + + + +subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b) + use selection_types + use f77_zmq + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + type(pt2_type), intent(inout) :: pt2_data(*) + type(selection_buffer), intent(inout) :: b + integer, intent(out) :: index(*) + integer, intent(out) :: n_tasks, task_id(*) + integer :: rc, rn, i + integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) + + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if(rc /= 4) then + stop 'pull' + endif + + rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if(rc /= 4*n_tasks) then + stop 'pull' + endif + + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if(rc /= 8*size(pt2_serialized)) then + stop 'pull' + endif + + do i=1,n_tasks + call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + deallocate(pt2_serialized) + + rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if(rc /= 4*n_tasks) then + stop 'pull' + endif + + rc = f77_zmq_recv( zmq_socket_pull, b%cur, 4, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if(rc /= 4) then + stop 'pull' + endif + + if (b%cur > 0) then + + rc8 = f77_zmq_recv8( zmq_socket_pull, b%val, 8_8*int(b%cur,8), 0) + if (rc8 == -1_8) then + n_tasks = 1 + task_id(1) = 0 + else if(rc8 /= 8_8*int(b%cur,8)) then + stop 'pull' + endif + + rc8 = f77_zmq_recv8( zmq_socket_pull, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0) + if (rc8 == -1_8) then + n_tasks = 1 + task_id(1) = 0 + else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then + stop 'pull' + endif + + endif + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, ZMQ_SNDMORE) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + else if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif + rc = f77_zmq_send( zmq_socket_pull, b%mini, 8, 0) +IRP_ENDIF + +end subroutine + diff --git a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f new file mode 100644 index 00000000..e6b016fa --- /dev/null +++ b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f @@ -0,0 +1,255 @@ +subroutine run_selection_slave(thread, iproc, energy) + + use f77_zmq + use selection_types + + implicit none + + double precision, intent(in) :: energy(N_states) + integer, intent(in) :: thread, iproc + + integer :: rc, i + integer :: worker_id, task_id(1), ctask, ltask + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_socket_push + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR), external :: new_zmq_push_socket + type(selection_buffer) :: buf, buf2 + type(pt2_type) :: pt2_data + logical :: done, buffer_ready + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym + PROVIDE psi_selectors_coef_transp psi_det_sorted_tc weight_selection + + call pt2_alloc(pt2_data,N_states) + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + buf%N = 0 + buffer_ready = .False. + ctask = 1 + + do + integer, external :: get_task_from_taskserver + if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then + exit + endif + done = task_id(ctask) == 0 + if (done) then + ctask = ctask - 1 + else + integer :: i_generator, N, subset, bsize + call sscanf_ddd(task, subset, i_generator, N) + if(buf%N == 0) then + ! Only first time + call create_selection_buffer(N, N*2, buf) + buffer_ready = .True. + else + if (N /= buf%N) then + print *, 'N=', N + print *, 'buf%N=', buf%N + print *, 'bug in ', irp_here + stop '-1' + end if + end if + call select_connected(i_generator, energy, pt2_data, buf,subset, pt2_F(i_generator)) + endif + + integer, external :: task_done_to_taskserver + + if(done .or. ctask == size(task_id)) then + do i=1, ctask + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then + call usleep(100) + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then + ctask = 0 + done = .true. + exit + endif + endif + end do + if(ctask > 0) then + call sort_selection_buffer(buf) +! call merge_selection_buffers(buf,buf2) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data,N_states) +! buf%mini = buf2%mini + buf%cur = 0 + end if + ctask = 0 + end if + + if(done) exit + ctask = ctask + 1 + end do + + if(ctask > 0) then + call sort_selection_buffer(buf) +! call merge_selection_buffers(buf,buf2) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) +! buf%mini = buf2%mini + buf%cur = 0 + end if + ctask = 0 + call pt2_dealloc(pt2_data) + + integer, external :: disconnect_from_taskserver + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + continue + endif + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + if (buffer_ready) then + call delete_selection_buffer(buf) +! call delete_selection_buffer(buf2) + endif +end subroutine + + +subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks) + use f77_zmq + use selection_types + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + type(pt2_type), intent(in) :: pt2_data + type(selection_buffer), intent(inout) :: b + integer, intent(in) :: ntasks, task_id(*) + integer :: rc + double precision, allocatable :: pt2_serialized(:) + + rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE) + if(rc /= 4) then + print *, 'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)' + endif + + + allocate(pt2_serialized (pt2_type_size(N_states)) ) + call pt2_serialize(pt2_data,N_states,pt2_serialized) + + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 3 + return + else if(rc /= size(pt2_serialized)*8) then + stop 'push' + endif + deallocate(pt2_serialized) + + if (b%cur > 0) then + + rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE) + if(rc /= 8*b%cur) then + print *, 'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE) + if(rc /= bit_kind*N_int*2*b%cur) then + print *, 'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)' + endif + + endif + + rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE) + if(rc /= 4) then + print *, 'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)' + endif + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif +IRP_ENDIF + +end subroutine + + +subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks) + use f77_zmq + use selection_types + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + type(pt2_type), intent(inout) :: pt2_data + double precision, intent(out) :: val(*) + integer(bit_kind), intent(out) :: det(N_int, 2, *) + integer, intent(out) :: N, ntasks, task_id(*) + integer :: rc, rn, i + double precision, allocatable :: pt2_serialized(:) + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) then + print *, 'f77_zmq_recv( zmq_socket_pull, N, 4, 0)' + endif + + allocate(pt2_serialized (pt2_type_size(N_states)) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0) + if (rc == -1) then + ntasks = 1 + task_id(1) = 0 + else if(rc /= 8*size(pt2_serialized)) then + stop 'pull' + endif + + call pt2_deserialize(pt2_data,N_states,pt2_serialized) + deallocate(pt2_serialized) + + if (N>0) then + rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0) + if(rc /= 8*N) then + print *, 'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0) + if(rc /= bit_kind*N_int*2*N) then + print *, 'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)' + endif + endif + + rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0) + if(rc /= 4) then + print *, 'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)' + endif + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif +IRP_ENDIF +end subroutine + + + diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f new file mode 100644 index 00000000..6b93f663 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -0,0 +1,1029 @@ +use bitmasks + +! --- + +subroutine select_connected(i_generator, E0, pt2_data, b, subset, csubset) + + use bitmasks + use selection_types + + implicit none + integer, intent(in) :: i_generator, subset, csubset + double precision, intent(in) :: E0(N_states) + type(selection_buffer), intent(inout) :: b + type(pt2_type), intent(inout) :: pt2_data + + integer :: k, l + integer(bit_kind) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, allocatable :: fock_diag_tmp(:,:) + + allocate(fock_diag_tmp(2,mo_num+1)) + + call build_fock_tmp(fock_diag_tmp, psi_det_generators(1,1,i_generator), N_int) + + do k = 1, N_int + hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole), psi_det_generators(k,1,i_generator)) + hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole), psi_det_generators(k,2,i_generator)) + particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) + particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) + enddo + call select_singles_and_doubles(i_generator, hole_mask, particle_mask, fock_diag_tmp, E0, pt2_data, b, subset, csubset) + + deallocate(fock_diag_tmp) + +end subroutine select_connected + +! --- + +subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock_diag_tmp, E0, pt2_data, buf, subset, csubset) + + BEGIN_DOC + ! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted_tc + END_DOC + + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator, subset, csubset + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + double precision, parameter :: norm_thr = 1.d-16 + + integer :: h1, h2, s1, s2, s3, i1, i2, ib, sp, k, i, j, nt, ii, sze + integer :: maskInd + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + integer :: l_a, nmax, idx + integer :: nb_count, maskInd_save + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) + integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) + logical :: fullMatch, ok + logical :: monoAdo, monoBdo + logical :: monoBdo_save + logical :: found + + integer, allocatable :: preinteresting(:), prefullinteresting(:) + integer, allocatable :: interesting(:), fullinteresting(:) + integer, allocatable :: tmp_array(:) + integer, allocatable :: indices(:), exc_degree(:), iorder(:) + integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + logical, allocatable :: banned(:,:,:), bannedOrb(:,:) + double precision, allocatable :: coef_fullminilist_rev(:,:) + double precision, allocatable :: mat(:,:,:), mat_p(:,:,:), mat_m(:,:,:) + + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + PROVIDE banned_excitation + + monoAdo = .true. + monoBdo = .true. + + do k = 1, N_int + hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) + enddo + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + allocate( indices(N_det), exc_degree( max(N_det_alpha_unique, N_det_beta_unique) ) ) + + ! Pre-compute excitation degrees wrt alpha determinants + k = 1 + do i = 1, N_det_alpha_unique + call get_excitation_degree_spin(psi_det_alpha_unique(1,i), psi_det_generators(1,1,i_generator), exc_degree(i), N_int) + enddo + + ! Iterate on 0SD beta, and find alphas 0SDTQ such that exc_degree <= 4 + do j = 1, N_det_beta_unique + call get_excitation_degree_spin(psi_det_beta_unique(1,j), psi_det_generators(1,2,i_generator), nt, N_int) + if (nt > 2) cycle + do l_a = psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1 + i = psi_bilinear_matrix_rows(l_a) + if(nt + exc_degree(i) <= 4) then + idx = psi_det_sorted_tc_order(psi_bilinear_matrix_order(l_a)) + if (psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then + indices(k) = idx + k = k + 1 + endif + endif + enddo + enddo + + ! Pre-compute excitation degrees wrt beta determinants + do i = 1, N_det_beta_unique + call get_excitation_degree_spin(psi_det_beta_unique(1,i), psi_det_generators(1,2,i_generator), exc_degree(i), N_int) + enddo + + ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4 + ! Remove also contributions < 1.d-20) + do j = 1, N_det_alpha_unique + call get_excitation_degree_spin(psi_det_alpha_unique(1,j), psi_det_generators(1,1,i_generator), nt, N_int) + if (nt > 1) cycle + do l_a = psi_bilinear_matrix_transp_rows_loc(j), psi_bilinear_matrix_transp_rows_loc(j+1)-1 + i = psi_bilinear_matrix_transp_columns(l_a) + if(exc_degree(i) < 3) cycle + if(nt + exc_degree(i) <= 4) then + idx = psi_det_sorted_tc_order( & + psi_bilinear_matrix_order( & + psi_bilinear_matrix_transp_order(l_a))) + if(psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then + indices(k) = idx + k = k + 1 + endif + endif + enddo + enddo + + deallocate(exc_degree) + nmax = k - 1 + + call isort_noidx(indices,nmax) + + ! Start with 32 elements. Size will double along with the filtering. + allocate(preinteresting(0:32), prefullinteresting(0:32), interesting(0:32), fullinteresting(0:32)) + preinteresting(:) = 0 + prefullinteresting(:) = 0 + + do i = 1, N_int + negMask(i,1) = not(psi_det_generators(i,1,i_generator)) + negMask(i,2) = not(psi_det_generators(i,2,i_generator)) + enddo + + do k = 1, nmax + + i = indices(k) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + do j = 2, N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + enddo + + if(nt <= 4) then + if(i <= N_det_selectors) then + sze = preinteresting(0) + if(sze+1 == size(preinteresting)) then + allocate(tmp_array(0:sze)) + tmp_array(0:sze) = preinteresting(0:sze) + deallocate(preinteresting) + allocate(preinteresting(0:2*sze)) + preinteresting(0:sze) = tmp_array(0:sze) + deallocate(tmp_array) + endif + preinteresting(0) = sze+1 + preinteresting(sze+1) = i + elseif(nt <= 2) then + sze = prefullinteresting(0) + if(sze+1 == size(prefullinteresting)) then + allocate (tmp_array(0:sze)) + tmp_array(0:sze) = prefullinteresting(0:sze) + deallocate(prefullinteresting) + allocate(prefullinteresting(0:2*sze)) + prefullinteresting(0:sze) = tmp_array(0:sze) + deallocate(tmp_array) + endif + prefullinteresting(0) = sze+1 + prefullinteresting(sze+1) = i + endif + endif + + enddo + deallocate(indices) + + allocate( banned(mo_num, mo_num,2), bannedOrb(mo_num, 2) ) + allocate( mat(N_states, mo_num, mo_num) ) + allocate( mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) ) + maskInd = -1 + + do s1 = 1, 2 + do i1 = N_holes(s1), 1, -1 ! Generate low excitations first + + found = .False. + monoBdo_save = monoBdo + maskInd_save = maskInd + do s2 = s1, 2 + ib = 1 + if(s1 == s2) ib = i1+1 + do i2 = N_holes(s2), ib, -1 + maskInd = maskInd + 1 + if(mod(maskInd, csubset) == (subset-1)) then + found = .True. + end if + enddo + if(s1 /= s2) monoBdo = .false. + enddo + + if (.not.found) cycle + monoBdo = monoBdo_save + maskInd = maskInd_save + + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, pmask, ok, N_int) + + negMask = not(pmask) + + interesting(0) = 0 + fullinteresting(0) = 0 + + do ii = 1, preinteresting(0) + i = preinteresting(ii) + select case(N_int) + case(1) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + case(2) + mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted_tc(1:2,1,i)) + mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted_tc(1:2,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + & + popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2)) + case(3) + mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted_tc(1:3,1,i)) + mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted_tc(1:3,2,i)) + nt = 0 + do j = 3, 1, -1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + enddo + case(4) + mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted_tc(1:4,1,i)) + mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted_tc(1:4,2,i)) + nt = 0 + do j = 4, 1, -1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + enddo + case default + mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted_tc(1:N_int,1,i)) + mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted_tc(1:N_int,2,i)) + nt = 0 + do j = N_int, 1, -1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + enddo + end select + + if(nt <= 4) then + sze = interesting(0) + if(sze+1 == size(interesting)) then + allocate (tmp_array(0:sze)) + tmp_array(0:sze) = interesting(0:sze) + deallocate(interesting) + allocate(interesting(0:2*sze)) + interesting(0:sze) = tmp_array(0:sze) + deallocate(tmp_array) + endif + interesting(0) = sze+1 + interesting(sze+1) = i + if(nt <= 2) then + sze = fullinteresting(0) + if(sze+1 == size(fullinteresting)) then + allocate (tmp_array(0:sze)) + tmp_array(0:sze) = fullinteresting(0:sze) + deallocate(fullinteresting) + allocate(fullinteresting(0:2*sze)) + fullinteresting(0:sze) = tmp_array(0:sze) + deallocate(tmp_array) + endif + fullinteresting(0) = sze+1 + fullinteresting(sze+1) = i + endif + endif + + enddo + + do ii = 1, prefullinteresting(0) + i = prefullinteresting(ii) + nt = 0 + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + if (nt > 2) cycle + do j=N_int,2,-1 + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i)) + nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + if (nt > 2) exit + end do + + if(nt <= 2) then + sze = fullinteresting(0) + if (sze+1 == size(fullinteresting)) then + allocate (tmp_array(0:sze)) + tmp_array(0:sze) = fullinteresting(0:sze) + deallocate(fullinteresting) + allocate(fullinteresting(0:2*sze)) + fullinteresting(0:sze) = tmp_array(0:sze) + deallocate(tmp_array) + endif + fullinteresting(0) = sze+1 + fullinteresting(sze+1) = i + endif + enddo + + allocate( fullminilist (N_int, 2, fullinteresting(0)), & + minilist (N_int, 2, interesting(0)) ) + + do i = 1, fullinteresting(0) + do k = 1, N_int + fullminilist(k,1,i) = psi_det_sorted_tc(k,1,fullinteresting(i)) + fullminilist(k,2,i) = psi_det_sorted_tc(k,2,fullinteresting(i)) + enddo + enddo + + do i = 1, interesting(0) + do k = 1, N_int + minilist(k,1,i) = psi_det_sorted_tc(k,1,interesting(i)) + minilist(k,2,i) = psi_det_sorted_tc(k,2,interesting(i)) + enddo + enddo + + do s2 = s1, 2 + sp = s1 + + if(s1 /= s2) sp = 3 + + ib = 1 + if(s1 == s2) ib = i1+1 + monoAdo = .true. + do i2 = N_holes(s2), ib, -1 ! Generate low excitations first + + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + banned(:,:,1) = banned_excitation(:,:) + banned(:,:,2) = banned_excitation(:,:) + do j = 1, mo_num + bannedOrb(j, 1) = .true. + bannedOrb(j, 2) = .true. + enddo + do s3 = 1, 2 + do i = 1, N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. + enddo + enddo + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. + endif + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + endif + endif + + maskInd = maskInd + 1 + if(mod(maskInd, csubset) == (subset-1)) then + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + if(fullMatch) cycle + + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, mat_p, mat_m) + + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_p, mat_m) + endif + + enddo + + if(s1 /= s2) monoBdo = .false. + enddo + + deallocate(fullminilist, minilist) + + enddo + enddo + + deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) + deallocate(banned, bannedOrb,mat) + deallocate(mat_p, mat_m) + +end subroutine select_singles_and_doubles + +! --- + +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) + + use bitmasks + implicit none + + BEGIN_DOC + ! Identify the determinants in det which are in the internal space. These are + ! the determinants that can be produced by creating two particles on the mask. + END_DOC + + integer, intent(in) :: i_gen, N + integer, intent(in) :: interesting(0:N) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + logical, intent(inout) :: banned(mo_num, mo_num) + logical, intent(out) :: fullMatch + + integer :: i, j, na, nb, list(3) + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + ! If det(i) can't be generated by the mask, cycle + do j=1, N_int + if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl + if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl + end do + + ! If det(i) < det(i_gen), it hs already been considered + if(interesting(i) < i_gen) then + fullMatch = .true. + return + end if + + ! Identify the particles + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + end do + + call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int) + call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int) + banned(list(1), list(2)) = .true. + end do genl + +end subroutine spot_isinwf + +! --- + +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, mat_p, mat_m) + + BEGIN_DOC + ! Computes the contributions A(r,s) by + ! comparing the external determinant to all the internal determinants det(i). + ! an applying two particles (r,s) to the mask. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: sp, i_gen, N_sel + integer, intent(in) :: interesting(0:N_sel) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) + + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int,2) + + + PROVIDE psi_selectors_coef_transp psi_det_sorted_tc + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + + mat = 0d0 + mat_p = 0d0 + mat_m = 0d0 + + do i = 1, N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i = 1, N_sel + if(interesting(i) < 0) then + stop 'prefetch interesting(i) and det(i)' + endif + + mobMask(1,1) = iand(negMask(1,1), det(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), det(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + + if(nt > 4) cycle + + do j = 2, N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + enddo + + if(nt > 4) cycle + + if (interesting(i) == i_gen) then + if(sp == 3) then + do k = 1, mo_num + do j = 1, mo_num + banned(j,k,2) = banned(k,j,1) + enddo + enddo + else + do k = 1, mo_num + do l = k+1, mo_num + banned(l,k,1) = banned(k,l,1) + enddo + enddo + endif + endif + + if (interesting(i) >= i_gen) then + + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + + call get_d3_h ( det(1,1,i), bannedOrb, banned, mat , mask, p, sp, psi_selectors_coef_transp (1, interesting(i)) ) + call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_m, mat_p, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) & + , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) ) + + !perMask(1,1) = iand(mask(1,1), not(det(1,1,i))) + !perMask(1,2) = iand(mask(1,2), not(det(1,2,i))) + !do j=2,N_int + ! perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + ! perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + !end do + !call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) + !call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) + !call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) + !if(nt == 4) then + ! call get_d2 (det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + ! call get_pm2(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + !elseif(nt == 3) then + ! call get_d1 (det(1,1,i), phasemask, bannedOrb, banned, mat , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + ! call get_pm1(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + !else + ! call get_d0 (det(1,1,i), phasemask, bannedOrb, banned, mat , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + ! call get_pm0(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + !endif + elseif(nt == 4) then + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + call past_d2(banned, p, sp) + elseif(nt == 3) then + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + call past_d1(bannedOrb, p) + endif + enddo + +end subroutine splash_pq + +! --- + +subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_p, mat_m) + + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator, sp, h1, h2 + double precision, intent(in) :: mat(N_states, mo_num, mo_num) + double precision, intent(in) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + integer :: iii, s, degree + integer :: s1, s2, p1, p2, ib, j, istate, jstate + integer :: info, k , iwork(N_states+1) + integer(bit_kind) :: occ(N_int,2), n + integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) + logical :: do_cycle, ok, do_diag + double precision :: delta_E, val, Hii, w, tmp, alpha_h_psi + double precision :: E_shift + double precision :: i_h_alpha, alpha_h_i, psi_h_alpha + double precision :: e_pert(N_states), coef(N_states) + double precision :: s_weight(N_states,N_states) + double precision :: eigvalues(N_states+1) + double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) + + integer, external :: number_of_holes, number_of_particles + logical, external :: is_a_two_holes_two_particles + logical, external :: is_a_1h1p + double precision, external :: diag_H_mat_elem_fock + + + PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs + + do jstate = 1, N_states + do istate = 1, N_states + s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) + enddo + enddo + + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + E_shift = 0.d0 + + if (h0_type == 'CFG') then + j = det_to_configuration(i_generator) + E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j) + endif + + do p1 = 1, mo_num + + if(bannedOrb(p1, s1)) cycle + ib = 1 + if(sp /= 3) ib = p1+1 + + do p2 = ib, mo_num + + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + + ! TODO ?? + !if(pseudo_sym)then + ! if(dabs(mat(1, p1, p2)).lt.thresh_sym)then + ! w = 0.d0 + ! endif + !endif + + ! MANU: ERREUR dans les calculs puisque < I | H | J > = 0 + ! n'implique pas < I | H_TC | J > = 0 ?? + !val = maxval(abs(mat(1:N_states, p1, p2))) + !if( val == 0d0) cycle + + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + + if(do_only_cas) then + if( number_of_particles(det) > 0 ) cycle + if( number_of_holes(det) > 0 ) cycle + endif + + if(do_ddci) then + if(is_a_two_holes_two_particles(det)) cycle + endif + + if(do_only_1h1p) then + if(.not.is_a_1h1p(det)) cycle + endif + + if(seniority_max >= 0) then + s = 0 + do k = 1, N_int + s = s + popcnt(ieor(det(k,1),det(k,2))) + enddo + if (s > seniority_max) cycle + endif + + if(excitation_max >= 0) then + do_cycle = .True. + if(excitation_ref == 1) then + call get_excitation_degree(HF_bitmask, det(1,1), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + elseif(excitation_ref == 2) then + do k = 1, N_dominant_dets_of_cfgs + call get_excitation_degree(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + enddo + endif + if(do_cycle) cycle + endif + + if(excitation_alpha_max >= 0) then + do_cycle = .True. + if(excitation_ref == 1) then + call get_excitation_degree_spin(HF_bitmask, det(1,1), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + elseif (excitation_ref == 2) then + do k = 1, N_dominant_dets_of_cfgs + call get_excitation_degree_spin(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_alpha_max) + enddo + endif + if(do_cycle) cycle + endif + + if(excitation_beta_max >= 0) then + do_cycle = .True. + if(excitation_ref == 1) then + call get_excitation_degree_spin(HF_bitmask, det(1,2), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + elseif(excitation_ref == 2) then + do k = 1, N_dominant_dets_of_cfgs + call get_excitation_degree(dominant_dets_of_cfgs(1,2,k), det(1,2), degree, N_int) + do_cycle = do_cycle .and. (degree > excitation_beta_max) + enddo + endif + if(do_cycle) cycle + endif + + + w = 0.d0 + + e_pert = 0.d0 + coef = 0.d0 + do_diag = .False. + + ! psi_det_generators --> |i> of psi_0 + ! psi_coef_generators --> c_i of psi_0 + ! + ! = \sum_i c_i + + ! ------------------------------------------- + ! Non hermitian + ! c_alpha = /delta_E(alpha) + ! e_alpha = c_alpha * + ! and + ! and transpose + ! ------------------------------------------- + + istate = 1 + call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) + delta_E = E0(istate) - Hii + E_shift + !delta_E = 1.d0 + +! call get_excitation_degree( HF_bitmask, det, degree, N_int) + +! psi_h_alpha = mat_m(istate, p1, p2) +! alpha_h_psi = mat_p(istate, p1, p2) +! + psi_h_alpha = 0.d0 + alpha_h_psi = 0.d0 + do iii = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i) + psi_h_alpha += i_h_alpha * leigvec_tc_bi_orth(iii,1) + alpha_h_psi += alpha_h_i * reigvec_tc_bi_orth(iii,1) + enddo + + !if(alpha_h_psi*psi_h_alpha/delta_E.gt.1.d-10)then + ! print*, 'E0,Hii,E_shift' + ! print*, E0(istate), Hii, E_shift + ! print*, psi_h_alpha, alpha_h_psi, delta_E + ! print*, psi_h_alpha * alpha_h_psi / delta_E + ! !if(Hii .lt. E0(istate)) then + ! ! call debug_det(det, N_int) + ! ! print*, ' |E0| < |Hii| !!!' + ! ! print*, ' E0 = ', E0(istate) + ! ! print*, ' Hii = ', Hii + ! !endif + !endif + + coef(istate) = alpha_h_psi / delta_E + e_pert(istate) = coef(istate) * psi_h_alpha + if(selection_tc == 1 )then + if(e_pert(istate).lt.0.d0)then + e_pert(istate) = 0.d0 + endif + else if(selection_tc == -1)then + if(e_pert(istate).gt.0.d0)then + e_pert(istate) = 0.d0 + endif + endif + + + !if(e_pert(istate) .gt. 1.d-15) then + ! print*, 'E0,Hii,E_shift' + ! print*, E0(istate), Hii, E_shift + ! print*, psi_h_alpha, alpha_h_psi, delta_E + ! print*, psi_h_alpha*alpha_h_psi/delta_E + !endif + +! elseif(cipsi_tc == "h_tc_2x2") then + + + do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1 + + do istate = 1, N_states + + alpha_h_psi = mat(istate, p1, p2) + + pt2_data % overlap(:,istate) = pt2_data % overlap(:,istate) + coef(:) * coef(istate) + pt2_data % variance(istate) = pt2_data % variance(istate) + dabs(e_pert(istate)) + pt2_data % pt2(istate) = pt2_data % pt2(istate) + e_pert(istate) + + select case (weight_selection) + case(5) + ! Variance selection + if (h0_type == 'CFG') then + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) + endif + case(6) + if (h0_type == 'CFG') then + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) + endif + case default + ! Energy selection + if (h0_type == 'CFG') then + !w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate) + w = min(w, -dabs(e_pert(istate)) * s_weight(istate,istate)) / c0_weight(istate) + else + !w = min(w, e_pert(istate) * s_weight(istate,istate)) + w = min(w, -dabs( e_pert(istate) ) * s_weight(istate,istate)) + endif + endselect + enddo + + if(h0_type == 'CFG') then + do k = 1, N_int + occ(k,1) = ieor(det(k,1), det(k,2)) + occ(k,2) = iand(det(k,1), det(k,2)) + enddo + call configuration_to_dets_size(occ, n, elec_alpha_num, N_int) + n = max(n,1) + w *= dsqrt(dble(n)) + endif + + if(w <= buf%mini) then + call add_to_selection_buffer(buf, det, w) + endif + + enddo ! end do p2 + enddo ! end do p1 + +end subroutine fill_buffer_double + +! --- + +subroutine get_mask_phase(det1, pm, Nint) + + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(out) :: pm(Nint,2) + integer(bit_kind) :: tmp1, tmp2 + integer :: i + tmp1 = 0_8 + tmp2 = 0_8 + select case (Nint) + +BEGIN_TEMPLATE + case ($Nint) + do i=1,$Nint + pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1)) + pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32)) + pm(i,1) = ieor(pm(i,1), tmp1) + pm(i,2) = ieor(pm(i,2), tmp2) + if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1) + if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2) + end do +SUBST [ Nint ] +1;; +2;; +3;; +4;; +END_TEMPLATE + case default + do i=1,Nint + pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1)) + pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32)) + pm(i,1) = ieor(pm(i,1), tmp1) + pm(i,2) = ieor(pm(i,2), tmp2) + if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1) + if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2) + end do + end select + +end subroutine get_mask_phase + +! --- + +subroutine past_d1(bannedOrb, p) + + use bitmasks + implicit none + + logical, intent(inout) :: bannedOrb(mo_num, 2) + integer, intent(in) :: p(0:4, 2) + integer :: i,s + + do s = 1, 2 + do i = 1, p(0, s) + bannedOrb(p(i, s), s) = .true. + end do + end do + +end subroutine past_d1 + +! --- + +subroutine past_d2(banned, p, sp) + + use bitmasks + implicit none + + logical, intent(inout) :: banned(mo_num, mo_num) + integer, intent(in) :: p(0:4, 2), sp + integer :: i,j + + if(sp == 3) then + do j=1,p(0,2) + do i=1,p(0,1) + banned(p(i,1), p(j,2)) = .true. + end do + end do + else + do i=1,p(0, sp) + do j=1,i-1 + banned(p(j,sp), p(i,sp)) = .true. + banned(p(i,sp), p(j,sp)) = .true. + end do + end do + end if + +end subroutine past_d2 + +! --- + +subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint) + + BEGIN_DOC + ! Gives the inidices(+1) of the bits set to 1 in the bit string + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: string(Nint) + integer, intent(out) :: list(Nint*bit_kind_size) + integer, intent(out) :: n_elements + + integer :: i, ishift + integer(bit_kind) :: l + + n_elements = 0 + ishift = 2 + do i=1,Nint + l = string(i) + do while (l /= 0_bit_kind) + n_elements = n_elements+1 + list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l) + l = iand(l,l-1_bit_kind) + enddo + ishift = ishift + bit_kind_size + enddo + +end subroutine bitstring_to_list_in_selection + +! --- + diff --git a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f new file mode 100644 index 00000000..10132086 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f @@ -0,0 +1,416 @@ + +subroutine create_selection_buffer(N, size_in, res) + use selection_types + implicit none + BEGIN_DOC +! Allocates the memory for a selection buffer. +! The arrays have dimension size_in and the maximum number of elements is N + END_DOC + + integer, intent(in) :: N, size_in + type(selection_buffer), intent(out) :: res + + integer :: siz + siz = max(size_in,1) + + double precision :: rss + double precision, external :: memory_of_double + rss = memory_of_double(siz)*(N_int*2+1) + call check_mem(rss,irp_here) + + allocate(res%det(N_int, 2, siz), res%val(siz)) + + res%val(:) = 0d0 + res%det(:,:,:) = 0_8 + res%N = N + res%mini = 0d0 + res%cur = 0 +end subroutine + +subroutine delete_selection_buffer(b) + use selection_types + implicit none + type(selection_buffer), intent(inout) :: b + if (associated(b%det)) then + deallocate(b%det) + endif + if (associated(b%val)) then + deallocate(b%val) + endif + NULLIFY(b%det) + NULLIFY(b%val) + b%cur = 0 + b%mini = 0.d0 + b%N = 0 +end + + +subroutine add_to_selection_buffer(b, det, val) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer(bit_kind), intent(in) :: det(N_int, 2) + double precision, intent(in) :: val + integer :: i + + if(b%N > 0 .and. val <= b%mini) then + b%cur += 1 + b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) + b%val(b%cur) = val + if(b%cur == size(b%val)) then + call sort_selection_buffer(b) + end if + end if +end subroutine + +subroutine merge_selection_buffers(b1, b2) + use selection_types + implicit none + BEGIN_DOC +! Merges the selection buffers b1 and b2 into b2 + END_DOC + type(selection_buffer), intent(inout) :: b1 + type(selection_buffer), intent(inout) :: b2 + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i, i1, i2, k, nmwen, sze + if (b1%cur == 0) return + do while (b1%val(b1%cur) > b2%mini) + b1%cur = b1%cur-1 + if (b1%cur == 0) then + return + endif + enddo + nmwen = min(b1%N, b1%cur+b2%cur) + double precision :: rss + double precision, external :: memory_of_double + sze = max(size(b1%val), size(b2%val)) + rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze) + call check_mem(rss,irp_here) + allocate(val(sze), detmp(N_int, 2, sze)) + i1=1 + i2=1 + do i=1,nmwen + if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then + exit + else if (i1 > b1%cur) then + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + else if (i2 > b2%cur) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + if (b1%val(i1) <= b2%val(i2)) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + endif + endif + enddo + deallocate(b2%det, b2%val) + do i=nmwen+1,b2%N + val(i) = 0.d0 + detmp(1:N_int,1:2,i) = 0_bit_kind + enddo + b2%det => detmp + b2%val => val + b2%mini = min(b2%mini,b2%val(b2%N)) + b2%cur = nmwen +end + + +subroutine sort_selection_buffer(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + integer :: i, nmwen + logical, external :: detEq + if (b%N == 0 .or. b%cur == 0) return + nmwen = min(b%N, b%cur) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3)) + call check_mem(rss,irp_here) + allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) + do i=1,b%cur + iorder(i) = i + end do + call dsort(b%val, iorder, b%cur) + do i=1, nmwen + detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) + detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) + end do + deallocate(b%det,iorder) + b%det => detmp + b%mini = min(b%mini,b%val(b%N)) + b%cur = nmwen +end subroutine + +subroutine make_selection_buffer_s2(b) + use selection_types + type(selection_buffer), intent(inout) :: b + + integer(bit_kind), allocatable :: o(:,:,:) + double precision, allocatable :: val(:) + + integer :: n_d + integer :: i,k,sze,n_alpha,j,n + logical :: dup + + ! Sort + integer, allocatable :: iorder(:) + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: configuration_search_key + integer(bit_kind), allocatable :: tmp_array(:,:,:) + logical, allocatable :: duplicate(:) + + n_d = b%cur + double precision :: rss + double precision, external :: memory_of_double + rss = (4*N_int+4)*memory_of_double(n_d) + call check_mem(rss,irp_here) + allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), & + tmp_array(N_int,2,n_d), val(n_d) ) + + do i=1,n_d + do k=1,N_int + o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i)) + o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i)) + enddo + iorder(i) = i + bit_tmp(i) = configuration_search_key(o(1,1,i),N_int) + enddo + + deallocate(b%det) + + call i8sort(bit_tmp,iorder,n_d) + + do i=1,n_d + do k=1,N_int + tmp_array(k,1,i) = o(k,1,iorder(i)) + tmp_array(k,2,i) = o(k,2,iorder(i)) + enddo + val(i) = b%val(iorder(i)) + duplicate(i) = .False. + enddo + + ! Find duplicates + do i=1,n_d-1 + if (duplicate(i)) then + cycle + endif + j = i+1 + do while (bit_tmp(j)==bit_tmp(i)) + if (duplicate(j)) then + j+=1 + if (j>n_d) then + exit + endif + cycle + endif + dup = .True. + do k=1,N_int + if ( (tmp_array(k,1,i) /= tmp_array(k,1,j)) & + .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then + dup = .False. + exit + endif + enddo + if (dup) then + val(i) = max(val(i), val(j)) + duplicate(j) = .True. + endif + j+=1 + if (j>n_d) then + exit + endif + enddo + enddo + + deallocate (b%val) + ! Copy filtered result + integer :: n_p + n_p=0 + do i=1,n_d + if (duplicate(i)) then + cycle + endif + n_p = n_p + 1 + do k=1,N_int + o(k,1,n_p) = tmp_array(k,1,i) + o(k,2,n_p) = tmp_array(k,2,i) + enddo + val(n_p) = val(i) + enddo + + ! Sort by importance + do i=1,n_p + iorder(i) = i + end do + call dsort(val,iorder,n_p) + do i=1,n_p + do k=1,N_int + tmp_array(k,1,i) = o(k,1,iorder(i)) + tmp_array(k,2,i) = o(k,2,iorder(i)) + enddo + enddo + do i=1,n_p + do k=1,N_int + o(k,1,i) = tmp_array(k,1,i) + o(k,2,i) = tmp_array(k,2,i) + enddo + enddo + + ! Create determinants + n_d = 0 + do i=1,n_p + call configuration_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int) + n_d = n_d + sze + if (n_d > b%cur) then +! if (n_d - b%cur > b%cur - n_d + sze) then +! n_d = n_d - sze +! endif + exit + endif + enddo + + rss = (4*N_int+2)*memory_of_double(n_d) + call check_mem(rss,irp_here) + allocate(b%det(N_int,2,2*n_d), b%val(2*n_d)) + k=1 + do i=1,n_p + n=n_d + call configuration_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int) + call configuration_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int) + do j=k,k+n-1 + b%val(j) = val(i) + enddo + k = k+n + if (k > n_d) exit + enddo + deallocate(o) + b%cur = n_d + b%N = n_d +end + + + + +subroutine remove_duplicates_in_selection_buffer(b) + use selection_types + type(selection_buffer), intent(inout) :: b + + integer(bit_kind), allocatable :: o(:,:,:) + double precision, allocatable :: val(:) + + integer :: n_d + integer :: i,k,sze,n_alpha,j,n + logical :: dup + + ! Sort + integer, allocatable :: iorder(:) + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: det_search_key + integer(bit_kind), allocatable :: tmp_array(:,:,:) + logical, allocatable :: duplicate(:) + + n_d = b%cur + logical :: found_duplicates + double precision :: rss + double precision, external :: memory_of_double + rss = (4*N_int+4)*memory_of_double(n_d) + call check_mem(rss,irp_here) + + found_duplicates = .False. + allocate(iorder(n_d), duplicate(n_d), bit_tmp(n_d), & + tmp_array(N_int,2,n_d), val(n_d) ) + + do i=1,n_d + iorder(i) = i + bit_tmp(i) = det_search_key(b%det(1,1,i),N_int) + enddo + + call i8sort(bit_tmp,iorder,n_d) + + do i=1,n_d + do k=1,N_int + tmp_array(k,1,i) = b%det(k,1,iorder(i)) + tmp_array(k,2,i) = b%det(k,2,iorder(i)) + enddo + val(i) = b%val(iorder(i)) + duplicate(i) = .False. + enddo + + ! Find duplicates + do i=1,n_d-1 + if (duplicate(i)) then + cycle + endif + j = i+1 + do while (bit_tmp(j)==bit_tmp(i)) + if (duplicate(j)) then + j+=1 + if (j>n_d) then + exit + endif + cycle + endif + dup = .True. + do k=1,N_int + if ( (tmp_array(k,1,i) /= tmp_array(k,1,j)) & + .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then + dup = .False. + exit + endif + enddo + if (dup) then + duplicate(j) = .True. + found_duplicates = .True. + endif + j+=1 + if (j>n_d) then + exit + endif + enddo + enddo + + if (found_duplicates) then + + ! Copy filtered result + integer :: n_p + n_p=0 + do i=1,n_d + if (duplicate(i)) then + cycle + endif + n_p = n_p + 1 + do k=1,N_int + b%det(k,1,n_p) = tmp_array(k,1,i) + b%det(k,2,n_p) = tmp_array(k,2,i) + enddo + val(n_p) = val(i) + enddo + b%cur=n_p + b%N=n_p + + endif + +end + + + diff --git a/src/cipsi_tc_bi_ortho/selection_weight.irp.f b/src/cipsi_tc_bi_ortho/selection_weight.irp.f new file mode 100644 index 00000000..3c09e59a --- /dev/null +++ b/src/cipsi_tc_bi_ortho/selection_weight.irp.f @@ -0,0 +1,134 @@ +BEGIN_PROVIDER [ double precision, pt2_match_weight, (N_states) ] + implicit none + BEGIN_DOC + ! Weights adjusted along the selection to make the PT2 contributions + ! of each state coincide. + END_DOC + pt2_match_weight(:) = 1.d0 +END_PROVIDER + + + +BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ] + implicit none + BEGIN_DOC + ! Weights adjusted along the selection to make the variances + ! of each state coincide. + END_DOC + variance_match_weight(:) = 1.d0 +END_PROVIDER + + + +subroutine update_pt2_and_variance_weights(pt2_data, N_st) + implicit none + use selection_types + BEGIN_DOC +! Updates the PT2- and Variance- matching weights. + END_DOC + integer, intent(in) :: N_st + type(pt2_type), intent(in) :: pt2_data + double precision :: pt2(N_st) + double precision :: variance(N_st) + + double precision :: avg, element, dt, x + integer :: k + pt2(:) = pt2_data % pt2(:) + variance(:) = pt2_data % variance(:) + + avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero + + dt = 8.d0 !* selection_factor + do k=1,N_st + element = exp(dt*(pt2(k)/avg - 1.d0)) + element = min(2.0d0 , element) + element = max(0.5d0 , element) + pt2_match_weight(k) *= element + enddo + + + avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero + + do k=1,N_st + element = exp(dt*(variance(k)/avg -1.d0)) + element = min(2.0d0 , element) + element = max(0.5d0 , element) + variance_match_weight(k) *= element + enddo + + if (N_det < 100) then + ! For tiny wave functions, weights are 1.d0 + pt2_match_weight(:) = 1.d0 + variance_match_weight(:) = 1.d0 + endif + + threshold_davidson_pt2 = min(1.d-6, & + max(threshold_davidson, 1.e-1 * PT2_relative_error * minval(abs(pt2(1:N_states)))) ) + + SOFT_TOUCH pt2_match_weight variance_match_weight threshold_davidson_pt2 +end + + + + +BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ] + implicit none + BEGIN_DOC + ! Weights used in the selection criterion + END_DOC + select case (weight_selection) + + case (0) + print *, 'Using input weights in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) * state_average_weight(1:N_states) + + case (1) + print *, 'Using 1/c_max^2 weight in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) + + case (2) + print *, 'Using pt2-matching weight in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states) + print *, '# PT2 weight ', real(pt2_match_weight(:),4) + + case (3) + print *, 'Using variance-matching weight in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) + print *, '# var weight ', real(variance_match_weight(:),4) + + case (4) + print *, 'Using variance- and pt2-matching weights in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) + print *, '# PT2 weight ', real(pt2_match_weight(:),4) + print *, '# var weight ', real(variance_match_weight(:),4) + + case (5) + print *, 'Using variance-matching weight in selection' + selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) + print *, '# var weight ', real(variance_match_weight(:),4) + + case (6) + print *, 'Using CI coefficient-based selection' + selection_weight(1:N_states) = c0_weight(1:N_states) + + case (7) + print *, 'Input weights multiplied by variance- and pt2-matching' + selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) * state_average_weight(1:N_states) + print *, '# PT2 weight ', real(pt2_match_weight(:),4) + print *, '# var weight ', real(variance_match_weight(:),4) + + case (8) + print *, 'Input weights multiplied by pt2-matching' + selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states) * state_average_weight(1:N_states) + print *, '# PT2 weight ', real(pt2_match_weight(:),4) + + case (9) + print *, 'Input weights multiplied by variance-matching' + selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) * state_average_weight(1:N_states) + print *, '# var weight ', real(variance_match_weight(:),4) + + end select + print *, '# Total weight ', real(selection_weight(:),4) + +END_PROVIDER + diff --git a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f new file mode 100644 index 00000000..c3a49280 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f @@ -0,0 +1,350 @@ +subroutine run_slave_cipsi + + BEGIN_DOC + ! Helper program for distributed parallelism + END_DOC + + implicit none + + call omp_set_max_active_levels(1) + distributed_davidson = .False. + read_wf = .False. + SOFT_TOUCH read_wf distributed_davidson + call provide_everything + call switch_qp_run_to_master + call run_slave_main +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context + PROVIDE psi_det psi_coef threshold_generators state_average_weight + PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym +end + + +subroutine run_slave_main + + use f77_zmq + + implicit none + IRP_IF MPI + include 'mpif.h' + IRP_ENDIF + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states) + character*(64) :: states(10) + character*(64) :: old_state + integer :: rc, i, ierr + double precision :: t0, t1 + + integer, external :: zmq_get_dvector, zmq_get_N_det_generators + integer, external :: zmq_get8_dvector + integer, external :: zmq_get_ivector + integer, external :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_psi_bilinear + integer, external :: zmq_get_psi_notouch + integer, external :: zmq_get_N_states_diag + + zmq_context = f77_zmq_ctx_new () + states(1) = 'selection' + states(2) = 'davidson' + states(3) = 'pt2' + old_state = 'Waiting' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master + PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator + PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank + + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + do + + if (mpi_master) then + call wait_for_states(states,zmq_state,size(states)) + if (zmq_state(1:64) == old_state(1:64)) then + call usleep(200) + cycle + else + old_state(1:64) = zmq_state(1:64) + endif + print *, trim(zmq_state) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in broadcast of zmq_state' + endif + IRP_ENDIF + + if(zmq_state(1:7) == 'Stopped') then + exit + endif + + + if (zmq_state(1:9) == 'selection') then + + ! Selection + ! --------- + + call wall_time(t0) + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_psi') + IRP_ENDIF + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector threshold_generators') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector energy') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_N_det_generators') + IRP_ENDIF + if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_N_det_selectors') + IRP_ENDIF + if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector state_average_weight') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector selection_weight') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle + pt2_e0_denominator(1:N_states) = energy(1:N_states) + TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight psi_det psi_coef + + if (mpi_master) then + print *, 'N_det', N_det + print *, 'N_det_generators', N_det_generators + print *, 'N_det_selectors', N_det_selectors + print *, 'pt2_e0_denominator', pt2_e0_denominator + print *, 'pt2_stoch_istate', pt2_stoch_istate + print *, 'state_average_weight', state_average_weight + print *, 'selection_weight', selection_weight + endif + call wall_time(t1) + call write_double(6,(t1-t0),'Broadcast time') + + IRP_IF MPI_DEBUG + call mpi_print('Entering OpenMP section') + IRP_ENDIF + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call run_selection_slave(0,i,energy) + !$OMP END PARALLEL + print *, mpi_rank, ': Selection done' + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + call mpi_print('----------') + + else if (zmq_state(1:8) == 'davidson') then + + ! Davidson + ! -------- + + call wall_time(t0) + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_N_states_diag') + IRP_ENDIF + if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_psi') + IRP_ENDIF + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + + call wall_time(t1) + call write_double(6,(t1-t0),'Broadcast time') + + !--- + call omp_set_max_active_levels(8) + call davidson_slave_tcp(0) + call omp_set_max_active_levels(1) + print *, mpi_rank, ': Davidson done' + !--- + + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + call mpi_print('----------') + + else if (zmq_state(1:3) == 'pt2') then + + ! PT2 + ! --- + + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + call wall_time(t0) + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_psi') + IRP_ENDIF + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_N_det_generators') + IRP_ENDIF + if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_N_det_selectors') + IRP_ENDIF + if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector threshold_generators') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector energy') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_ivector pt2_stoch_istate') + IRP_ENDIF + if (zmq_get_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector state_average_weight') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle + IRP_IF MPI_DEBUG + call mpi_print('zmq_get_dvector selection_weight') + IRP_ENDIF + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle + pt2_e0_denominator(1:N_states) = energy(1:N_states) + SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight psi_det psi_coef N_det_generators N_det_selectors + + + call wall_time(t1) + call write_double(6,(t1-t0),'Broadcast time') + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + + + IRP_IF MPI_DEBUG + call mpi_print('Entering OpenMP section') + IRP_ENDIF + if (.true.) then + integer :: nproc_target, ii + double precision :: mem_collector, mem, rss + + call resident_memory(rss) + + nproc_target = nthreads_pt2 + ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2) + + do + mem = rss + & ! + nproc_target * 8.d0 * & ! bytes + ( 0.5d0*pt2_n_tasks_max & ! task_id + + 64.d0*pt2_n_tasks_max & ! task + + 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm + + 1.d0*pt2_n_tasks_max & ! i_generator, subset + + 3.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + + 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer + + 2.0d0*(ii) & ! preinteresting, interesting, + ! prefullinteresting, fullinteresting + + 2.0d0*(N_int*2*ii) & ! minilist, fullminilist + + 1.0d0*(N_states*mo_num*mo_num) & ! mat + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(mem,irp_here) + nproc_target = 1 + exit + endif + + if (mem+rss < qp_max_mem) then + exit + endif + + nproc_target = nproc_target - 1 + + enddo + + if (N_det > 100000) then + + if (mpi_master) then + print *, 'N_det', N_det + print *, 'N_det_generators', N_det_generators + print *, 'N_det_selectors', N_det_selectors + print *, 'pt2_e0_denominator', pt2_e0_denominator + print *, 'pt2_stoch_istate', pt2_stoch_istate + print *, 'state_average_weight', state_average_weight + print *, 'selection_weight', selection_weight + print *, 'Number of threads', nproc_target + endif + + if (h0_type == 'CFG') then + PROVIDE det_to_configuration + endif + + PROVIDE global_selection_buffer pt2_N_teeth pt2_F N_det_generators + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc + PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp + + PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks + + if (mpi_master) then + print *, 'Running PT2' + endif + !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1) + i = omp_get_thread_num() + call run_pt2_slave(0,i,pt2_e0_denominator) + !$OMP END PARALLEL + FREE state_average_weight + print *, mpi_rank, ': PT2 done' + print *, '-------' + + endif + endif + + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + call mpi_print('----------') + + endif + + end do + IRP_IF MPI + call MPI_finalize(ierr) + IRP_ENDIF +end + + + diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f new file mode 100644 index 00000000..33fe23fc --- /dev/null +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -0,0 +1,147 @@ +subroutine run_stochastic_cipsi + use selection_types + implicit none + BEGIN_DOC +! Selected Full Configuration Interaction with Stochastic selection and PT2. + END_DOC + integer :: i,j,k,ndet + double precision, allocatable :: zeros(:) + integer :: to_select + type(pt2_type) :: pt2_data, pt2_data_err + logical, external :: qp_stop + logical :: print_pt2 + + double precision :: rss + double precision, external :: memory_of_double + double precision :: correlation_energy_ratio,E_denom,E_tc,norm + double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:) + PROVIDE H_apply_buffer_allocated distributed_davidson + + print*,'Diagonal elements of the Fock matrix ' + do i = 1, mo_num + write(*,*)i,Fock_matrix_tc_mo_tot(i,i) + enddo + N_iter = 1 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_generators + + rss = memory_of_double(N_states)*4.d0 + call check_mem(rss,irp_here) + + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + + double precision :: hf_energy_ref + logical :: has + double precision :: relative_error + + relative_error=PT2_relative_error + + zeros = 0.d0 + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap= 0.d0 + pt2_data % variance = huge(1.e0) + + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! call routine_save_right + + + if (N_det > N_det_max) then + psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) + N_det = N_det_max + soft_touch N_det psi_det psi_coef + if (s2_eig) then + call make_s2_eigenfunction + endif + print_pt2 = .False. + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! call routine_save_right + endif + + allocate(ept2(1000),pt1(1000),extrap_energy(100)) + + correlation_energy_ratio = 0.d0 + +! thresh_it_dav = 5.d-5 +! soft_touch thresh_it_dav + + print_pt2 = .True. + do while ( & + (N_det < N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) & + ) + write(*,'(A)') '--------------------------------------------------------------------------------' + + + to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) + to_select = max(N_states_diag, to_select) + + E_denom = E_tc ! TC Energy of the current wave function + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection + + N_iter += 1 + + if (qp_stop()) exit + + ! Add selected determinants + call copy_H_apply_buffer_to_wf_tc() + + PROVIDE psi_l_coef_bi_ortho psi_r_coef_bi_ortho + PROVIDE psi_det + PROVIDE psi_det_sorted_tc + + ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm + pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1)) + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + if (qp_stop()) exit + enddo +! print*,'data to extrapolate ' +! do i = 2, N_iter +! print*,'iteration ',i +! print*,'pt1,Ept2',pt1(i),ept2(i) +! call get_extrapolated_energy(i-1,ept2(i),pt1(i),extrap_energy(i)) +! do j = 2, i +! print*,'j,e,energy',j,extrap_energy(j) +! enddo +! enddo + +! thresh_it_dav = 5.d-6 +! soft_touch thresh_it_dav + + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection + call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! if (.not.qp_stop()) then +! if (N_det < N_det_max) then +! thresh_it_dav = 5.d-7 +! soft_touch thresh_it_dav +! call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! endif +! +! call pt2_dealloc(pt2_data) +! call pt2_dealloc(pt2_data_err) +! call pt2_alloc(pt2_data, N_states) +! call pt2_alloc(pt2_data_err, N_states) +! call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 +! call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! endif +! call pt2_dealloc(pt2_data) +! call pt2_dealloc(pt2_data_err) +! call routine_save_right + +end + diff --git a/src/cipsi_tc_bi_ortho/zmq_selection.irp.f b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f new file mode 100644 index 00000000..dc3e0f27 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f @@ -0,0 +1,235 @@ +subroutine ZMQ_selection(N_in, pt2_data) + use f77_zmq + use selection_types + + implicit none + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket , zmq_socket_pull + integer, intent(in) :: N_in + type(selection_buffer) :: b + integer :: i, l, N + integer, external :: omp_get_thread_num + type(pt2_type), intent(inout) :: pt2_data + + PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators + + N = max(N_in,1) + N = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) + if (.True.) then + PROVIDE pt2_e0_denominator nproc + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order selection_weight pseudo_sym + PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max + PROVIDE excitation_beta_max excitation_alpha_max excitation_max + + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection') + + integer, external :: zmq_put_psi + integer, external :: zmq_put_N_det_generators + integer, external :: zmq_put_N_det_selectors + integer, external :: zmq_put_dvector + + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_generators on ZMQ server' + endif + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then + stop 'Unable to put selection_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then + stop 'Unable to put threshold_generators on ZMQ server' + endif + call create_selection_buffer(N, N*2, b) + endif + + integer, external :: add_task_to_taskserver + character(len=100000) :: task + integer :: j,k,ipos + ipos=1 + task = ' ' + + + do i= 1, N_det_generators + do j=1,pt2_F(i) + write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N + ipos += 30 + if (ipos > 100000-30) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + enddo + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + endif + N = max(N_in,1) + + + ASSERT (associated(b%det)) + ASSERT (associated(b%val)) + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + integer :: nproc_target + if (N_det < 3*nproc) then + nproc_target = N_det/4 + else + nproc_target = nproc + endif + double precision :: mem + mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3) + call write_double(6,mem,'Estimated memory/thread (Gb)') + if (qp_max_mem > 0) then + nproc_target = max(1,int(dble(qp_max_mem)/(0.1d0 + mem))) + nproc_target = min(nproc_target,nproc) + endif + + f(:) = 1.d0 + if (.not.do_pt2) then + double precision :: f(N_states), u_dot_u + do k=1,min(N_det,N_states) + f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors) + enddo + endif + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2_data) PRIVATE(i) NUM_THREADS(nproc_target+1) + i = omp_get_thread_num() + if (i==0) then + call selection_collector(zmq_socket_pull, b, N, pt2_data) + else + call selection_slave_inproc(i) + endif + !$OMP END PARALLEL + + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection') + if (N_in > 0) then + if (s2_eig) then + call make_selection_buffer_s2(b) + endif + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) + endif + call delete_selection_buffer(b) + + do k=1,N_states + pt2_data % pt2(k) = pt2_data % pt2(k) * f(k) + pt2_data % variance(k) = pt2_data % variance(k) * f(k) + do l=1,N_states + pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l)) + pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l)) + enddo + + pt2_data % rpt2(k) = & + pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k)) + enddo + + pt2_overlap(:,:) = pt2_data % overlap(:,:) + + print *, 'Overlap of perturbed states:' + do l=1,N_states + print *, pt2_overlap(l,:) + enddo + print *, '-------' + SOFT_TOUCH pt2_overlap + call update_pt2_and_variance_weights(pt2_data, N_states) + +end subroutine + + +subroutine selection_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_selection_slave(1,i,pt2_e0_denominator) +end + +subroutine selection_collector(zmq_socket_pull, b, N, pt2_data) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + type(selection_buffer), intent(inout) :: b + integer, intent(in) :: N + type(pt2_type), intent(inout) :: pt2_data + type(pt2_type) :: pt2_data_tmp + + double precision :: pt2_mwen(N_states) + double precision :: variance_mwen(N_states) + double precision :: norm2_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, ntask + double precision, pointer :: val(:) + integer(bit_kind), pointer :: det(:,:,:) + integer, allocatable :: task_id(:) + type(selection_buffer) :: b2 + + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + call create_selection_buffer(N, N*2, b2) + integer :: k + double precision :: rss + double precision, external :: memory_of_int + rss = memory_of_int(N_det_generators) + call check_mem(rss,irp_here) + allocate(task_id(N_det_generators)) + more = 1 + pt2_data % pt2(:) = 0d0 + pt2_data % variance(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + call pt2_alloc(pt2_data_tmp,N_states) + do while (more == 1) + call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) + + call pt2_add(pt2_data, 1.d0, pt2_data_tmp) + do i=1, b2%cur + call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) + if (b2%val(i) > b%mini) exit + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + integer, external :: zmq_delete_task + if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then + stop 'Unable to delete task' + endif + end do + end do + call pt2_dealloc(pt2_data_tmp) + + + call delete_selection_buffer(b2) + call sort_selection_buffer(b) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) +end subroutine + From 4b715bfcd9f34d741a3f3a55e2985c50821a4aaa Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 24 Oct 2022 11:14:52 +0200 Subject: [PATCH 09/10] added iterations_tc --- src/iterations_tc/EZFIO.cfg | 24 +++++ src/iterations_tc/NEED | 0 src/iterations_tc/io.irp.f | 37 +++++++ src/iterations_tc/iterations.irp.f | 43 ++++++++ src/iterations_tc/print_extrapolation.irp.f | 46 +++++++++ src/iterations_tc/print_summary.irp.f | 104 ++++++++++++++++++++ 6 files changed, 254 insertions(+) create mode 100644 src/iterations_tc/EZFIO.cfg create mode 100644 src/iterations_tc/NEED create mode 100644 src/iterations_tc/io.irp.f create mode 100644 src/iterations_tc/iterations.irp.f create mode 100644 src/iterations_tc/print_extrapolation.irp.f create mode 100644 src/iterations_tc/print_summary.irp.f diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg new file mode 100644 index 00000000..2a5e94a7 --- /dev/null +++ b/src/iterations_tc/EZFIO.cfg @@ -0,0 +1,24 @@ +[n_iter] +interface: ezfio +doc: Number of saved iterations +type:integer +default: 1 + +[n_det_iterations] +interface: ezfio, provider +doc: Number of determinants at each iteration +type: integer +size: (100) + +[energy_iterations] +interface: ezfio, provider +doc: The variational energy at each iteration +type: double precision +size: (determinants.n_states,100) + +[pt2_iterations] +interface: ezfio, provider +doc: The |PT2| correction at each iteration +type: double precision +size: (determinants.n_states,100) + diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED new file mode 100644 index 00000000..e69de29b diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f new file mode 100644 index 00000000..821f5e84 --- /dev/null +++ b/src/iterations_tc/io.irp.f @@ -0,0 +1,37 @@ +BEGIN_PROVIDER [ integer, n_iter ] + implicit none + BEGIN_DOC +! number of iterations + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + double precision :: zeros(N_states,100) + integer :: izeros(100) + zeros = 0.d0 + izeros = 0 + call ezfio_set_iterations_n_iter(0) + call ezfio_set_iterations_energy_iterations(zeros) + call ezfio_set_iterations_pt2_iterations(zeros) + call ezfio_set_iterations_n_det_iterations(izeros) + n_iter = 1 + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read n_iter with MPI' + endif + IRP_ENDIF + + call write_time(6) + +END_PROVIDER + diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f new file mode 100644 index 00000000..2f1cf0c1 --- /dev/null +++ b/src/iterations_tc/iterations.irp.f @@ -0,0 +1,43 @@ +BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] + implicit none + BEGIN_DOC + ! Extrapolated energy, using E_var = f(PT2) where PT2=0 + END_DOC +! integer :: i + extrapolated_energy = 0.D0 +END_PROVIDER + + subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy) + implicit none + integer, intent(in) :: Niter + double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter) + call extrapolate_data(Niter,ept2,pt1,extrap_energy) + end + +subroutine save_iterations(e_, pt2_,n_) + implicit none + BEGIN_DOC +! Update the energy in the EZFIO file. + END_DOC + integer, intent(in) :: n_ + double precision, intent(in) :: e_(N_states), pt2_(N_states) + integer :: i + + if (N_iter == 101) then + do i=2,N_iter-1 + energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) + pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) + enddo + N_iter = N_iter-1 + TOUCH N_iter + endif + + energy_iterations(1:N_states,N_iter) = e_(1:N_states) + pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) + n_det_iterations(N_iter) = n_ + call ezfio_set_iterations_N_iter(N_iter) + call ezfio_set_iterations_energy_iterations(energy_iterations) + call ezfio_set_iterations_pt2_iterations(pt2_iterations) + call ezfio_set_iterations_n_det_iterations(n_det_iterations) +end + diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f new file mode 100644 index 00000000..cb46fb67 --- /dev/null +++ b/src/iterations_tc/print_extrapolation.irp.f @@ -0,0 +1,46 @@ +subroutine print_extrapolated_energy + implicit none + BEGIN_DOC +! Print the extrapolated energy in the output + END_DOC + + integer :: i,k + + if (N_iter< 2) then + return + endif + write(*,'(A)') '' + write(*,'(A)') 'Extrapolated energies' + write(*,'(A)') '------------------------' + write(*,'(A)') '' + + print *, '' + print *, 'State ', 1 + print *, '' + write(*,*) '=========== ', '===================' + write(*,*) 'minimum PT2 ', 'Extrapolated energy' + write(*,*) '=========== ', '===================' + do k=2,min(N_iter,8) + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) + enddo + write(*,*) '=========== ', '===================' + + do i=2, min(N_states,N_det) + print *, '' + print *, 'State ', i + print *, '' + write(*,*) '=========== ', '=================== ', '=================== ', '===================' + write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' + write(*,*) '=========== ', '=================== ', '=================== ', '===================' + do k=2,min(N_iter,8) + write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & + extrapolated_energy(k,i) - extrapolated_energy(k,1), & + (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 + enddo + write(*,*) '=========== ', '=================== ', '=================== ', '===================' + enddo + + print *, '' + +end subroutine + diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f new file mode 100644 index 00000000..8e6285e2 --- /dev/null +++ b/src/iterations_tc/print_summary.irp.f @@ -0,0 +1,104 @@ +subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_) + use selection_types + implicit none + BEGIN_DOC +! Print the extrapolated energy in the output + END_DOC + + integer, intent(in) :: n_det_, n_configuration_, n_st + double precision, intent(in) :: e_(n_st), s2_(n_st) + type(pt2_type) , intent(in) :: pt2_data, pt2_data_err + integer :: i, k + integer :: N_states_p + character*(9) :: pt2_string + character*(512) :: fmt + + if (do_pt2) then + pt2_string = ' ' + else + pt2_string = '(approx)' + endif + + N_states_p = min(N_det_,n_st) + + print *, '' + print '(A,I12)', 'Summary at N_det = ', N_det_ + print '(A)', '-----------------------------------' + print *, '' + + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))' + write(*,fmt) ('State',k, k=1,N_states_p) + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))' + write(*,fmt) '# E ', e_(1:N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1) + write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 + endif + write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' + write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) + write(*,'(A)') '#' + write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) + endif + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + print *, '' + + print *, 'N_det = ', N_det_ + print *, 'N_states = ', n_st + if (s2_eig) then + print *, 'N_cfg = ', N_configuration_ + if (only_expected_s2) then + print *, 'N_csf = ', N_csf + endif + endif + print *, '' + + do k=1, N_states_p + print*,'* State ',k + print *, '< S^2 > = ', s2_(k) + print *, 'E = ', e_(k) + print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) + print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) + print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) + print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) + print *, '' + enddo + + print *, '-----' + if(n_st.gt.1)then + print *, 'Variational Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (e_(i) - e_(1)), & + (e_(i) - e_(1)) * 27.211396641308d0 + enddo + print *, '-----' + print*, 'Variational + perturbative Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & + (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 + enddo + print *, '-----' + print*, 'Variational + renormalized perturbative Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & + (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0 + enddo + endif + +! call print_energy_components() + +end subroutine + From 7f653b9eee5e48b245ba6d4609934ccf70b8db3d Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 24 Oct 2022 11:23:57 +0200 Subject: [PATCH 10/10] added non_hermit_dav --- src/non_hermit_dav/NEED | 1 + src/non_hermit_dav/biorthog.irp.f | 1088 ++++++++ src/non_hermit_dav/gram_schmit.irp.f | 56 + src/non_hermit_dav/htilde_mat.irp.f | 93 + .../lapack_diag_non_hermit.irp.f | 2391 +++++++++++++++++ src/non_hermit_dav/new_routines.irp.f | 669 +++++ src/non_hermit_dav/project.irp.f | 53 + src/non_hermit_dav/utils.irp.f | 325 +++ 8 files changed, 4676 insertions(+) create mode 100644 src/non_hermit_dav/NEED create mode 100644 src/non_hermit_dav/biorthog.irp.f create mode 100644 src/non_hermit_dav/gram_schmit.irp.f create mode 100644 src/non_hermit_dav/htilde_mat.irp.f create mode 100644 src/non_hermit_dav/lapack_diag_non_hermit.irp.f create mode 100644 src/non_hermit_dav/new_routines.irp.f create mode 100644 src/non_hermit_dav/project.irp.f create mode 100644 src/non_hermit_dav/utils.irp.f diff --git a/src/non_hermit_dav/NEED b/src/non_hermit_dav/NEED new file mode 100644 index 00000000..9487075c --- /dev/null +++ b/src/non_hermit_dav/NEED @@ -0,0 +1 @@ +utils diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f new file mode 100644 index 00000000..d6add005 --- /dev/null +++ b/src/non_hermit_dav/biorthog.irp.f @@ -0,0 +1,1088 @@ +subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) + + integer :: i, j, n_degen,k , iteration + integer :: n_good + double precision :: shift,shift_current + double precision :: r,thr + integer, allocatable :: list_good(:), iorder_origin(:),iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) + double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) + double precision, allocatable :: im_part(:),re_part(:) + + + print*,'Computing the left/right eigenvectors ...' + print*,'Using the degeneracy splitting algorithm' + + + ! pre-processing the matrix :: sorting by diagonal elements + allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) + allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) + do i = 1, n + iorder_origin(i) = i + diag_elem(i) = A(i,i) + enddo + call dsort(diag_elem, iorder_origin, n) + do i = 1, n + do j = 1, n + A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) + enddo + enddo + + shift = 1.d-15 + shift_current = shift + iteration = 1 + logical :: good_ortho + good_ortho = .False. + do while(n_real_eigv.ne.n.or. .not.good_ortho) + if(shift.gt.1.d-3)then + print*,'shift > 1.d-3 !!' + print*,'Your matrix intrinsically contains complex eigenvalues' + stop + endif + print*,'***** iteration = ',iteration + print*,'shift = ',shift + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + Aw = A_save + do i = 1, n + do j = 1, n + if(dabs(Aw(j,i)).lt.shift)then + Aw(j,i) = 0.d0 + endif + enddo + enddo + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + allocate(im_part(n),iorder(n)) + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + + shift_current = max(10.d0 * dabs(im_part(1)),shift) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + Aw = A_save + call split_matrix_degen(Aw,n,shift_current) + deallocate( im_part, iorder ) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + ! You track the real eigenvalues + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_good += 1 + else + print*,'Found an imaginary component to eigenvalue' + print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + allocate( list_good(n_good), iorder(n_good) ) + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + deallocate( WR, WI ) + + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + + ! You sort the real eigenvalues + call dsort(eigval, iorder, n_good) + + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n_real_eigv + do j = 1, n + reigvec_tmp(j,i) = VR(j,list_good(iorder(i))) + leigvec_tmp(j,i) = Vl(j,list_good(iorder(i))) + enddo + enddo + + if(n_real_eigv == n)then + allocate(S(n,n)) + call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd) + print*,'accu_nd = ',accu_nd + double precision :: accu_nd + good_ortho = accu_nd .lt. 1.d-10 + deallocate(S) + endif + + deallocate( list_good, iorder ) + deallocate( VL, VR, Aw) + shift *= 10.d0 + iteration += 1 + enddo + do i = 1, n + do j = 1, n + reigvec(iorder_origin(j),i) = reigvec_tmp(j,i) + leigvec(iorder_origin(j),i) = leigvec_tmp(j,i) + enddo + enddo + +end subroutine non_hrmt_diag_split_degen + +! --- + +subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_good + double precision :: shift,shift_current + double precision :: r,thr + integer, allocatable :: list_good(:), iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:) + double precision, allocatable :: Aw(:,:) + double precision, allocatable :: im_part(:) + + + print*,'Computing the left/right eigenvectors ...' + + ! Eigvalue(n) = WR(n) + i * WI(n) + shift = 1.d-10 + do while(n_real_eigv.ne.n.or.shift.gt.1.d-3) + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + Aw = A + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + allocate(im_part(n), iorder(n)) + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + shift_current = max(10.d0 * dabs(im_part(1)),shift) + print*,'adding random number of magnitude ',shift_current + Aw = A + do i = 1, n + call RANDOM_NUMBER(r) + Aw(i,i) += shift_current * r + enddo + deallocate( im_part, iorder ) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + + ! You track the real eigenvalues + thr = 1.d-10 + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.thr)then + n_good += 1 + else + print*,'Found an imaginary component to eigenvalue' + print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + + allocate( list_good(n_good), iorder(n_good) ) + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.thr)then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + + deallocate( WR, WI ) + + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + + ! You sort the real eigenvalues + call dsort(eigval, iorder, n_good) + + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n_real_eigv + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = Vl(j,list_good(iorder(i))) + enddo + enddo + + deallocate( list_good, iorder ) + deallocate( VL, VR, Aw) + shift *= 10.d0 + enddo + if(shift.gt.1.d-3)then + print*,'shift > 1.d-3 !!' + print*,'Your matrix intrinsically contains complex eigenvalues' + endif + +end subroutine non_hrmt_real_diag_new + +! --- + +subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_good + double precision :: thr, thr_cut, thr_diag, thr_norm + double precision :: accu_d, accu_nd, thr_d, thr_nd + + integer, allocatable :: list_good(:), iorder(:) + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) + double precision, allocatable :: S(:,:) + + + ! ------------------------------------------------------------------------------------- + ! + + print *, ' ' + print *, ' Computing the left/right eigenvectors ...' + print *, ' ' + + allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) + + print *, ' fock matrix' + do i = 1, n + write(*, '(1000(F16.10,X))') A(i,:) + enddo + + !thr_cut = 1.d-15 + !call cancel_small_elmts(A, n, thr_cut) + + !call lapack_diag_non_sym_right(n, A, WR, WI, VR) + call lapack_diag_non_sym(n, A, WR, WI, VL, VR) + !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) + + print *, ' ' + print *, ' eigenvalues' + do i = 1, n + write(*, '(1000(F16.10,X))') WR(i), WI(i) + enddo + !print *, ' right eigenvect bef' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') VR(:,i) + !enddo + !print *, ' left eigenvect bef' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') VL(:,i) + !enddo + + thr_diag = 1d-10 + thr_norm = 1d+10 + call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! track & sort the real eigenvalues + + n_good = 0 + thr = 1.d-5 + do i = 1, n + if(dabs(WI(i)) .lt. thr) then + n_good += 1 + else + print*, 'Found an imaginary component to eigenvalue on i = ', i + print*, 'Re(i) + Im(i)', WR(i), WI(i) + stop + endif + enddo + + allocate(list_good(n_good), iorder(n_good)) + + n_good = 0 + do i = 1, n + if( dabs(WI(i)).lt.thr ) then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + + deallocate( WR, WI ) + + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + call dsort(eigval, iorder, n_good) + + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n_real_eigv + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = VL(j,list_good(iorder(i))) + enddo + enddo + + deallocate( list_good, iorder ) + deallocate( VL, VR ) + + ASSERT(n==n_real_eigv) + + !print *, ' eigenvalues' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') eigval(i) + !enddo + !print *, ' right eigenvect aft ord' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') reigvec(:,i) + !enddo + !print *, ' left eigenvect aft ord' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') leigvec(:,i) + !enddo + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! check bi-orthogonality + + thr_d = 1d-8 + thr_nd = 1d-8 + + allocate( S(n_real_eigv,n_real_eigv) ) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .lt. thr_d) ) then + + print *, ' lapack vectors are normalized and bi-orthogonalized' + deallocate(S) + return + + elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then + + print *, ' lapack vectors are not normalized but bi-orthogonalized' + call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + + thr_diag = 1d-10 + thr_norm = 1d+10 + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) + + deallocate(S) + return + + else + + print *, ' lapack vectors are not normalized neither bi-orthogonalized' + + ! --- + + !call impose_orthog_degen_eigvec(n, eigval, reigvec) + !call impose_orthog_degen_eigvec(n, eigval, leigvec) + + call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) + + + !call impose_orthog_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) + + !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec) + + ! --- + + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then + call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + endif + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .true.) + + !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) + !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec) + + ! --- + + thr_diag = 1d-10 + thr_norm = 1d+10 + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) + + deallocate(S) + + endif + + ! + ! ------------------------------------------------------------------------------------- + + return + +end subroutine non_hrmt_bieig + +! --- + +subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_good + double precision :: thr + double precision :: accu_nd + + integer, allocatable :: list_good(:), iorder(:) + double precision, allocatable :: Aw(:,:) + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) + double precision, allocatable :: S(:,:) + double precision :: r + + + ! ------------------------------------------------------------------------------------- + ! + + print *, 'Computing the left/right eigenvectors ...' + allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) + + Aw(:,:) = A(:,:) + call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) + + thr = 1.d-12 + double precision, allocatable :: im_part(:) + n_good = 0 + do i = 1, n + if( dabs(WI(i)).lt.thr ) then + n_good += 1 + else + print*, 'Found an imaginary component to eigenvalue on i = ', i + print*, 'Re(i) + Im(i)', WR(i), WI(i) + endif + enddo + print*,'n_good = ',n_good + if(n_good .lt. n)then + print*,'Removing degeneracies to remove imaginary parts' + allocate(im_part(n),iorder(n)) + r = 0.d0 + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part,iorder,n) + thr = 10.d0 * dabs(im_part(1)) + print*,'adding random numbers on the diagonal of magnitude ',thr + Aw(:,:) = A(:,:) + do i = 1, n + call RANDOM_NUMBER(r) + print*,'r = ',r*thr + Aw(i,i) += thr * r + enddo + print*,'Rediagonalizing the matrix with random numbers' + call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) + deallocate(im_part,iorder) + endif + deallocate( Aw ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! track & sort the real eigenvalues + + n_good = 0 + thr = 1.d-5 + do i = 1, n + if( dabs(WI(i)).lt.thr ) then + n_good += 1 + else + print*, 'Found an imaginary component to eigenvalue on i = ', i + print*, 'Re(i) + Im(i)', WR(i), WI(i) + endif + enddo + print*,'n_good = ',n_good + allocate( list_good(n_good), iorder(n_good) ) + + n_good = 0 + do i = 1, n + if( dabs(WI(i)).lt.thr ) then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + + deallocate( WR, WI ) + + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + call dsort(eigval, iorder, n_good) + + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n_real_eigv + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = VL(j,list_good(iorder(i))) + enddo + enddo + + deallocate( list_good, iorder ) + deallocate( VL, VR ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! check bi-orthogonality + + allocate( S(n_real_eigv,n_real_eigv) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + do i = 1, n_real_eigv + do j = 1, n_real_eigv + if(i==j) cycle + accu_nd = accu_nd + S(j,i) * S(j,i) + enddo + enddo + accu_nd = dsqrt(accu_nd) + + if(accu_nd .lt. 1d-8) then + ! L x R is already bi-orthogonal + + print *, ' L & T bi-orthogonality: ok' + deallocate( S ) + return + + else + ! impose bi-orthogonality + + print *, ' L & T bi-orthogonality: not imposed yet' + print *, ' accu_nd = ', accu_nd + call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) + deallocate( S ) + + endif + + ! + ! ------------------------------------------------------------------------------------- + + return + +end + +! --- + +subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_bad + double precision :: thr + double precision :: accu_nd + + integer, allocatable :: iorder(:) + double precision, allocatable :: Aw(:,:) + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) + double precision, allocatable :: S(:,:) + double precision :: r + + ! ------------------------------------------------------------------------------------- + ! + + print *, 'Computing the left/right eigenvectors ...' + allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), iorder(n)) + + Aw(:,:) = A(:,:) + do i = 1, n + call RANDOM_NUMBER(r) + Aw(i,i) += 10.d-10* r + enddo + call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) + + ! ------------------------------------------------------------------------------------- + ! track & sort the real eigenvalues + + i = 1 + thr = 1.d-15 + n_real_eigv = 0 + do while (i.le.n) +! print*,i,dabs(WI(i)) + if( dabs(WI(i)).gt.thr ) then + print*, 'Found an imaginary component to eigenvalue on i = ', i + print*, 'Re(i) , Im(i) ', WR(i), WI(i) + iorder(i) = i + eigval(i) = WR(i) + i+=1 + print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) + iorder(i) = i + eigval(i) = WR(i) + i+=1 + else + n_real_eigv += 1 + iorder(i) = i + eigval(i) = WR(i) + i+=1 + endif + enddo + call dsort(eigval, iorder, n) + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + + deallocate( iorder ) + deallocate( VL, VR ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! check bi-orthogonality + + allocate( S(n,n) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n, n, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) cycle + accu_nd = accu_nd + S(j,i) * S(j,i) + enddo + enddo + accu_nd = dsqrt(accu_nd) + + deallocate( S ) + +end subroutine non_hrmt_real_im + +! --- + +subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors + ! for A R = lambda B R and A^\dagger L = lambda B^\dagger L + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n),B(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_bad + double precision :: thr + double precision :: accu_nd + + integer, allocatable :: iorder(:) + double precision, allocatable :: Aw(:,:),Bw(:,:) + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:) + double precision, allocatable :: S(:,:) + double precision :: r + + ! ------------------------------------------------------------------------------------- + ! + + print *, 'Computing the left/right eigenvectors ...' + allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n)) + + Aw(:,:) = A(:,:) + Bw(:,:) = B(:,:) + call lapack_diag_general_non_sym(n,Aw,Bw,WR,beta,WI,VL,VR) + + ! ------------------------------------------------------------------------------------- + ! track & sort the real eigenvalues + + i = 1 + thr = 1.d-10 + n_real_eigv = 0 + do while (i.le.n) + if( dabs(WI(i)).gt.thr ) then + print*, 'Found an imaginary component to eigenvalue on i = ', i + print*, 'Re(i) , Im(i) ', WR(i), WI(i) + iorder(i) = i + eigval(i) = WR(i)/(beta(i) + 1.d-10) + i+=1 + print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) + iorder(i) = i + eigval(i) = WR(i)/(beta(i) + 1.d-10) + i+=1 + else + n_real_eigv += 1 + iorder(i) = i + eigval(i) = WR(i)/(beta(i) + 1.d-10) + i+=1 + endif + enddo + call dsort(eigval, iorder, n) + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + + deallocate( iorder ) + deallocate( VL, VR ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! check bi-orthogonality + + allocate( S(n,n) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n, n, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) cycle + accu_nd = accu_nd + S(j,i) * S(j,i) + enddo + enddo + accu_nd = dsqrt(accu_nd) + + deallocate( S ) + +end subroutine non_hrmt_generalized_real_im + +! --- + +subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_good + double precision :: thr + double precision :: accu_nd + + integer, allocatable :: iorder(:) + double precision, allocatable :: Aw(:,:) + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) + double precision, allocatable :: S(:,:) + double precision, allocatable :: eigval_sorted(:) + + + ! ------------------------------------------------------------------------------------- + ! + + print *, 'Computing the left/right eigenvectors ...' + + allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) + Aw(:,:) = A(:,:) + + call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) + + deallocate( Aw ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! track & sort the real eigenvalues + + allocate( eigval_sorted(n), iorder(n) ) + + n_good = 0 + thr = 1.d-10 + + do i = 1, n + + iorder(i) = i + eigval_sorted(i) = WR(i) + + if(dabs(WI(i)) .gt. thr) then + print*, ' Found an imaginary component to eigenvalue on i = ', i + print*, ' Re(i) + Im(i)', WR(i), WI(i) + else + n_good += 1 + endif + + enddo + + n_real_eigv = n_good + + call dsort(eigval_sorted, iorder, n) + + reigvec(:,:) = 0.d0 + leigvec(:,:) = 0.d0 + do i = 1, n + eigval(i) = WR(i) + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + + deallocate( eigval_sorted, iorder ) + deallocate( WR, WI ) + deallocate( VL, VR ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! check bi-orthogonality + + allocate( S(n,n) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n, n, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) cycle + accu_nd = accu_nd + S(j,i) * S(j,i) + enddo + enddo + accu_nd = dsqrt(accu_nd) + + if( accu_nd .lt. 1d-8 ) then + ! L x R is already bi-orthogonal + + !print *, ' L & T bi-orthogonality: ok' + deallocate( S ) + return + + else + ! impose bi-orthogonality + + !print *, ' L & T bi-orthogonality: not imposed yet' + !print *, ' accu_nd = ', accu_nd + call impose_biorthog_qr(n, n, leigvec, reigvec) + deallocate( S ) + + endif + + ! + ! ------------------------------------------------------------------------------------- + + return + +end subroutine non_hrmt_bieig_fullvect + +! --- + + +subroutine split_matrix_degen(aw,n,shift) + implicit none + BEGIN_DOC + ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 + ! + ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS + END_DOC + double precision,intent(inout) :: Aw(n,n) + double precision,intent(in) :: shift + integer, intent(in) :: n + integer :: i,j,n_degen + logical :: keep_on + i=1 + do while(i.lt.n) + if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then + j=1 + keep_on = .True. + do while(keep_on) + if(i+j.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then + j+=1 + else + keep_on=.False. + exit + endif + enddo + n_degen = j + j=0 + keep_on = .True. + do while(keep_on) + if(i+j+1.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then + Aw(i+j,i+j) += (j-n_degen/2) * shift + j+=1 + else + keep_on = .False. + exit + endif + enddo + Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift + i+=n_degen + else + i+=1 + endif + enddo + +end + +subroutine cancel_small_elmts(aw,n,shift) + implicit none + BEGIN_DOC + ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 + ! + ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS + END_DOC + double precision,intent(inout) :: Aw(n,n) + double precision,intent(in) :: shift + integer, intent(in) :: n + integer :: i,j + do i = 1, n + do j = 1, n + if(dabs(Aw(j,i)).lt.shift)then + Aw(j,i) = 0.d0 + endif + enddo + enddo +end + +subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd) + implicit none + integer, intent(in) :: n + double precision,intent(in) :: reigvec(n,n),leigvec(n,n) + double precision, intent(out) :: S(n,n),accu_nd + BEGIN_DOC +! retunrs the overlap matrix S = Leigvec^T Reigvec +! +! and the square root of the sum of the squared off-diagonal elements of S + END_DOC + integer :: i,j + ! S = VL x VR + call dgemm( 'T', 'N', n, n, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + accu_nd = 0.d0 + do i = 1, n + do j = 1, n + if(i.ne.j) then + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + +end diff --git a/src/non_hermit_dav/gram_schmit.irp.f b/src/non_hermit_dav/gram_schmit.irp.f new file mode 100644 index 00000000..520661b8 --- /dev/null +++ b/src/non_hermit_dav/gram_schmit.irp.f @@ -0,0 +1,56 @@ +subroutine bi_ortho_gram_schmidt(wi,vi,n,ni,wk,wk_schmidt) + implicit none + BEGIN_DOC +! you enter with a set of "ni" BI-ORTHONORMAL vectors of length "n" +! +! vi(j,i) = , wi(j,i) = , = delta_{ij} S_ii, S_ii = +! +! and a vector vk(j) = +! +! you go out with a vector vk_schmidt(j) = +! +! which is Gram-Schmidt orthonormalized with respect to the "vi" +! +! = 0 +! +! |wk_schmidt> = |wk> - \sum_{i=1}^ni (/) |wi> +! +! according to Eq. (5), (6) of Computers Structures, Vol 56, No. 4, pp 605-613, 1995 +! +! https://doi.org/10.1016/0045-7949(94)00565-K + END_DOC + integer, intent(in) :: n,ni + double precision, intent(in) :: wi(n,ni),vi(n,ni),wk(n) + double precision, intent(out):: wk_schmidt(n) + double precision :: vi_wk,u_dot_v,tmp,u_dot_u + double precision, allocatable :: sii(:) + integer :: i,j + allocate( sii(ni) ) + wk_schmidt = wk + do i = 1, ni + sii(i) = u_dot_v(vi(1,i),wi(1,i),n) + enddo +! do i = 1, n +! print*,i,'wk',wk(i) +! enddo +! print*,'' +! print*,'' + do i = 1, ni +! print*,'i',i + ! Gram-Schmidt + vi_wk = u_dot_v(vi(1,i),wk,n) + vi_wk = vi_wk / sii(i) +! print*,'' + do j = 1, n +! print*,j,vi_wk,wi(j,i) + wk_schmidt(j) -= vi_wk * wi(j,i) + enddo + enddo + tmp = u_dot_u(wk_schmidt,n) + tmp = 1.d0/dsqrt(tmp) + wk_schmidt = tmp * wk_schmidt +! do j = 1, n +! print*,j,'wk_scc',wk_schmidt(j) +! enddo +! pause +end diff --git a/src/non_hermit_dav/htilde_mat.irp.f b/src/non_hermit_dav/htilde_mat.irp.f new file mode 100644 index 00000000..6d5101ac --- /dev/null +++ b/src/non_hermit_dav/htilde_mat.irp.f @@ -0,0 +1,93 @@ +BEGIN_PROVIDER [ integer, n_mat] + implicit none + n_mat = 2 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, h_non_hermit, (n_mat, n_mat)] +&BEGIN_PROVIDER [ double precision, h_non_hermit_transp, (n_mat, n_mat)] +&BEGIN_PROVIDER [ double precision, reigvec_ht, (n_mat, n_mat)] +&BEGIN_PROVIDER [ double precision, leigvec_ht, (n_mat, n_mat)] +&BEGIN_PROVIDER [ double precision, eigval_ht, (n_mat)] +&BEGIN_PROVIDER [ integer, n_real_ht, (n_mat)] + implicit none + integer :: i,j + do i = 1, n_mat + read(33,*)h_non_hermit(i,1:n_mat) + enddo + print*,'' + print*,'H_mat ' + print*,'' + do i = 1, n_mat + write(*,'(1000(F16.10,X))')h_non_hermit(i,:) + enddo + do i = 1, n_mat + do j = 1, n_mat + h_non_hermit_transp(j,i) = h_non_hermit(i,j) + enddo + enddo + call non_hrmt_real_diag(n_mat,h_non_hermit,reigvec_ht,leigvec_ht,n_real_ht,eigval_ht) + + +END_PROVIDER + + +subroutine hcalc_r_tmp(v,u,N_st,sze) ! v = H u + implicit none + BEGIN_DOC + ! Template of routine for the application of H + ! + ! Here, it is done with the Hamiltonian matrix + ! + ! on the set of determinants of psi_det + ! + ! Computes $v = H | u \rangle$ + ! + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u(sze,N_st) + double precision, intent(inout) :: v(sze,N_st) + integer :: i,j,istate + v = 0.d0 + do istate = 1, N_st + do j = 1, sze + do i = 1, sze + v(i,istate) += h_non_hermit(i,j) * u(j,istate) +! print*,i,j,h_non_hermit(i,j),u(j,istate) + enddo + enddo + enddo + print*,'HU' + do i = 1, sze + print*,v(i,1) + enddo +end + +subroutine hcalc_l_tmp(v,u,N_st,sze) ! v = H^\dagger u + implicit none + BEGIN_DOC + ! Template of routine for the application of H + ! + ! Here, it is done with the Hamiltonian matrix + ! + ! on the set of determinants of psi_det + ! + ! Computes $v = H | u \rangle$ + ! + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u(sze,N_st) + double precision, intent(inout) :: v(sze,N_st) + integer :: i,j,istate + v = 0.d0 + do istate = 1, N_st + do j = 1, sze + do i = 1, sze + v(i,istate) += h_non_hermit_transp(i,j) * u(j,istate) + enddo + enddo + enddo + print*,'HU' + do i = 1, sze + print*,v(i,1) + enddo +end diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f new file mode 100644 index 00000000..5bd227c6 --- /dev/null +++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -0,0 +1,2391 @@ +subroutine lapack_diag_non_sym(n, A, WR, WI, VL, VR) + + BEGIN_DOC + ! You enter with a general non hermitian matrix A(n,n) + ! + ! You get out with the real WR and imaginary part WI of the eigenvalues + ! + ! Eigvalue(n) = WR(n) + i * WI(n) + ! + ! And the left VL and right VR eigenvectors + ! + ! VL(i,j) = :: projection on the basis element |i> on the jth left eigenvector + ! + ! VR(i,j) = :: projection on the basis element |i> on the jth right eigenvector + ! + ! The real part of the matrix A can be written as A = VR D VL^T + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n) + + integer :: lda, ldvl, ldvr, LWORK, INFO + double precision, allocatable :: Atmp(:,:), WORK(:) + + lda = n + ldvl = n + ldvr = n + + allocate( Atmp(n,n) ) + Atmp(1:n,1:n) = A(1:n,1:n) + + allocate(WORK(1)) + LWORK = -1 ! to ask for the optimal size of WORK + call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.gt.0)then + print*,'dgeev failed !!',INFO + stop + endif + LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK + deallocate(WORK) + + allocate(WORK(LWORK)) + + ! Actual diagonalization + call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.ne.0) then + print*,'dgeev failed !!', INFO + stop + endif + + deallocate(Atmp, WORK) + +end subroutine lapack_diag_non_sym + + +subroutine non_sym_diag_inv_right(n,A,leigvec,reigvec,n_real_eigv,eigval) + implicit none + BEGIN_DOC +! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors +! +! of a non hermitian matrix A(n,n) +! +! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + END_DOC + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + double precision, intent(out) :: reigvec(n,n),leigvec(n,n),eigval(n) + double precision, allocatable :: Aw(:,:) + integer, intent(out) :: n_real_eigv + print*,'Computing the left/right eigenvectors ...' + character*1 :: JOBVL,JOBVR + JOBVL = "V" ! computes the left eigenvectors + JOBVR = "V" ! computes the right eigenvectors + double precision, allocatable :: WR(:),WI(:),Vl(:,:),VR(:,:),S(:,:),inv_reigvec(:,:) + integer :: i,j + integer :: n_good + integer, allocatable :: list_good(:), iorder(:) + double precision :: thr + thr = 1.d-10 + ! Eigvalue(n) = WR(n) + i * WI(n) + allocate(WR(n),WI(n),VL(n,n),VR(n,n),Aw(n,n)) + Aw = A + do i = 1, n + do j = i+1, n + if(dabs(Aw(j,j)-Aw(i,i)).lt.thr)then + Aw(j,j)+= thr + Aw(i,i)-= thr +! if(Aw(j,i) * A(i,j) .lt.0d0 )then +! if(dabs(Aw(j,i) * A(i,j)).lt.thr**(1.5d0))then +! print*,Aw(j,j),Aw(i,i) +! print*,Aw(j,i) , A(i,j) + Aw(j,i) = 0.d0 + Aw(i,j) = Aw(j,i) +! endif +! endif + endif + enddo + enddo + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + ! You track the real eigenvalues + n_good = 0 +! do i = 1, n +! write(*,'(100(F16.12,X))')A(:,i) +! enddo + do i = 1, n + print*,'Im part of lambda = ',dabs(WI(i)) + if(dabs(WI(i)).lt.thr)then + n_good += 1 + else + print*,'Found an imaginary component to eigenvalue' + print*,'Re(i) + Im(i)',WR(i),WI(i) + write(*,'(100(F10.5,X))')VR(:,i) + write(*,'(100(F10.5,X))')VR(:,i+1) + write(*,'(100(F10.5,X))')VL(:,i) + write(*,'(100(F10.5,X))')VL(:,i+1) + endif + enddo + allocate(list_good(n_good),iorder(n_good)) + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.thr)then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + ! You sort the real eigenvalues + call dsort(eigval,iorder,n_good) + do i = 1, n_real_eigv + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = VL(j,list_good(iorder(i))) + enddo + enddo + allocate(inv_reigvec(n_real_eigv,n_real_eigv)) +! call get_pseudo_inverse(reigvec,n_real_eigv,n_real_eigv,n_real_eigv,inv_reigvec,n_real_eigv,thr) +! do i = 1, n_real_eigv +! do j = 1, n +! leigvec(j,i) = inv_reigvec(i,j) +! enddo +! enddo + allocate( S(n_real_eigv,n_real_eigv) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + do i = 1,n_real_eigv + write(*,'(100(F10.5,X))')S(:,i) + enddo +! call lapack_diag_non_sym(n,S,WR,WI,VL,VR) +! print*,'Eigenvalues of S' +! do i = 1, n +! print*,WR(i),dabs(WI(i)) +! enddo + call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) +! call get_inv_half_svd(S, n_real_eigv, inv_reigvec) + + double precision :: accu_d,accu_nd + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n_real_eigv + do j = 1, n_real_eigv + if(i==j) then + accu_d += S(j,i) * S(j,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + print*,'accu_nd = ',accu_nd + if( accu_nd .lt. 1d-10 ) then + ! L x R is already bi-orthogonal + !print *, ' L & T bi-orthogonality: ok' + return + else + print*,'PB with bi-orthonormality!!' + stop + endif +end + +subroutine lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) + + BEGIN_DOC + ! + ! You enter with a general non hermitian matrix A(n,n) + ! + ! You get out with the real WR and imaginary part WI of the eigenvalues + ! + ! Eigvalue(n) = WR(n) + i * WI(n) + ! + ! And the left VL and right VR eigenvectors + ! + ! VL(i,j) = :: projection on the basis element |i> on the jth left eigenvector + ! + ! VR(i,j) = :: projection on the basis element |i> on the jth right eigenvector + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n) + + character*1 :: JOBVL,JOBVR,BALANC,SENSE + integer :: ILO, IHI + integer :: lda, ldvl, ldvr, LWORK, INFO + double precision :: ABNRM + integer, allocatable :: IWORK(:) + double precision, allocatable :: WORK(:), SCALE_array(:), RCONDE(:), RCONDV(:) + double precision, allocatable :: Atmp(:,:) + + allocate( Atmp(n,n) ) + Atmp(1:n,1:n) = A(1:n,1:n) + + JOBVL = "V" ! computes the left eigenvectors + JOBVR = "V" ! computes the right eigenvectors + BALANC = "B" ! Diagonal scaling and Permutation for optimization + SENSE = "B" + lda = n + ldvl = n + ldvr = n + allocate(WORK(1),SCALE_array(n),RCONDE(n),RCONDV(n),IWORK(2*n-2)) + LWORK = -1 ! to ask for the optimal size of WORK + call dgeevx(BALANC,JOBVL,JOBVR,SENSE,& ! CHARACTERS + n,Atmp,lda, & ! MATRIX TO DIAGONALIZE + WR,WI, & ! REAL AND IMAGINARY PART OF EIGENVALUES + VL,ldvl,VR,ldvr, & ! LEFT AND RIGHT EIGENVECTORS + ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION + WORK,LWORK,IWORK,INFO) + + !if(INFO.gt.0)then + ! print*,'dgeev failed !!',INFO + if( INFO.ne.0 ) then + print *, 'dgeevx failed !!', INFO + stop + endif + + LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK + deallocate(WORK) + allocate(WORK(LWORK)) + ! Actual dnon_hrmt_real_diag_newiagonalization + call dgeevx(BALANC,JOBVL,JOBVR,SENSE,& ! CHARACTERS + n,Atmp,lda, & ! MATRIX TO DIAGONALIZE + WR,WI, & ! REAL AND IMAGINARY PART OF EIGENVALUES + VL,ldvl,VR,ldvr, & ! LEFT AND RIGHT EIGENVECTORS + ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION + WORK,LWORK,IWORK,INFO) + + !if(INFO.ne.0)then + ! print*,'dgeev failed !!',INFO + if( INFO.ne.0 ) then + print *, 'dgeevx failed !!', INFO + stop + endif + + deallocate( Atmp ) + deallocate( WORK, SCALE_array, RCONDE, RCONDV, IWORK ) + +end subroutine lapack_diag_non_sym_new + +! --- + +subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + double precision, intent(out) :: WR(n), WI(n), VR(n,n) + + integer :: i, lda, ldvl, ldvr, LWORK, INFO + double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:) + + lda = n + ldvl = 1 + ldvr = n + + allocate( Atmp(n,n), VL(1,1) ) + Atmp(1:n,1:n) = A(1:n,1:n) + + allocate(WORK(1)) + LWORK = -1 + call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.gt.0)then + print*,'dgeev failed !!',INFO + stop + endif + + LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK + deallocate(WORK) + + allocate(WORK(LWORK)) + + ! Actual diagonalization + call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.ne.0) then + print*,'dgeev failed !!', INFO + stop + endif + + deallocate(Atmp, WORK, VL) + +! print *, ' JOBL = F' +! print *, ' eigenvalues' +! do i = 1, n +! write(*, '(1000(F16.10,X))') WR(i), WI(i) +! enddo +! print *, ' right eigenvect' +! do i = 1, n +! write(*, '(1000(F16.10,X))') VR(:,i) +! enddo + +end subroutine lapack_diag_non_sym_right + +! --- + +subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j, n_good + double precision :: thr, threshold, accu_d, accu_nd + integer, allocatable :: list_good(:), iorder(:) + double precision, allocatable :: Aw(:,:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:), S(:,:), S_inv_half_tmp(:,:) + + print*, ' Computing the left/right eigenvectors with lapack ...' + + ! Eigvalue(n) = WR(n) + i * WI(n) + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + Aw = A + call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) + + ! --- + ! You track the real eigenvalues + + thr = 1d-15 + + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.thr) then + n_good += 1 + else + print*, ' Found an imaginary component to eigenvalue' + print*, ' Re(i) + Im(i)', WR(i), WI(i) + endif + enddo + + allocate(list_good(n_good), iorder(n_good)) + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.thr) then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + + ! You sort the real eigenvalues + call dsort(eigval, iorder, n_good) + do i = 1, n_real_eigv + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = Vl(j,list_good(iorder(i))) + enddo + enddo + +! print *, ' ordered eigenvalues' +! print *, ' right eigenvect' +! do i = 1, n +! print *, i, eigval(i) +! write(*, '(1000(F16.10,X))') reigvec(:,i) +! enddo + + ! --- + + allocate( S(n_real_eigv,n_real_eigv), S_inv_half_tmp(n_real_eigv,n_real_eigv) ) + + ! S = VL x VR + call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n_real_eigv + do j = 1, n_real_eigv + if(i==j) then + accu_d += S(j,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + threshold = 1.d-15 + if( (accu_nd .gt. threshold) .or. (dabs(accu_d-dble(n_real_eigv)) .gt. threshold) ) then + + print*, ' sum of off-diag S elements = ', accu_nd + print*, ' Should be zero ' + print*, ' sum of diag S elements = ', accu_d + print*, ' Should be ',n + print*, ' Not bi-orthonormal !!' + print*, ' Notice that if you are interested in ground state it is not a problem :)' + endif + +end subroutine non_hrmt_real_diag + +! --- + +subroutine lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR) + + BEGIN_DOC + ! You enter with a general non hermitian matrix A(n,n) and another B(n,n) + ! + ! You get out with the real WR and imaginary part WI of the eigenvalues + ! + ! Eigvalue(n) = (WR(n) + i * WI(n)) + ! + ! And the left VL and right VR eigenvectors + ! + ! VL(i,j) = :: projection on the basis element |i> on the jth left eigenvector + ! + ! VR(i,j) = :: projection on the basis element |i> on the jth right eigenvector + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n), B(n,n) + double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n) + + integer :: lda, ldvl, ldvr, LWORK, INFO + integer :: n_good + double precision, allocatable :: WORK(:) + double precision, allocatable :: Atmp(:,:) + + lda = n + ldvl = n + ldvr = n + + allocate( Atmp(n,n) ) + Atmp(1:n,1:n) = A(1:n,1:n) + + allocate(WORK(1)) + LWORK = -1 + call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.gt.0) then + print*,'dgeev failed !!',INFO + stop + endif + + LWORK = max(int(WORK(1)), 1) + deallocate(WORK) + + allocate(WORK(LWORK)) + + call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) + if(INFO.ne.0) then + print*,'dgeev failed !!', INFO + stop + endif + + deallocate( WORK, Atmp ) + +end subroutine lapack_diag_general_non_sym + +! --- + +subroutine non_hrmt_general_real_diag(n, A, B, reigvec, leigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) and B(n,n) + ! + ! A reigvec = eigval * B * reigvec + ! + ! (A)^\dagger leigvec = eigval * B * leigvec + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + END_DOC + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n), B(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + + integer :: i, j + integer :: n_good + integer, allocatable :: list_good(:), iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:) + double precision, allocatable :: Aw(:,:), Bw(:,:) + + print*,'Computing the left/right eigenvectors ...' + + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n)) + Aw = A + Bw = B + + call lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR) + + ! You track the real eigenvalues + n_good = 0 + do i = 1, n + if(dabs(WI(i)) .lt. 1.d-12) then + n_good += 1 + else + print*,'Found an imaginary component to eigenvalue' + print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + + allocate(list_good(n_good), iorder(n_good)) + n_good = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-12)then + n_good += 1 + list_good(n_good) = i + eigval(n_good) = WR(i) + endif + enddo + n_real_eigv = n_good + do i = 1, n_good + iorder(i) = i + enddo + + ! You sort the real eigenvalues + call dsort(eigval, iorder, n_good) + print*,'n_real_eigv = ', n_real_eigv + print*,'n = ', n + do i = 1, n_real_eigv + print*,i,'eigval(i) = ', eigval(i) + do j = 1, n + reigvec(j,i) = VR(j,list_good(iorder(i))) + leigvec(j,i) = Vl(j,list_good(iorder(i))) + enddo + enddo + +end subroutine non_hrmt_general_real_diag + +! --- + +subroutine impose_biorthog_qr(m, n, Vl, Vr) + + implicit none + integer, intent(in) :: m, n + double precision, intent(inout) :: Vl(m,n), Vr(m,n) + + integer :: i, j + integer :: LWORK, INFO + double precision :: accu_nd, accu_d, thr_nd, thr_d + double precision, allocatable :: TAU(:), WORK(:) + double precision, allocatable :: S(:,:), R(:,:), tmp(:,:) + + ! --- + + call check_biorthog_binormalize(m, n, Vl, Vr, .false.) + + ! --- + + allocate(S(n,n)) + call dgemm( 'T', 'N', n, n, m, 1.d0 & + , Vl, size(Vl, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) then + accu_d += S(j,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + thr_d = 1d-10 + thr_nd = 1d-12 + if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n)) .lt. thr_d)) then + print *, ' bi-orthogonal vectors without QR !' + deallocate(S) + return + endif + + ! ------------------------------------------------------------------------------------- + ! QR factorization of S: S = Q x R + + + print *, ' apply QR decomposition ...' + + allocate( TAU(n), WORK(1) ) + + LWORK = -1 + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + ! save the upper triangular R + allocate( R(n,n) ) + R(:,:) = S(:,:) + + ! get Q + LWORK = -1 + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + deallocate( WORK, TAU ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! get bi-orhtog left & right vectors: + ! Vr' = Vr x inv(R) + ! Vl' = inv(Q) x Vl = Q.T x Vl + + ! Q.T x Vl, where Q = S + + allocate( tmp(n,m) ) + call dgemm( 'T', 'T', n, m, n, 1.d0 & + , S, size(S, 1), Vl, size(Vl, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + do i = 1, n + do j = 1, m + Vl(j,i) = tmp(i,j) + enddo + enddo + deallocate(tmp) + + ! --- + + ! inv(R) + !print *, ' inversing upper triangular matrix ...' + call dtrtri("U", "N", n, R, n, INFO) + if(INFO .ne. 0) then + print*,'dtrtri failed !!', INFO + stop + endif + !print *, ' inversing upper triangular matrix OK' + + do i = 1, n-1 + do j = i+1, n + R(j,i) = 0.d0 + enddo + enddo + + !print *, ' inv(R):' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') R(i,:) + !enddo + + ! Vr x inv(R) + allocate( tmp(m,n) ) + call dgemm( 'N', 'N', m, n, n, 1.d0 & + , Vr, size(Vr, 1), R, size(R, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate( R ) + + do i = 1, n + do j = 1, m + Vr(j,i) = tmp(j,i) + enddo + enddo + deallocate(tmp) + + return +end subroutine impose_biorthog_qr + +! --- + +subroutine impose_biorthog_lu(m, n, Vl, Vr, S) + + implicit none + integer, intent(in) :: m, n + double precision, intent(inout) :: Vl(m,n), Vr(m,n), S(n,n) + + integer :: i, j + integer :: INFO + double precision :: nrm + integer, allocatable :: IPIV(:) + double precision, allocatable :: L(:,:), tmp(:,:), vectmp(:) + !double precision, allocatable :: T(:,:), ll(:,:), rr(:,:), tt(:,:) + + !allocate( T(n,n) ) + !T(:,:) = S(:,:) + + print *, ' apply LU decomposition ...' + + ! ------------------------------------------------------------------------------------- + ! LU factorization of S: S = P x L x U + + allocate( IPIV(n) ) + + call dgetrf(n, n, S, n, IPIV, INFO) + if(INFO .ne. 0) then + print*, 'dgetrf failed !!', INFO + stop + endif + + ! check | S - P x L x U | + !allocate( ll(n,n), rr(n,n), tmp(n,n) ) + !ll = S + !rr = S + !do i = 1, n-1 + ! ll(i,i) = 1.d0 + ! do j = i+1, n + ! ll(i,j) = 0.d0 + ! rr(j,i) = 0.d0 + ! enddo + !enddo + !ll(n,n) = 1.d0 + !call dgemm( 'N', 'N', n, n, n, 1.d0 & + ! , ll, size(ll, 1), rr, size(rr, 1) & + ! , 0.d0, tmp, size(tmp, 1) ) + ! deallocate(ll, rr) + !allocate( vectmp(n) ) + !do j = n-1, 1, -1 + ! i = IPIV(j) + ! if(i.ne.j) then + ! print *, j, i + ! vectmp(:) = tmp(i,:) + ! tmp(i,:) = tmp(j,:) + ! tmp(j,:) = vectmp(:) + ! endif + !enddo + !deallocate( vectmp ) + !nrm = 0.d0 + !do i = 1, n + ! do j = 1, n + ! nrm += dabs(tmp(j,i) - T(j,i)) + ! enddo + !enddo + !deallocate( tmp ) + !print*, '|L.T x R - S| =', nrm + !stop + + ! ------ + ! inv(L) + ! ------ + + allocate( L(n,n) ) + L(:,:) = S(:,:) + + call dtrtri("L", "U", n, L, n, INFO) + if(INFO .ne. 0) then + print*, 'dtrtri failed !!', INFO + stop + endif + do i = 1, n-1 + L(i,i) = 1.d0 + do j = i+1, n + L(i,j) = 0.d0 + enddo + enddo + L(n,n) = 1.d0 + + ! ------ + ! inv(U) + ! ------ + + call dtrtri("U", "N", n, S, n, INFO) + if(INFO .ne. 0) then + print*, 'dtrtri failed !!', INFO + stop + endif + + do i = 1, n-1 + do j = i+1, n + S(j,i) = 0.d0 + enddo + enddo + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! get bi-orhtog left & right vectors: + ! Vr' = Vr x inv(U) + ! Vl' = inv(L) x inv(P) x Vl + + ! inv(P) x Vl + allocate( vectmp(n) ) + do j = n-1, 1, -1 + i = IPIV(j) + if(i.ne.j) then + vectmp(:) = L(:,j) + L(:,j) = L(:,i) + L(:,i) = vectmp(:) + endif + enddo + deallocate( vectmp ) + + ! Vl' + allocate( tmp(m,n) ) + call dgemm( 'N', 'T', m, n, n, 1.d0 & + , Vl, size(Vl, 1), L, size(L, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate(L) + + Vl = tmp + deallocate(tmp) + + ! --- + + ! Vr x inv(U) + allocate( tmp(m,n) ) + call dgemm( 'N', 'N', m, n, n, 1.d0 & + , Vr, size(Vr, 1), S, size(S, 1) & + , 0.d0, tmp, size(tmp, 1) ) + Vr = tmp + deallocate(tmp) + + !allocate( tmp(n,n) ) + !call dgemm( 'T', 'N', n, n, m, 1.d0 & + ! , Vl, size(Vl, 1), Vr, size(Vr, 1) & + ! , 0.d0, tmp, size(tmp, 1) ) + !nrm = 0.d0 + !do i = 1, n + ! do j = 1, n + ! nrm += dabs(tmp(j,i)) + ! enddo + !enddo + !deallocate( tmp ) + !print*, '|L.T x R| =', nrm + !stop + + return +end subroutine impose_biorthog_lu + +! --- + +subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, stop_ifnot) + + implicit none + integer, intent(in) :: n, m + logical, intent(in) :: stop_ifnot + double precision, intent(in) :: A(n,n), eigval(m), leigvec(n,m), reigvec(n,m), thr_diag, thr_norm + + integer :: i, j + double precision :: tmp, tmp_abs, tmp_nrm, tmp_rel, tmp_dif + double precision :: V_nrm, U_nrm + double precision, allocatable :: Mtmp(:,:) + + allocate( Mtmp(n,m) ) + + ! --- + + Mtmp = 0.d0 + call dgemm( 'N', 'N', n, m, n, 1.d0 & + , A, size(A, 1), reigvec, size(reigvec, 1) & + , 0.d0, Mtmp, size(Mtmp, 1) ) + + V_nrm = 0.d0 + tmp_nrm = 0.d0 + tmp_abs = 0.d0 + do j = 1, m + + tmp = 0.d0 + U_nrm = 0.d0 + do i = 1, n + tmp = tmp + dabs(Mtmp(i,j) - eigval(j) * reigvec(i,j)) + tmp_nrm = tmp_nrm + dabs(Mtmp(i,j)) + U_nrm = U_nrm + reigvec(i,j) * reigvec(i,j) + enddo + + tmp_abs = tmp_abs + tmp + V_nrm = V_nrm + U_nrm + print *, j, tmp, U_nrm + + enddo + + tmp_rel = tmp_abs / tmp_nrm + tmp_dif = dabs(V_nrm - dble(m)) + + if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then + print *, ' error in right-eigenvectors' + print *, ' err estim = ', tmp_abs, tmp_rel + print *, ' CR norm = ', V_nrm + stop + endif + + ! --- + + Mtmp = 0.d0 + call dgemm( 'T', 'N', n, m, n, 1.d0 & + , A, size(A, 1), leigvec, size(leigvec, 1) & + , 0.d0, Mtmp, size(Mtmp, 1) ) + + V_nrm = 0.d0 + tmp_nrm = 0.d0 + tmp_abs = 0.d0 + do j = 1, m + + tmp = 0.d0 + U_nrm = 0.d0 + do i = 1, n + tmp = tmp + dabs(Mtmp(i,j) - eigval(j) * leigvec(i,j)) + tmp_nrm = tmp_nrm + dabs(Mtmp(i,j)) + U_nrm = U_nrm + leigvec(i,j) * leigvec(i,j) + enddo + + tmp_abs = tmp_abs + tmp + V_nrm = V_nrm + U_nrm + print *, j, tmp, U_nrm + + enddo + + tmp_rel = tmp_abs / tmp_nrm + if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then + print *, ' error in left-eigenvectors' + print *, ' err estim = ', tmp_abs, tmp_rel + print *, ' CR norm = ', V_nrm + stop + endif + + ! --- + + deallocate( Mtmp ) + +end subroutine check_EIGVEC + +! --- + +subroutine check_degen(n, m, eigval, leigvec, reigvec) + + implicit none + integer, intent(in) :: n, m + double precision, intent(in) :: eigval(m) + double precision, intent(inout) :: leigvec(n,m), reigvec(n,m) + + integer :: i, j + double precision :: ei, ej, de, de_thr, accu_nd + double precision, allocatable :: S(:,:) + + de_thr = 1d-7 + + do i = 1, m-1 + ei = eigval(i) + + do j = i+1, m + ej = eigval(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + + leigvec(:,i) = 0.d0 + leigvec(:,j) = 0.d0 + leigvec(i,i) = 1.d0 + leigvec(j,j) = 1.d0 + + reigvec(:,i) = 0.d0 + reigvec(:,j) = 0.d0 + reigvec(i,i) = 1.d0 + reigvec(j,j) = 1.d0 + + endif + + enddo + enddo + + ! --- + + allocate( S(m,m) ) + + ! S = VL x VR + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & + , 0.d0, S, size(S, 1) ) + + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) cycle + accu_nd = accu_nd + S(j,i) * S(j,i) + enddo + enddo + accu_nd = dsqrt(accu_nd) + + deallocate( S ) + + print *, ' check_degen: L & T bi-orthogonality: ok' + print *, ' accu_nd = ', accu_nd + + if( accu_nd .lt. 1d-8 ) then + return + else + stop + endif + +end subroutine check_degen + +! --- + +subroutine impose_weighted_orthog_svd(n, m, W, C) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(inout) :: C(n,m), W(n,n) + + integer :: i, j, num_linear_dependencies + double precision :: threshold + double precision, allocatable :: S(:,:), tmp(:,:) + double precision, allocatable :: U(:,:), Vt(:,:), D(:) + + print *, ' apply SVD to orthogonalize vectors' + + ! --- + + ! C.T x W x C + allocate(S(m,m)) + allocate(tmp(m,n)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , C, size(C, 1), W, size(W, 1) & + , 0.d0, tmp, size(tmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , tmp, size(tmp, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(tmp) + + print *, ' eigenvec overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + ! --- + + allocate(U(m,m), Vt(m,m), D(m)) + + call svd(S, m, U, m, D, Vt, m, m, m) + + deallocate(S) + + threshold = 1.d-6 + num_linear_dependencies = 0 + do i = 1, m + if(abs(D(i)) <= threshold) then + D(i) = 0.d0 + num_linear_dependencies += 1 + else + ASSERT (D(i) > 0.d0) + D(i) = 1.d0 / dsqrt(D(i)) + endif + enddo + if(num_linear_dependencies > 0) then + write(*,*) ' linear dependencies = ', num_linear_dependencies + write(*,*) ' m = ', m + stop + endif + + ! --- + + allocate(tmp(n,m)) + + ! tmp <-- C x U + call dgemm( 'N', 'N', n, m, m, 1.d0 & + , C, size(C, 1), U, size(U, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + deallocate(U, Vt) + + ! C <-- tmp x sigma^-0.5 + do j = 1, m + do i = 1, n + C(i,j) = tmp(i,j) * D(j) + enddo + enddo + + deallocate(D, tmp) + + ! --- + + ! C.T x W x C + allocate(S(m,m)) + allocate(tmp(m,n)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , C, size(C, 1), W, size(W, 1) & + , 0.d0, tmp, size(tmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , tmp, size(tmp, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(tmp) + + print *, ' eigenvec overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + deallocate(S) + + ! --- + +end subroutine impose_weighted_orthog_svd + +! --- + +subroutine impose_orthog_svd(n, m, C) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(inout) :: C(n,m) + + integer :: i, j, num_linear_dependencies + double precision :: threshold + double precision, allocatable :: S(:,:), tmp(:,:) + double precision, allocatable :: U(:,:), Vt(:,:), D(:) + + print *, ' apply SVD to orthogonalize vectors' + + ! --- + + allocate(S(m,m)) + + ! S = C.T x C + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' eigenvec overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + ! --- + + allocate(U(m,m), Vt(m,m), D(m)) + + call svd(S, m, U, m, D, Vt, m, m, m) + + deallocate(S) + + threshold = 1.d-6 + num_linear_dependencies = 0 + do i = 1, m + if(abs(D(i)) <= threshold) then + D(i) = 0.d0 + num_linear_dependencies += 1 + else + ASSERT (D(i) > 0.d0) + D(i) = 1.d0 / dsqrt(D(i)) + endif + enddo + if(num_linear_dependencies > 0) then + write(*,*) ' linear dependencies = ', num_linear_dependencies + write(*,*) ' m = ', m + stop + endif + + ! --- + + allocate(tmp(n,m)) + + ! tmp <-- C x U + call dgemm( 'N', 'N', n, m, m, 1.d0 & + , C, size(C, 1), U, size(U, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + deallocate(U, Vt) + + ! C <-- tmp x sigma^-0.5 + do j = 1, m + do i = 1, n + C(i,j) = tmp(i,j) * D(j) + enddo + enddo + + deallocate(D, tmp) + + ! --- + + allocate(S(m,m)) + + ! S = C.T x C + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' eigenvec overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + deallocate(S) + + ! --- + +end subroutine impose_orthog_svd + +! --- + +subroutine impose_orthog_GramSchmidt(n, m, C) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(inout) :: C(n,m) + + integer :: i, j, k + double precision :: Ojk, Ojj, fact_ct + double precision, allocatable :: S(:,:) + + print *, '' + print *, ' apply Gram-Schmidt to orthogonalize vectors' + print *, '' + + ! --- + + allocate(S(m,m)) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' eigenvec overlap bef Gram-Schmidt: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + ! --- + + do k = 2, m + do j = 1, k-1 + + Ojk = 0.d0 + Ojj = 0.d0 + do i = 1, n + Ojk = Ojk + C(i,j) * C(i,k) + Ojj = Ojj + C(i,j) * C(i,j) + enddo + fact_ct = Ojk / Ojj + + do i = 1, n + C(i,k) = C(i,k) - fact_ct * C(i,j) + enddo + + enddo + enddo + + do k = 1, m + fact_ct = 0.d0 + do i = 1, n + fact_ct = fact_ct + C(i,k) * C(i,k) + enddo + fact_ct = dsqrt(fact_ct) + do i = 1, n + C(i,k) = C(i,k) / fact_ct + enddo + enddo + + ! --- + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' eigenvec overlap aft Gram-Schmidt: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + deallocate(S) + + ! --- + +end subroutine impose_orthog_GramSchmidt + +! --- + +subroutine impose_orthog_ones(n, deg_num, C) + + + implicit none + + integer, intent(in) :: n + integer, intent(in) :: deg_num(n) + double precision, intent(inout) :: C(n,n) + + integer :: i, j, ii, di, dj + + print *, '' + print *, ' orthogonalize vectors by hand' + print *, '' + + do i = 1, n-1 + di = deg_num(i) + + if(di .gt. 1) then + + do ii = 1, di + C(: ,i+ii-1) = 0.d0 + C(i+ii-1,i+ii-1) = 1.d0 + enddo + + do j = i+di+1, n + dj = deg_num(j) + if(dj .eq. di) then + do ii = 1, dj + C(:, j+ii-1) = 0.d0 + C(j+ii-1,j+ii-1) = 1.d0 + enddo + endif + enddo + + endif + enddo + +end subroutine impose_orthog_ones + +! --- + +subroutine impose_orthog_degen_eigvec(n, e0, C0) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: e0(n) + double precision, intent(inout) :: C0(n,n) + + integer :: i, j, k, m + double precision :: ei, ej, de, de_thr + integer, allocatable :: deg_num(:) + double precision, allocatable :: C(:,:) + + ! --- + + allocate( deg_num(n) ) + do i = 1, n + deg_num(i) = 1 + enddo + + de_thr = 1d-10 + + do i = 1, n-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, n + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + + do i = 1, n + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + endif + enddo + + ! --- + +! call impose_orthog_ones(n, deg_num, C0) + + do i = 1, n + m = deg_num(i) + + if(m .gt. 1) then + !if(m.eq.3) then + + allocate(C(n,m)) + do j = 1, m + C(1:n,j) = C0(1:n,i+j-1) + enddo + + ! --- + + ! C <= C U sigma^-0.5 + call impose_orthog_svd(n, m, C) + + ! --- + + ! C = I + !C = 0.d0 + !do j = 1, m + ! C(i+j-1,j) = 1.d0 + !enddo + + ! --- + +! call impose_orthog_GramSchmidt(n, m, C) + + ! --- + + do j = 1, m + C0(1:n,i+j-1) = C(1:n,j) + enddo + deallocate(C) + + endif + enddo + +end subroutine impose_orthog_degen_eigvec + +! --- + +subroutine get_halfinv_svd(n, S) + + implicit none + + integer, intent(in) :: n + double precision, intent(inout) :: S(n,n) + + integer :: num_linear_dependencies + integer :: i, j, k + double precision :: accu_d, accu_nd, thresh + double precision, parameter :: threshold = 1.d-6 + double precision, allocatable :: U(:,:), Vt(:,:), D(:) + double precision, allocatable :: S0(:,:), Stmp(:,:), Stmp2(:,:) + + allocate( S0(n,n) ) + S0(1:n,1:n) = S(1:n,1:n) + + allocate(U(n,n), Vt(n,n), D(n)) + call svd(S, n, U, n, D, Vt, n, n, n) + + num_linear_dependencies = 0 + do i = 1, n + if(abs(D(i)) <= threshold) then + D(i) = 0.d0 + num_linear_dependencies += 1 + else + ASSERT (D(i) > 0.d0) + D(i) = 1.d0 / dsqrt(D(i)) + endif + enddo + write(*,*) ' linear dependencies', num_linear_dependencies + + S(:,:) = 0.d0 + do k = 1, n + if(D(k) /= 0.d0) then + do j = 1, n + do i = 1, n + S(i,j) = S(i,j) + U(i,k) * D(k) * Vt(k,j) + enddo + enddo + endif + enddo + deallocate(U, D, Vt) + + allocate( Stmp(n,n), Stmp2(n,n) ) + Stmp = 0.d0 + Stmp2 = 0.d0 + ! S^-1/2 x S + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , S, size(S, 1), S0, size(S0, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + ! ( S^-1/2 x S ) x S^-1/2 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , Stmp, size(Stmp, 1), S, size(S, 1) & + , 0.d0, Stmp2, size(Stmp2, 1) ) + + accu_nd = 0.d0 + accu_d = 0.d0 + thresh = 1.d-10 + do i = 1, n + do j = 1, n + if(i==j) then + accu_d += Stmp2(j,i) + else + accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then + print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd + print*, ' after S^-1/2: sum of diag S elements = ', accu_d + do i = 1, n + write(*,'(1000(F16.10,X))') Stmp2(i,:) + enddo + stop + endif + + deallocate(S0, Stmp, Stmp2) + +end subroutine get_halfinv_svd + +! --- + +subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) + + implicit none + + integer, intent(in) :: n, m + logical, intent(in) :: stop_ifnot + double precision, intent(inout) :: Vl(n,m), Vr(n,m) + + integer :: i, j + double precision :: thr_d, thr_nd + double precision :: accu_d, accu_nd, s_tmp + double precision, allocatable :: S(:,:) + + thr_d = 1d-6 + thr_nd = 1d-7 + + print *, ' check bi-orthonormality' + + ! --- + + allocate(S(m,m)) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , Vl, size(Vl, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + !print *, ' overlap matrix before:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + ! S(i,i) = -1 + do i = 1, m + if( (S(i,i) + 1.d0) .lt. thr_d ) then + do j = 1, n + Vl(j,i) = -1.d0 * Vl(j,i) + enddo + S(i,i) = 1.d0 + endif + enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + print*, ' diag acc: ', accu_d + print*, ' nondiag acc: ', accu_nd + + ! --- + + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then + + do i = 1, m + print *, i, S(i,i) + if(dabs(S(i,i) - 1.d0) .gt. thr_d) then + s_tmp = 1.d0 / dsqrt(S(i,i)) + do j = 1, n + Vl(j,i) = Vl(j,i) * s_tmp + Vr(j,i) = Vr(j,i) * s_tmp + enddo + endif + enddo + + endif + + ! --- + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , Vl, size(Vl, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + !print *, ' overlap matrix after:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + print *, ' diag acc: ', accu_d + print *, ' nondiag acc: ', accu_nd + + deallocate(S) + + ! --- + + if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then + print *, accu_nd, thr_nd + print *, dabs(accu_d-dble(m))/dble(m), thr_d + print *, ' biorthog_binormalize failed !' + stop + endif + +end subroutine check_biorthog_binormalize + +! --- + +subroutine check_weighted_biorthog(n, m, W, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(in) :: Vl(n,m), Vr(n,m), W(n,n) + logical, intent(in) :: stop_ifnot + double precision, intent(out) :: accu_d, accu_nd, S(m,m) + + integer :: i, j + double precision :: thr_d, thr_nd + double precision, allocatable :: SS(:,:), tmp(:,:) + + thr_d = 1d-6 + thr_nd = 1d-10 + + print *, ' check weighted bi-orthogonality' + + ! --- + + allocate(tmp(m,n)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, tmp, size(tmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , tmp, size(tmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(tmp) + + print *, ' overlap matrix:' + do i = 1, m + write(*,'(1000(F16.10,X))') S(i,:) + enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + dabs(S(i,i)) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + print *, ' accu_nd = ', accu_nd + print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + + ! --- + + if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then + print *, ' non bi-orthogonal vectors !' + print *, ' accu_nd = ', accu_nd + print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + !print *, ' overlap matrix:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + stop + endif + +end subroutine check_weighted_biorthog + +! --- + +subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(in) :: Vl(n,m), Vr(n,m) + logical, intent(in) :: stop_ifnot + double precision, intent(out) :: accu_d, accu_nd, S(m,m) + + integer :: i, j + double precision :: thr_d, thr_nd + double precision, allocatable :: SS(:,:) + + thr_d = 1d-6 + thr_nd = 1d-10 + + print *, ' check bi-orthogonality' + + ! --- + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , Vl, size(Vl, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + print *, ' overlap matrix:' + do i = 1, m + write(*,'(1000(F16.10,X))') S(i,:) + enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + dabs(S(i,i)) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + print *, ' accu_nd = ', accu_nd + print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + + ! --- + + if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then + print *, ' non bi-orthogonal vectors !' + print *, ' accu_nd = ', accu_nd + print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + !print *, ' overlap matrix:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + stop + endif + +end subroutine check_biorthog + +! --- + +subroutine check_orthog(n, m, V, accu_d, accu_nd, S) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(in) :: V(n,m) + double precision, intent(out) :: accu_d, accu_nd, S(m,m) + + integer :: i, j + + S = 0.d0 + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , V, size(V, 1), V, size(V, 1) & + , 0.d0, S, size(S, 1) ) + + print *, '' + print *, ' overlap matrix:' + do i = 1, m + write(*,'(1000(F16.10,X))') S(i,:) + enddo + print *, '' + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + dabs(S(i,i)) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + !print*, ' diag acc: ', accu_d + !print*, ' nondiag acc: ', accu_nd + +end subroutine check_orthog + +! --- + +subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: e0(n) + double precision, intent(inout) :: L0(n,n), R0(n,n) + + logical :: complex_root + integer :: i, j, k, m + double precision :: ei, ej, de, de_thr + double precision :: accu_d, accu_nd + integer, allocatable :: deg_num(:) + double precision, allocatable :: L(:,:), R(:,:), S(:,:), S_inv_half(:,:) + + ! --- + + allocate( deg_num(n) ) + do i = 1, n + deg_num(i) = 1 + enddo + + de_thr = 1d-10 + + do i = 1, n-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, n + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + do i = 1, n + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + endif + enddo + + ! --- + + do i = 1, n + m = deg_num(i) + + if(m .gt. 1) then + + allocate(L(n,m)) + allocate(R(n,m)) + + do j = 1, m + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + enddo + + ! --- + + call impose_orthog_svd(n, m, L) + call impose_orthog_svd(n, m, R) + + ! --- + + !allocate(S(m,m)) + !call dgemm( 'T', 'N', m, m, n, 1.d0 & + ! , L, size(L, 1), R, size(R, 1) & + ! , 0.d0, S, size(S, 1) ) + !allocate(S_inv_half(m,m)) + !call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root) + !if(complex_root) then + ! print*, ' complex roots in inv_half !!! ' + ! stop + !endif + !call bi_ortho_s_inv_half(m, L, R, S_inv_half) + !deallocate(S, S_inv_half) + + call impose_biorthog_svd(n, m, L, R) + + !call impose_biorthog_qr(n, m, L, R) + + ! --- + + do j = 1, m + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + + deallocate(L, R) + + endif + enddo + +end subroutine impose_biorthog_degen_eigvec + +! --- + +subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: e0(n) + double precision, intent(inout) :: L0(n,n), R0(n,n) + + integer :: i, j, k, m + double precision :: ei, ej, de, de_thr + double precision :: accu_d, accu_nd + integer, allocatable :: deg_num(:) + double precision, allocatable :: L(:,:), R(:,:), S(:,:) + + ! --- + + allocate( deg_num(n) ) + do i = 1, n + deg_num(i) = 1 + enddo + + de_thr = 1d-10 + + do i = 1, n-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, n + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + do i = 1, n + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + endif + enddo + + ! --- + + do i = 1, n + m = deg_num(i) + + if(m .gt. 1) then + + allocate(L(n,m)) + allocate(R(n,m)) + + do j = 1, m + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + enddo + + ! --- + + call impose_orthog_svd(n, m, L) + call impose_orthog_svd(n, m, R) + + ! --- + + call impose_biorthog_qr(n, m, L, R) + + allocate(S(m,m)) + call check_biorthog(n, m, L, R, accu_d, accu_nd, S, .true.) + !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, .true.) + !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, .false.) + deallocate(S) + + ! --- + + do j = 1, m + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + + deallocate(L, R) + + endif + enddo + +end subroutine impose_orthog_biorthog_degen_eigvec + +! --- + +subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: e0(n), W0(n,n), C0(n,n) + double precision, intent(inout) :: L0(n,n), R0(n,n) + + logical :: complex_root + integer :: i, j, k, m + double precision :: ei, ej, de, de_thr + integer, allocatable :: deg_num(:) + double precision, allocatable :: L(:,:), R(:,:), C(:,:) + double precision, allocatable :: S(:,:), S_inv_half(:,:), tmp(:,:) + + ! --- + + allocate( deg_num(n) ) + do i = 1, n + deg_num(i) = 1 + enddo + + de_thr = 1d-10 + + do i = 1, n-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, n + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + do i = 1, n + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + endif + enddo + + ! --- + + do i = 1, n + m = deg_num(i) + + if(m .gt. 1) then + + allocate(L(n,m)) + allocate(R(n,m)) + allocate(C(n,m)) + + do j = 1, m + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + C(1:n,j) = C0(1:n,i+j-1) + enddo + + ! --- + + call impose_orthog_svd(n, m, L) + call impose_orthog_svd(n, m, R) + + ! --- + + + ! TODO: + ! select C correctly via overlap + ! or via selecting degen in HF + + !call max_overlap_qr(n, m, C, L) + !call max_overlap_qr(n, m, C, R) + + + allocate(tmp(m,n)) + allocate(S(m,m)) + + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , L, size(L, 1), W0, size(W0, 1) & + , 0.d0, tmp, size(tmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , tmp, size(tmp, 1), C, size(C, 1) & + , 0.d0, S, size(S, 1) ) + + call max_overlap_qr(n, m, S, L) + !call max_overlap_invprod(n, m, S, L) + + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , C, size(C, 1), W0, size(W0, 1) & + , 0.d0, tmp, size(tmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , tmp, size(tmp, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + + call max_overlap_qr(n, m, S, R) + !call max_overlap_invprod(n, m, S, R) + + deallocate(S, tmp) + + ! --- + + allocate(S(m,m), S_inv_half(m,m)) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root) + if(complex_root)then + call impose_biorthog_svd(n, m, L, R) + !call impose_biorthog_qr(n, m, L, R) + else + call bi_ortho_s_inv_half(m, L, R, S_inv_half) + endif + deallocate(S, S_inv_half) + + ! --- + + do j = 1, m + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + + deallocate(L, R, C) + + endif + enddo + +end subroutine impose_unique_biorthog_degen_eigvec + +! --- + +subroutine max_overlap_qr(m, n, S0, V) + + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: S0(n,n) + double precision, intent(inout) :: V(m,n) + + integer :: i, j + integer :: LWORK, INFO + double precision, allocatable :: TAU(:), WORK(:) + double precision, allocatable :: S(:,:), tmp(:,:) + + allocate(S(n,n)) + S = S0 + + ! --- + + allocate( TAU(n), WORK(1) ) + + LWORK = -1 + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + ! get Q in S matrix + LWORK = -1 + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + deallocate( WORK, TAU ) + + ! --- + + ! V0.T <-- Q.T x V0.T, where Q = S + + allocate( tmp(n,m) ) + + call dgemm( 'T', 'T', n, m, n, 1.d0 & + , S, size(S, 1), V, size(V, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + deallocate(S) + + do i = 1, n + do j = 1, m + V(j,i) = tmp(i,j) + enddo + enddo + + deallocate(tmp) + + ! --- + + return +end subroutine max_overlap_qr + +! --- + +subroutine max_overlap_invprod(n, m, S, V) + + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: S(m,m) + double precision, intent(inout) :: V(n,m) + + integer :: i + double precision, allocatable :: invS(:,:), tmp(:,:) + + allocate(invS(m,m)) + call get_inverse(S, size(S, 1), m, invS, size(invS, 1)) + print *, ' overlap ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + print *, ' inv overlap ' + do i = 1, m + write(*, '(1000(F16.10,X))') invS(i,:) + enddo + + allocate(tmp(n,m)) + tmp = V + + call dgemm( 'N', 'N', n, m, m, 1.d0 & + , tmp, size(tmp, 1), invS, size(invS, 1) & + , 0.d0, V, size(V, 1) ) + + deallocate(tmp, invS) + + return +end subroutine max_overlap_invprod + +! --- + +subroutine impose_biorthog_svd(n, m, L, R) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(inout) :: L(n,m), R(n,m) + + integer :: i, j, num_linear_dependencies + double precision :: threshold + double precision, allocatable :: S(:,:), tmp(:,:) + double precision, allocatable :: U(:,:), V(:,:), Vt(:,:), D(:) + + ! --- + + allocate(S(m,m)) + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + ! --- + + allocate(U(m,m), Vt(m,m), D(m)) + + call svd(S, m, U, m, D, Vt, m, m, m) + + deallocate(S) + + threshold = 1.d-6 + num_linear_dependencies = 0 + do i = 1, m + if(abs(D(i)) <= threshold) then + D(i) = 0.d0 + num_linear_dependencies += 1 + else + ASSERT (D(i) > 0.d0) + D(i) = 1.d0 / dsqrt(D(i)) + endif + enddo + if(num_linear_dependencies > 0) then + write(*,*) ' linear dependencies = ', num_linear_dependencies + write(*,*) ' m = ', m + stop + endif + + allocate(V(m,m)) + do i = 1, m + do j = 1, m + V(j,i) = Vt(i,j) + enddo + enddo + deallocate(Vt) + + ! --- + + allocate(tmp(n,m)) + + ! tmp <-- R x V + call dgemm( 'N', 'N', n, m, m, 1.d0 & + , R, size(R, 1), V, size(V, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate(V) + ! R <-- tmp x sigma^-0.5 + do j = 1, m + do i = 1, n + R(i,j) = tmp(i,j) * D(j) + enddo + enddo + + ! tmp <-- L x U + call dgemm( 'N', 'N', n, m, m, 1.d0 & + , L, size(L, 1), U, size(U, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate(U) + ! L <-- tmp x sigma^-0.5 + do j = 1, m + do i = 1, n + L(i,j) = tmp(i,j) * D(j) + enddo + enddo + + deallocate(D, tmp) + + ! --- + + allocate(S(m,m)) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + deallocate(S) + + ! --- + +end subroutine impose_biorthog_svd + +! --- + diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f new file mode 100644 index 00000000..07ac5917 --- /dev/null +++ b/src/non_hermit_dav/new_routines.irp.f @@ -0,0 +1,669 @@ +subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) + + integer :: i, j, n_degen,k , iteration + double precision :: shift_current + double precision :: r,thr,accu_d, accu_nd + integer, allocatable :: iorder_origin(:),iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) + double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) + double precision, allocatable :: im_part(:),re_part(:) + double precision :: accu,thr_cut, thr_norm=1d0 + + + thr_cut = 1.d-15 + print*,'Computing the left/right eigenvectors ...' + print*,'Using the degeneracy splitting algorithm' + ! initialization + shift_current = 1.d-15 + iteration = 0 + print*,'***** iteration = ',iteration + + + ! pre-processing the matrix :: sorting by diagonal elements + allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) + allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) +! print*,'Aw' + do i = 1, n + iorder_origin(i) = i + diag_elem(i) = A(i,i) +! write(*,'(100(F16.10,X))')A(:,i) + enddo + call dsort(diag_elem, iorder_origin, n) + do i = 1, n + do j = 1, n + A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) + enddo + enddo + + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + allocate(im_part(n),iorder(n)) + allocate( S(n,n) ) + + + Aw = A_save + call cancel_small_elmts(aw,n,thr_cut) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv += 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + + + do while(n_real_eigv.ne.n) + iteration += 1 + print*,'***** iteration = ',iteration + if(shift_current.gt.1.d-3)then + print*,'shift_current > 1.d-3 !!' + print*,'Your matrix intrinsically contains complex eigenvalues' + stop + endif + Aw = A_save + call cancel_small_elmts(Aw,n,thr_cut) + call split_matrix_degen(Aw,n,shift_current) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv+= 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + enddo + !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES + do i = 1, n + eigval(i) = WR(i) + iorder(i) = i + enddo + call dsort(eigval,iorder,n) + do i = 1, n +! print*,'eigval(i) = ',eigval(i) + reigvec_tmp(:,i) = VR(:,iorder(i)) + leigvec_tmp(:,i) = Vl(:,iorder(i)) + enddo + +!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY + ! check bi-orthogonality + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + print *, ' accu_nd bi-orthog = ', accu_nd + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print *, ' ' + print *, ' bi-orthogonality: not imposed yet' + print *, ' ' + print *, ' ' + print *, ' orthog between degen eigenvect' + print *, ' ' + double precision, allocatable :: S_nh_inv_half(:,:) + allocate(S_nh_inv_half(n,n)) + logical :: complex_root + deallocate(S_nh_inv_half) + call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) + call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ',accu_nd + call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ',accu_nd + print*,'Must be a deep problem ...' + stop + endif + endif + endif + + !! EIGENVECTORS SORTED AND BI-ORTHONORMAL + do i = 1, n + do j = 1, n + VR(iorder_origin(j),i) = reigvec_tmp(j,i) + VL(iorder_origin(j),i) = leigvec_tmp(j,i) + enddo + enddo + + !! RECOMPUTING THE EIGENVALUES + eigval = 0.d0 + do i = 1, n + iorder(i) = i + accu = 0.d0 + do j = 1, n + accu += VL(j,i) * VR(j,i) + do k = 1, n + eigval(i) += VL(j,i) * A(j,k) * VR(k,i) + enddo + enddo + eigval(i) *= 1.d0/accu +! print*,'eigval(i) = ',eigval(i) + enddo + !! RESORT JUST TO BE SURE + call dsort(eigval, iorder, n) + do i = 1, n + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + print*,'Checking for final reigvec/leigvec' + shift_current = max(1.d-10,shift_current) + print*,'Thr for eigenvectors = ',shift_current + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.) + print *, ' accu_nd bi-orthog = ', accu_nd + + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' + print*,'Eigenvectors are not bi orthonormal ..' + print*,'accu_nd = ',accu_nd + stop + endif + +end + + + +subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors + ! + ! of a non hermitian matrix A(n,n) + ! + ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) + + integer :: i, j, n_degen,k , iteration + double precision :: shift_current + double precision :: r,thr,accu_d, accu_nd + integer, allocatable :: iorder_origin(:),iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) + double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) + double precision, allocatable :: im_part(:),re_part(:) + double precision :: accu,thr_cut, thr_norm=1.d0 + double precision, allocatable :: S_nh_inv_half(:,:) + logical :: complex_root + + + thr_cut = 1.d-15 + print*,'Computing the left/right eigenvectors ...' + print*,'Using the degeneracy splitting algorithm' + ! initialization + shift_current = 1.d-15 + iteration = 0 + print*,'***** iteration = ',iteration + + + ! pre-processing the matrix :: sorting by diagonal elements + allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) + allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) +! print*,'Aw' + do i = 1, n + iorder_origin(i) = i + diag_elem(i) = A(i,i) +! write(*,'(100(F16.10,X))')A(:,i) + enddo + call dsort(diag_elem, iorder_origin, n) + do i = 1, n + do j = 1, n + A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) + enddo + enddo + + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + allocate(im_part(n),iorder(n)) + allocate( S(n,n) ) + allocate(S_nh_inv_half(n,n)) + + + Aw = A_save + call cancel_small_elmts(aw,n,thr_cut) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv += 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + + + do while(n_real_eigv.ne.n) + iteration += 1 + print*,'***** iteration = ',iteration + if(shift_current.gt.1.d-3)then + print*,'shift_current > 1.d-3 !!' + print*,'Your matrix intrinsically contains complex eigenvalues' + stop + endif + Aw = A_save +! thr_cut = shift_current + call cancel_small_elmts(Aw,n,thr_cut) + call split_matrix_degen(Aw,n,shift_current) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv+= 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + enddo + !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES + do i = 1, n + eigval(i) = WR(i) + iorder(i) = i + enddo + call dsort(eigval,iorder,n) + do i = 1, n +! print*,'eigval(i) = ',eigval(i) + reigvec_tmp(:,i) = VR(:,iorder(i)) + leigvec_tmp(:,i) = Vl(:,iorder(i)) + enddo + +!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY + ! check bi-orthogonality + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + print *, ' accu_nd bi-orthog = ', accu_nd + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print *, ' ' + print *, ' bi-orthogonality: not imposed yet' + if(complex_root)then + print *, ' ' + print *, ' ' + print *, ' orthog between degen eigenvect' + print *, ' ' + ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right + call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec + call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ', accu_nd + call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root) + if(complex_root)then + call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp) ! bi-orthonormalization using QR + else + print*,'S^{-1/2} exists !!' + call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization + endif + endif + else ! the matrix S^{-1/2} exists + print*,'S^{-1/2} exists !!' + call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization + endif + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ',accu_nd + print*,'Must be a deep problem ...' + stop + endif + endif + + !! EIGENVECTORS SORTED AND BI-ORTHONORMAL + do i = 1, n + do j = 1, n + VR(iorder_origin(j),i) = reigvec_tmp(j,i) + VL(iorder_origin(j),i) = leigvec_tmp(j,i) + enddo + enddo + + !! RECOMPUTING THE EIGENVALUES + eigval = 0.d0 + do i = 1, n + iorder(i) = i + accu = 0.d0 + do j = 1, n + accu += VL(j,i) * VR(j,i) + do k = 1, n + eigval(i) += VL(j,i) * A(j,k) * VR(k,i) + enddo + enddo + eigval(i) *= 1.d0/accu +! print*,'eigval(i) = ',eigval(i) + enddo + !! RESORT JUST TO BE SURE + call dsort(eigval, iorder, n) + do i = 1, n + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + print*,'Checking for final reigvec/leigvec' + shift_current = max(1.d-10,shift_current) + print*,'Thr for eigenvectors = ',shift_current + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.) + print *, ' accu_nd bi-orthog = ', accu_nd + + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' + print*,'Eigenvectors are not bi orthonormal ..' + print*,'accu_nd = ',accu_nd + stop + endif + +end + + +subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) + + BEGIN_DOC + ! + ! routine returning the eigenvalues and left/right eigenvectors of the TC fock matrix + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer, intent(out) :: n_real_eigv + double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) + double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) + + integer :: i, j, n_degen,k , iteration + double precision :: shift_current + double precision :: r,thr,accu_d, accu_nd + integer, allocatable :: iorder_origin(:),iorder(:) + double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) + double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) + double precision, allocatable :: im_part(:),re_part(:) + double precision :: accu,thr_cut + double precision, allocatable :: S_nh_inv_half(:,:) + logical :: complex_root + + + thr_cut = 1.d-15 + print*,'Computing the left/right eigenvectors ...' + print*,'Using the degeneracy splitting algorithm' + ! initialization + shift_current = 1.d-15 + iteration = 0 + print*,'***** iteration = ',iteration + + + ! pre-processing the matrix :: sorting by diagonal elements + allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) + allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) +! print*,'Aw' + do i = 1, n + iorder_origin(i) = i + diag_elem(i) = A(i,i) +! write(*,'(100(F16.10,X))')A(:,i) + enddo + call dsort(diag_elem, iorder_origin, n) + do i = 1, n + do j = 1, n + A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) + enddo + enddo + + allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) + allocate(im_part(n),iorder(n)) + allocate( S(n,n) ) + allocate(S_nh_inv_half(n,n)) + + + Aw = A_save + call cancel_small_elmts(aw,n,thr_cut) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv += 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + + + do while(n_real_eigv.ne.n) + iteration += 1 + print*,'***** iteration = ',iteration + if(shift_current.gt.1.d-3)then + print*,'shift_current > 1.d-3 !!' + print*,'Your matrix intrinsically contains complex eigenvalues' + stop + endif + Aw = A_save +! thr_cut = shift_current + call cancel_small_elmts(Aw,n,thr_cut) + call split_matrix_degen(Aw,n,shift_current) + call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) + n_real_eigv = 0 + do i = 1, n + if(dabs(WI(i)).lt.1.d-20)then + n_real_eigv+= 1 + else +! print*,'Found an imaginary component to eigenvalue' +! print*,'Re(i) + Im(i)',WR(i),WI(i) + endif + enddo + if(n_real_eigv.ne.n)then + do i = 1, n + im_part(i) = -dabs(WI(i)) + iorder(i) = i + enddo + call dsort(im_part, iorder, n) + shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) + print*,'Largest imaginary part found in eigenvalues = ',im_part(1) + print*,'Splitting the degeneracies by ',shift_current + else + print*,'All eigenvalues are real !' + endif + enddo + !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES + do i = 1, n + eigval(i) = WR(i) + iorder(i) = i + enddo + call dsort(eigval,iorder,n) + do i = 1, n +! print*,'eigval(i) = ',eigval(i) + reigvec_tmp(:,i) = VR(:,iorder(i)) + leigvec_tmp(:,i) = Vl(:,iorder(i)) + enddo + +!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY + ! check bi-orthogonality + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + print *, ' accu_nd bi-orthog = ', accu_nd + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print *, ' ' + print *, ' bi-orthogonality: not imposed yet' + print *, ' ' + print *, ' ' + print *, ' Using impose_unique_biorthog_degen_eigvec' + print *, ' ' + ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right + call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + print*,'accu_nd = ',accu_nd + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ',accu_nd + call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root) + if(complex_root)then + print*,'S^{-1/2} does not exits, using QR bi-orthogonalization' + call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR + else + print*,'S^{-1/2} exists !!' + call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization + endif + endif + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'New vectors not bi-orthonormals at ',accu_nd + print*,'Must be a deep problem ...' + stop + endif + endif + + !! EIGENVECTORS SORTED AND BI-ORTHONORMAL + do i = 1, n + do j = 1, n + VR(iorder_origin(j),i) = reigvec_tmp(j,i) + VL(iorder_origin(j),i) = leigvec_tmp(j,i) + enddo + enddo + + !! RECOMPUTING THE EIGENVALUES + eigval = 0.d0 + do i = 1, n + iorder(i) = i + accu = 0.d0 + do j = 1, n + accu += VL(j,i) * VR(j,i) + do k = 1, n + eigval(i) += VL(j,i) * A(j,k) * VR(k,i) + enddo + enddo + eigval(i) *= 1.d0/accu +! print*,'eigval(i) = ',eigval(i) + enddo + !! RESORT JUST TO BE SURE + call dsort(eigval, iorder, n) + do i = 1, n + do j = 1, n + reigvec(j,i) = VR(j,iorder(i)) + leigvec(j,i) = VL(j,iorder(i)) + enddo + enddo + print*,'Checking for final reigvec/leigvec' + shift_current = max(1.d-10,shift_current) + print*,'Thr for eigenvectors = ',shift_current + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S) + print *, ' accu_nd bi-orthog = ', accu_nd + + if( accu_nd .lt. 1d-10 ) then + print *, ' bi-orthogonality: ok' + else + print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' + print*,'Eigenvectors are not bi orthonormal ..' + print*,'accu_nd = ',accu_nd + stop + endif + +end + + diff --git a/src/non_hermit_dav/project.irp.f b/src/non_hermit_dav/project.irp.f new file mode 100644 index 00000000..c04719ac --- /dev/null +++ b/src/non_hermit_dav/project.irp.f @@ -0,0 +1,53 @@ +subroutine h_non_hermite(v,u,Hmat,a,N_st,sze) + implicit none + BEGIN_DOC + ! Template of routine for the application of H + ! + ! Here, it is done with the Hamiltonian matrix + ! + ! on the set of determinants of psi_det + ! + ! Computes $v = a * H | u \rangle$ + ! + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u(sze,N_st), Hmat(sze,sze), a + double precision, intent(inout) :: v(sze,N_st) + integer :: i,j,k + do k = 1, N_st + do j = 1, sze + do i = 1, sze + v(i,k) += a * u(j,k) * Hmat(i,j) + enddo + enddo + enddo +end + + +subroutine exp_tau_H(u,v,hmat,tau,et,N_st,sze) + implicit none + BEGIN_DOC +! realises v = (1 - tau (H - et)) u + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: hmat(sze,sze), u(sze,N_st), tau, et + double precision, intent(out):: v(sze,N_st) + double precision :: a + integer :: i,j + v = (1.d0 + tau * et) * u + a = -1.d0 * tau + call h_non_hermite(v,u,Hmat,a,N_st,sze) +end + +double precision function project_phi0(u,Hmat0,N_st,sze) + implicit none + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u(sze,N_st), Hmat0(sze) + integer :: j + project_phi0 = 0.d0 + do j = 1, sze + project_phi0 += u(j,1) * Hmat0(j) + enddo + project_phi0 *= 1.d0 / u(1,1) +end + diff --git a/src/non_hermit_dav/utils.irp.f b/src/non_hermit_dav/utils.irp.f new file mode 100644 index 00000000..7f331a6b --- /dev/null +++ b/src/non_hermit_dav/utils.irp.f @@ -0,0 +1,325 @@ + +subroutine get_inv_half_svd(matrix, n, matrix_inv_half) + + BEGIN_DOC + ! :math:`X = S^{-1/2}` obtained by SVD + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: matrix(n,n) + double precision, intent(out) :: matrix_inv_half(n,n) + + integer :: num_linear_dependencies + integer :: LDA, LDC + integer :: info, i, j, k + double precision, parameter :: threshold = 1.d-6 + double precision, allocatable :: U(:,:),Vt(:,:), D(:),matrix_half(:,:),D_half(:) + + double precision :: accu_d,accu_nd + + LDA = size(matrix, 1) + LDC = size(matrix_inv_half, 1) + if(LDA .ne. LDC) then + print*, ' LDA != LDC' + stop + endif + + print*, ' n = ', n + print*, ' LDA = ', LDA + print*, ' LDC = ', LDC + + double precision,allocatable :: WR(:),WI(:),VL(:,:),VR(:,:) + allocate(WR(n),WI(n),VL(n,n),VR(n,n)) + call lapack_diag_non_sym(n,matrix,WR,WI,VL,VR) + do i = 1, n + print*,'WR,WI',WR(i),WI(i) + enddo + + + allocate(U(LDC,n), Vt(LDA,n), D(n)) + + call svd(matrix, LDA, U, LDC, D, Vt, LDA, n, n) + double precision, allocatable :: tmp1(:,:),tmp2(:,:),D_mat(:,:) + allocate(tmp1(n,n),tmp2(n,n),D_mat(n,n),matrix_half(n,n),D_half(n)) + D_mat = 0.d0 + do i = 1,n + D_mat(i,i) = D(i) + enddo + ! matrix = U D Vt + ! tmp1 = U D + tmp1 = 0.d0 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , U, size(U, 1), D_mat, size(D_mat, 1) & + , 0.d0, tmp1, size(tmp1, 1) ) + ! tmp2 = tmp1 X Vt = matrix + tmp2 = 0.d0 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , tmp1, size(tmp1, 1), Vt, size(Vt, 1) & + , 0.d0, tmp2, size(tmp2, 1) ) + print*,'Checking the recomposition of the matrix' + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + accu_d += dabs(tmp2(i,i) - matrix(i,i)) + do j = 1, n + if(i==j)cycle + accu_nd += dabs(tmp2(j,i) - matrix(j,i)) + enddo + enddo + print*,'accu_d =',accu_d + print*,'accu_nd =',accu_nd + print*,'passed the recomposition' + + num_linear_dependencies = 0 + do i = 1, n + if(abs(D(i)) <= threshold) then + D(i) = 0.d0 + num_linear_dependencies += 1 + else + ASSERT (D(i) > 0.d0) + D_half(i) = dsqrt(D(i)) + D(i) = 1.d0 / dsqrt(D(i)) + endif + enddo + write(*,*) ' linear dependencies', num_linear_dependencies + + matrix_inv_half = 0.d0 + matrix_half = 0.d0 + do k = 1, n + if(D(k) /= 0.d0) then + do j = 1, n + do i = 1, n +! matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(k,j) + matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(j,k) + matrix_half(i,j) = matrix_half(i,j) + U(i,k) * D_half(k) * Vt(j,k) + enddo + enddo + endif + enddo + print*,'testing S^1/2 * S^1/2= S' + ! tmp1 = S^1/2 X S^1/2 + tmp1 = 0.d0 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , matrix_half, size(matrix_half, 1), matrix_half, size(matrix_half, 1) & + , 0.d0, tmp1, size(tmp1, 1) ) + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + accu_d += dabs(tmp1(i,i) - matrix(i,i)) + do j = 1, n + if(i==j)cycle + accu_nd += dabs(tmp1(j,i) - matrix(j,i)) + enddo + enddo + print*,'accu_d =',accu_d + print*,'accu_nd =',accu_nd + +! print*,'S inv half' +! do i = 1, n +! write(*, '(1000(F16.10,X))') matrix_inv_half(i,:) +! enddo + + double precision, allocatable :: pseudo_inverse(:,:),identity(:,:) + allocate( pseudo_inverse(n,n),identity(n,n)) + call get_pseudo_inverse(matrix,n,n,n,pseudo_inverse,n,threshold) + + ! S^-1 X S = 1 +! identity = 0.d0 +! call dgemm( 'N', 'N', n, n, n, 1.d0 & +! , matrix, size(matrix, 1), pseudo_inverse, size(pseudo_inverse, 1) & +! , 0.d0, identity, size(identity, 1) ) + print*,'Checking S^-1/2 X S^-1/2 = S^-1 ?' + ! S^-1/2 X S^-1/2 = S^-1 ? + tmp1 = 0.d0 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + ,matrix_inv_half, size(matrix_inv_half, 1), matrix_inv_half, size(matrix_inv_half, 1) & + , 0.d0, tmp1, size(tmp1, 1) ) + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + accu_d += dabs(1.d0 - pseudo_inverse(i,i)) + do j = 1, n + if(i==j)cycle + accu_nd += dabs(tmp1(j,i) - pseudo_inverse(j,i)) + enddo + enddo + print*,'accu_d =',accu_d + print*,'accu_nd =',accu_nd + + stop +! +! ! ( S^-1/2 x S ) x S^-1/2 +! Stmp2 = 0.d0 +! call dgemm( 'N', 'N', n, n, n, 1.d0 & +! , Stmp, size(Stmp, 1), matrix_inv_half, size(matrix_inv_half, 1) & +! , 0.d0, Stmp2, size(Stmp2, 1) ) + + ! S^-1/2 x ( S^-1/2 x S ) +! Stmp2 = 0.d0 +! call dgemm( 'N', 'N', n, n, n, 1.d0 & +! , matrix_inv_half, size(matrix_inv_half, 1), Stmp, size(Stmp, 1) & +! , 0.d0, Stmp2, size(Stmp2, 1) ) + +! do i = 1, n +! do j = 1, n +! if(i==j) then +! accu_d += Stmp2(j,i) +! else +! accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i) +! endif +! enddo +! enddo +! accu_nd = dsqrt(accu_nd) +! print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd +! print*, ' after S^-1/2: sum of diag S elements = ', accu_d +! do i = 1, n +! write(*,'(1000(F16.10,X))') Stmp2(i,:) +! enddo + + !double precision :: thresh + !thresh = 1.d-10 + !if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then + ! stop + !endif + +end subroutine get_inv_half_svd + +! --- + +subroutine get_inv_half_nonsymmat_diago(matrix, n, matrix_inv_half, complex_root) + + BEGIN_DOC + ! input: S = matrix + ! output: S^{-1/2} = matrix_inv_half obtained by diagonalization + ! + ! S = VR D VL^T + ! = VR D^{1/2} D^{1/2} VL^T + ! = VR D^{1/2} VL^T VR D^{1/2} VL^T + ! = S^{1/2} S^{1/2} with S = VR D^{1/2} VL^T + ! + ! == > S^{-1/2} = VR D^{-1/2} VL^T + ! + END_DOC + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: matrix(n,n) + logical, intent(out) :: complex_root + double precision, intent(out) :: matrix_inv_half(n,n) + + integer :: i, j + double precision :: accu_d, accu_nd + double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), S(:,:), S_diag(:) + double precision, allocatable :: tmp1(:,:), D_mat(:,:) + + complex_root = .False. + + matrix_inv_half = 0.D0 + print*,'Computing S^{-1/2}' + + allocate(WR(n), WI(n), VL(n,n), VR(n,n)) + call lapack_diag_non_sym(n, matrix, WR, WI, VL, VR) + + allocate(S(n,n)) + call check_biorthog(n, n, VL, VR, accu_d, accu_nd, S) + print*,'accu_nd S^{-1/2}',accu_nd + if(accu_nd.gt.1.d-10) then + complex_root = .True. ! if vectors are not bi-orthogonal return + print*,'Eigenvectors of S are not bi-orthonormal, skipping S^{-1/2}' + return + endif + + allocate(S_diag(n)) + do i = 1, n + S_diag(i) = 1.d0/dsqrt(S(i,i)) + if(dabs(WI(i)).gt.1.d-20.or.WR(i).lt.0.d0)then ! check that eigenvalues are real and positive + complex_root = .True. + print*,'Eigenvalues of S have imaginary part ' + print*,'WR(i),WI(i)',WR(i), WR(i) + print*,'Skipping S^{-1/2}' + return + endif + enddo + deallocate(S) + + if(complex_root) return + + ! normalization of vectors + do i = 1, n + if(S_diag(i).eq.1.d0) cycle + do j = 1,n + VL(j,i) *= S_diag(i) + VR(j,i) *= S_diag(i) + enddo + enddo + deallocate(S_diag) + + allocate(tmp1(n,n), D_mat(n,n)) + + D_mat = 0.d0 + do i = 1, n + D_mat(i,i) = 1.d0/dsqrt(WR(i)) + enddo + deallocate(WR, WI) + + ! tmp1 = VR D^{-1/2} + tmp1 = 0.d0 + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , VR, size(VR, 1), D_mat, size(D_mat, 1) & + , 0.d0, tmp1, size(tmp1, 1) ) + deallocate(VR, D_mat) + + ! S^{-1/2} = tmp1 X VL^T + matrix_inv_half = 0.d0 + call dgemm( 'N', 'T', n, n, n, 1.d0 & + , tmp1, size(tmp1, 1), VL, size(VL, 1) & + , 0.d0, matrix_inv_half, size(matrix_inv_half, 1) ) + deallocate(tmp1, VL) + +end + +! --- + +subroutine bi_ortho_s_inv_half(n,leigvec,reigvec,S_nh_inv_half) + implicit none + integer, intent(in) :: n + double precision, intent(in) :: S_nh_inv_half(n,n) + double precision, intent(inout) :: leigvec(n,n),reigvec(n,n) + BEGIN_DOC + ! bi-orthonormalization of left and right vectors + ! + ! S = VL^T VR + ! + ! S^{-1/2} S S^{-1/2} = 1 = S^{-1/2} VL^T VR S^{-1/2} = VL_new^T VR_new + ! + ! VL_new = VL (S^{-1/2})^T + ! + ! VR_new = VR S^{^{-1/2}} + END_DOC + double precision,allocatable :: vl_tmp(:,:),vr_tmp(:,:) + print*,'Bi-orthonormalization using S^{-1/2}' + allocate(vl_tmp(n,n),vr_tmp(n,n)) + vl_tmp = leigvec + vr_tmp = reigvec + ! VL_new = VL (S^{-1/2})^T + call dgemm( 'N', 'T', n, n, n, 1.d0 & + , vl_tmp, size(vl_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) & + , 0.d0, leigvec, size(leigvec, 1) ) + ! VR_new = VR S^{^{-1/2}} + call dgemm( 'N', 'N', n, n, n, 1.d0 & + , vr_tmp, size(vr_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) & + , 0.d0, reigvec, size(reigvec, 1) ) + double precision :: accu_d, accu_nd + double precision,allocatable :: S(:,:) + allocate(S(n,n)) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S) + if(dabs(accu_d - n).gt.1.d-10 .or. accu_nd .gt.1.d-8 )then + print*,'Pb in bi_ortho_s_inv_half !!' + print*,'accu_d =',accu_d + print*,'accu_nd =',accu_nd + stop + endif +end