From 3cc8fca451c7146f9ee430447da16424014b6861 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 29 Sep 2014 18:17:30 +0200 Subject: [PATCH] Fixed issue #12 --- src/BiInts/ao_bi_integrals.irp.f | 270 +++------------------- src/BiInts/gauss_legendre.irp.f | 13 +- src/BiInts/mo_bi_integrals.irp.f | 2 + src/Dets/H_apply_template.f | 6 +- src/Dets/utils.irp.f | 4 + src/Hartree_Fock/Fock_matrix.irp.f | 2 +- src/Hartree_Fock/ref_bitmask.irp.f | 3 +- src/Selectors_full/e_corr_selectors.irp.f | 1 + 8 files changed, 61 insertions(+), 240 deletions(-) diff --git a/src/BiInts/ao_bi_integrals.irp.f b/src/BiInts/ao_bi_integrals.irp.f index 4f7ba341..3756bac5 100644 --- a/src/BiInts/ao_bi_integrals.irp.f +++ b/src/BiInts/ao_bi_integrals.irp.f @@ -121,8 +121,8 @@ double precision function ao_bielec_integral_schwartz_accel(i,j,k,l) double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq integer :: iorder_p(3), iorder_q(3) - real, allocatable :: schwartz_kl(:,:) - real :: schwartz_ij + double precision, allocatable :: schwartz_kl(:,:) + double precision :: schwartz_ij PROVIDE all_utils @@ -151,10 +151,10 @@ double precision function ao_bielec_integral_schwartz_accel(i,j,k,l) L_center(p) = nucl_coord(num_l,p) enddo - schwartz_kl(0,0) = 0. + schwartz_kl(0,0) = 0.d0 do r = 1, ao_prim_num(k) coef1 = ao_coef_transp(r,k)*ao_coef_transp(r,k) - schwartz_kl(0,r) = 0. + schwartz_kl(0,r) = 0.d0 do s = 1, ao_prim_num(l) coef2 = coef1 * ao_coef_transp(s,l) * ao_coef_transp(s,l) call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& @@ -205,9 +205,10 @@ double precision function ao_bielec_integral_schwartz_accel(i,j,k,l) ao_expo_transp(r,k),ao_expo_transp(s,l), & K_power,L_power,K_center,L_center,dim1) q_inv = 1.d0/qq - integral += general_primitive_integral(dim1, & + integral = general_primitive_integral(dim1, & P_new,P_center,fact_p,pp,p_inv,iorder_p, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) * coef4 + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_bielec_integral_schwartz_accel += coef4 * integral enddo ! s enddo ! r enddo ! q @@ -223,10 +224,10 @@ double precision function ao_bielec_integral_schwartz_accel(i,j,k,l) enddo double precision :: ERI - schwartz_kl(0,0) = 0. + schwartz_kl(0,0) = 0.d0 do r = 1, ao_prim_num(k) coef1 = ao_coef_transp(r,k)*ao_coef_transp(r,k) - schwartz_kl(0,r) = 0. + schwartz_kl(0,r) = 0.d0 do s = 1, ao_prim_num(l) coef2 = coef1*ao_coef_transp(s,l)*ao_coef_transp(s,l) schwartz_kl(s,r) = ERI( & @@ -262,7 +263,7 @@ double precision function ao_bielec_integral_schwartz_accel(i,j,k,l) cycle endif coef4 = coef3*ao_coef_transp(s,l) - integral += coef4*ERI( & + integral = ERI( & ao_expo_transp(p,i),ao_expo_transp(q,j),ao_expo_transp(r,k),ao_expo_transp(s,l),& I_power(1),J_power(1),K_power(1),L_power(1), & I_power(2),J_power(2),K_power(2),L_power(2), & @@ -341,8 +342,9 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] integer :: i,j,k,l double precision :: ao_bielec_integral,cpu_1,cpu_2, wall_1, wall_2 - double precision :: wall_0 - double precision :: thresh, thresh2 + double precision :: integral, wall_0 + double precision :: thresh + thresh = ao_integrals_threshold ! For integrals file integer*8,allocatable :: buffer_i(:) @@ -352,37 +354,12 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] integer :: n_integrals, n_centers, thread_num integer :: jl_pairs(2,ao_num*(ao_num+1)/2), kk, m, j1, i1, lmax - - - integer :: p,q,r,s - double precision :: I_center(3),J_center(3),K_center(3),L_center(3) - integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) - double precision :: integral - include 'include/constants.F' - double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp - double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq - integer :: iorder_p(3), iorder_q(3) - real, allocatable :: schwartz_kl(:,:) - real :: schwartz_ij - double precision :: coef1 - double precision :: coef2 - double precision :: p_inv,q_inv - double precision :: coef3 - double precision :: coef4 - double precision :: general_primitive_integral - double precision :: ERI - - thresh = ao_integrals_threshold PROVIDE gauleg_t2 ao_integrals_map all_utils PROVIDE ao_bielec_integral_schwartz integral = ao_bielec_integral(1,1,1,1) - dim1 = n_pt_max_integrals real :: map_mb - - thresh2 = thresh*thresh - if (read_ao_integrals) then integer :: load_ao_integrals if (load_ao_integrals(trim(ezfio_filename)//'/work/ao_integrals.bin') == 0) then @@ -394,8 +371,8 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] kk=1 do l = 1, ao_num ! r2 - do k = 1, l ! r2 - jl_pairs(1,kk) = k + do j = 1, l ! r2 + jl_pairs(1,kk) = j jl_pairs(2,kk) = l kk += 1 enddo @@ -410,17 +387,12 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] call cpu_time(cpu_1) !$OMP PARALLEL PRIVATE(i,j,k,l,kk, & !$OMP integral,buffer_i,buffer_value,n_integrals, & - !$OMP cpu_2,wall_2,i1,j1,thread_num, num_i, num_j, num_k, num_l,& - !$OMP schwartz_kl,schwartz_ij,I_power,J_power,K_power,L_power, & - !$OMP I_center,J_center,K_center,L_center,coef1,coef2,coef3,coef4,& - !$OMP pp,qq,p_inv,q_inv,P_center,Q_center,P_new,Q_new,fact_p, & - !$OMP fact_q,iorder_p,iorder_q) & + !$OMP cpu_2,wall_2,i1,j1,thread_num) & !$OMP DEFAULT(NONE) & - !$OMP SHARED (ao_num, jl_pairs, ao_integrals_map,thresh,thresh2,& - !$OMP cpu_1,wall_1,lock, lmax,n_centers,ao_nucl,nucl_coord, & - !$OMP ao_bielec_integral_schwartz,output_BiInts,abort_here, & - !$OMP wall_0,ao_prim_num,ao_expo_transp,ao_coef_transp,dim1, & - !$OMP ao_power) + !$OMP SHARED (ao_num, jl_pairs, ao_integrals_map,thresh, & + !$OMP cpu_1,wall_1,lock, lmax,n_centers,ao_nucl, & + !$OMP ao_overlap_abs,ao_overlap,output_BiInts,abort_here, & + !$OMP wall_0) allocate(buffer_i(size_buffer)) allocate(buffer_value(size_buffer)) @@ -437,198 +409,33 @@ IRP_ENDIF if (abort_here) then cycle endif - k = jl_pairs(1,kk) + j = jl_pairs(1,kk) l = jl_pairs(2,kk) - j1 = k+ishft(l*l-l,-1) - num_k = ao_nucl(k) - num_l = ao_nucl(l) - allocate(schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k))) - do p = 1, 3 - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - K_center(p) = nucl_coord(num_k,p) - L_center(p) = nucl_coord(num_l,p) - enddo - - ! ==================================== - ! Schwartz screening for kl primitives - ! ------------------------------------ - - schwartz_kl(0,0) = 0. - if (num_k /= num_l) then - - do r = 1, ao_prim_num(k) - coef1 = ao_coef_transp(r,k)*ao_coef_transp(r,k) - schwartz_kl(0,r) = 0. - do s = 1, ao_prim_num(l) - coef2 = coef1 * ao_coef_transp(s,l) * ao_coef_transp(s,l) - call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& - ao_expo_transp(r,k),ao_expo_transp(s,l), & - K_power,L_power,K_center,L_center,dim1) - q_inv = 1.d0/qq - schwartz_kl(s,r) = general_primitive_integral(dim1, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) & - * coef2 - schwartz_kl(0,r) = max(schwartz_kl(0,r),schwartz_kl(s,r)) - enddo - schwartz_kl(0,0) = max(schwartz_kl(0,r),schwartz_kl(0,0)) - enddo - - else - - do r = 1, ao_prim_num(k) - coef1 = ao_coef_transp(r,k)*ao_coef_transp(r,k) - schwartz_kl(0,r) = 0. - do s = 1, ao_prim_num(l) - coef2 = coef1*ao_coef_transp(s,l)*ao_coef_transp(s,l) - schwartz_kl(s,r) = ERI( & - ao_expo_transp(r,k),ao_expo_transp(s,l),ao_expo_transp(r,k),ao_expo_transp(s,l),& - K_power(1),L_power(1),K_power(1),L_power(1), & - K_power(2),L_power(2),K_power(2),L_power(2), & - K_power(3),L_power(3),K_power(3),L_power(3)) * & - coef2 - schwartz_kl(0,r) = max(schwartz_kl(0,r),schwartz_kl(s,r)) - enddo - schwartz_kl(0,0) = max(schwartz_kl(0,r),schwartz_kl(0,0)) - enddo - + j1 = j+ishft(l*l-l,-1) + if (ao_overlap_abs(j,l) < thresh) then + cycle endif - ! ------------------------------------------- - ! End of Schwartz screening for kl primitives - ! =========================================== - - do j = 1, ao_num ! r1 - i1 = ishft(j*j-j,-1) + do k = 1, ao_num ! r1 + i1 = ishft(k*k-k,-1) if (i1 > j1) then exit endif - num_j = ao_nucl(j) - do p = 1, 3 - J_power(p) = ao_power(j,p) - J_center(p) = nucl_coord(num_j,p) - enddo - do i = 1, j + do i = 1, k i1 += 1 if (i1 > j1) then exit endif - num_i = ao_nucl(i) - do p = 1, 3 - I_power(p) = ao_power(i,p) - I_center(p) = nucl_coord(num_i,p) - enddo - - integral = 0.d0 - - - if (num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k)then - - ! ===================== - ! Multi-center integral - ! --------------------- - - do p = 1, ao_prim_num(i) - coef1 = ao_coef_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_transp(q,j) - call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& - ao_expo_transp(p,i),ao_expo_transp(q,j), & - I_power,J_power,I_center,J_center,dim1) - p_inv = 1.d0/pp - schwartz_ij = general_primitive_integral(dim1, & - P_new,P_center,fact_p,pp,p_inv,iorder_p, & - P_new,P_center,fact_p,pp,p_inv,iorder_p) * & - coef2*coef2 - if (schwartz_kl(0,0)*schwartz_ij < thresh2) then - cycle - endif - do r = 1, ao_prim_num(k) - if (schwartz_kl(0,r)*schwartz_ij < thresh2) then - cycle - endif - coef3 = coef2*ao_coef_transp(r,k) - do s = 1, ao_prim_num(l) - if (schwartz_kl(s,r)*schwartz_ij < thresh2) then - cycle - endif - coef4 = coef3*ao_coef_transp(s,l) - call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& - ao_expo_transp(r,k),ao_expo_transp(s,l), & - K_power,L_power,K_center,L_center,dim1) - q_inv = 1.d0/qq - integral += coef4 * general_primitive_integral(dim1, & - P_new,P_center,fact_p,pp,p_inv,iorder_p, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) - enddo ! s - enddo ! r - enddo ! q - enddo ! p - - ! ------------------------- - ! End Multi-center integral - ! ========================= - - - else - - - ! =================== - ! One-center integral - ! ------------------- - - do p = 1, ao_prim_num(i) - coef1 = ao_coef_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_transp(q,j) - schwartz_ij = ERI( & - ao_expo_transp(p,i),ao_expo_transp(q,j),ao_expo_transp(p,i),ao_expo_transp(q,j),& - I_power(1),J_power(1),I_power(1),J_power(1), & - I_power(2),J_power(2),I_power(2),J_power(2), & - I_power(3),J_power(3),I_power(3),J_power(3))*coef2*coef2 - if (schwartz_kl(0,0)*schwartz_ij < thresh2) then - cycle - endif - do r = 1, ao_prim_num(k) - if (schwartz_kl(0,r)*schwartz_ij < thresh2) then - cycle - endif - coef3 = coef2*ao_coef_transp(r,k) - do s = 1, ao_prim_num(l) - if (schwartz_kl(s,r)*schwartz_ij < thresh2) then - cycle - endif - coef4 = coef3*ao_coef_transp(s,l) - integral += coef4*ERI( & - ao_expo_transp(p,i),ao_expo_transp(q,j),ao_expo_transp(r,k),ao_expo_transp(s,l),& - I_power(1),J_power(1),K_power(1),L_power(1), & - I_power(2),J_power(2),K_power(2),L_power(2), & - I_power(3),J_power(3),K_power(3),L_power(3)) - enddo ! s - enddo ! r - enddo ! q - enddo ! p - - ! ----------------------- - ! End One-center integral - ! ======================= - - + if (ao_overlap_abs(i,k)*ao_overlap_abs(j,l) < thresh) then + cycle endif - - - + !DIR$ FORCEINLINE + integral = ao_bielec_integral(i,k,j,l) if (abs(integral) < thresh) then cycle endif - - ! ====================== - ! Append integral to map - ! ---------------------- - n_integrals += 1 !DIR$ FORCEINLINE - call bielec_integrals_index(i,k,j,l,buffer_i(n_integrals)) + call bielec_integrals_index(i,j,k,l,buffer_i(n_integrals)) buffer_value(n_integrals) = integral if (n_integrals > 1024 ) then if (omp_test_lock(lock)) then @@ -641,11 +448,6 @@ IRP_ENDIF call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value) n_integrals = 0 endif - - ! -------------------------- - ! End Append integral to map - ! ========================== - enddo enddo call wall_time(wall_2) @@ -657,7 +459,6 @@ IRP_ENDIF wall_2-wall_1, 's', map_mb(ao_integrals_map) ,'MB' endif endif - deallocate (schwartz_kl) enddo !$OMP END DO NOWAIT call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value) @@ -694,7 +495,7 @@ END_PROVIDER -BEGIN_PROVIDER [ real, ao_bielec_integral_schwartz,(ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_bielec_integral_schwartz,(ao_num,ao_num) ] implicit none BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -711,7 +512,7 @@ BEGIN_PROVIDER [ real, ao_bielec_integral_schwartz,(ao_num,ao_num) ] !$OMP SCHEDULE(dynamic) do i=1,ao_num do k=1,i - ao_bielec_integral_schwartz(i,k) = sqrt(ao_bielec_integral(i,k,i,k)) + ao_bielec_integral_schwartz(i,k) = dsqrt(ao_bielec_integral(i,k,i,k)) ao_bielec_integral_schwartz(k,i) = ao_bielec_integral_schwartz(i,k) enddo enddo @@ -1006,7 +807,8 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt) call I_x1_new(a-1,c-1,B_10,B_01,B_00,res2,n_pt) !DIR$ VECTOR ALIGNED do i=1,n_pt - res(i) = (a-1) * B_10(i) * res(i) + c * B_00(i) * res2(i) + res(i) = (a-1) * B_10(i) * res(i) & + + c * B_00(i) * res2(i) enddo endif end diff --git a/src/BiInts/gauss_legendre.irp.f b/src/BiInts/gauss_legendre.irp.f index e7545d58..e62febeb 100644 --- a/src/BiInts/gauss_legendre.irp.f +++ b/src/BiInts/gauss_legendre.irp.f @@ -1,5 +1,14 @@ - BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ] -&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ] +BEGIN_PROVIDER [ integer, n_pt_max_integrals_16 ] + implicit none + BEGIN_DOC + ! Aligned n_pt_max_integrals + END_DOC + integer, external :: align_double + n_pt_max_integrals_16 = align_double(n_pt_max_integrals) +END_PROVIDER + + BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals_16,n_pt_max_integrals/2) ] +&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals_16,n_pt_max_integrals/2) ] implicit none BEGIN_DOC ! t_w(i,1,k) = w(i) diff --git a/src/BiInts/mo_bi_integrals.irp.f b/src/BiInts/mo_bi_integrals.irp.f index 3f24b8dc..39a56977 100644 --- a/src/BiInts/mo_bi_integrals.irp.f +++ b/src/BiInts/mo_bi_integrals.irp.f @@ -24,6 +24,8 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] BEGIN_DOC ! If True, the map of MO bielectronic integrals is provided END_DOC + + PROVIDE all_mo_integrals mo_bielec_integrals_in_map = .True. if (read_mo_integrals) then diff --git a/src/Dets/H_apply_template.f b/src/Dets/H_apply_template.f index b3984448..f70f69bf 100644 --- a/src/Dets/H_apply_template.f +++ b/src/Dets/H_apply_template.f @@ -377,8 +377,10 @@ subroutine $subroutine($params_main) integer :: ispin, k integer :: iproc - PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map N_det_selectors psi_generators - PROVIDE psi_det_sorted_bit coef_hf_selector psi_det psi_coef ref_bitmask_energy + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map + PROVIDE N_det_selectors psi_generators HF_energy + PROVIDE psi_det_sorted_bit coef_hf_selector psi_det psi_coef + PROVIDE mo_mono_elec_integral ref_bitmask_energy nmax = ( N_det_generators/nproc ) *nproc call wall_time(wall_0) diff --git a/src/Dets/utils.irp.f b/src/Dets/utils.irp.f index 0b89f339..27022965 100644 --- a/src/Dets/utils.irp.f +++ b/src/Dets/utils.irp.f @@ -5,6 +5,9 @@ BEGIN_PROVIDER [ double precision, H_matrix_all_dets,(N_det,N_det) ] END_DOC integer :: i,j double precision :: hij + call i_H_j(psi_det(1,1,1),psi_det(1,1,1),N_int,hij) + !$OMP PARALLEL DO SCHEDULE(GUIDED) PRIVATE(i,j,hij) & + !$OMP SHARED (N_det, psi_det, N_int,H_matrix_all_dets) do i =1,N_det do j =i,N_det call i_H_j(psi_det(1,1,i),psi_det(1,1,j),N_int,hij) @@ -12,5 +15,6 @@ BEGIN_PROVIDER [ double precision, H_matrix_all_dets,(N_det,N_det) ] H_matrix_all_dets(j,i) = hij enddo enddo + !$OMP END PARALLEL DO END_PROVIDER diff --git a/src/Hartree_Fock/Fock_matrix.irp.f b/src/Hartree_Fock/Fock_matrix.irp.f index cd6e90c3..c2c98630 100644 --- a/src/Hartree_Fock/Fock_matrix.irp.f +++ b/src/Hartree_Fock/Fock_matrix.irp.f @@ -277,7 +277,7 @@ BEGIN_PROVIDER [ double precision, HF_energy ] BEGIN_DOC ! Hartree-Fock energy END_DOC - HF_energy = nuclear_repulsion + ref_bitmask_energy + HF_energy = nuclear_repulsion + ref_bitmask_energy END_PROVIDER diff --git a/src/Hartree_Fock/ref_bitmask.irp.f b/src/Hartree_Fock/ref_bitmask.irp.f index 5f0e3995..76bfbd42 100644 --- a/src/Hartree_Fock/ref_bitmask.irp.f +++ b/src/Hartree_Fock/ref_bitmask.irp.f @@ -11,6 +11,8 @@ integer :: occ(N_int*bit_kind_size,2) integer :: i,j + PROVIDE mo_mono_elec_integral + call bitstring_to_list(ref_bitmask(1,1), occ(1,1), i, N_int) call bitstring_to_list(ref_bitmask(1,2), occ(1,2), i, N_int) @@ -57,7 +59,6 @@ END_PROVIDER integer :: i,j - ref_bitmask_energy = 0.d0 do j=1,ao_num diff --git a/src/Selectors_full/e_corr_selectors.irp.f b/src/Selectors_full/e_corr_selectors.irp.f index e210c482..076a3213 100644 --- a/src/Selectors_full/e_corr_selectors.irp.f +++ b/src/Selectors_full/e_corr_selectors.irp.f @@ -24,6 +24,7 @@ use bitmasks endif enddo END_PROVIDER + BEGIN_PROVIDER[double precision, coef_hf_selector] &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf] &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf_squared]